From adc83db64e10067cb7ff2375a4e66a93f23104e3 Mon Sep 17 00:00:00 2001 From: xiongxiaoyang <1179705413@qq.com> Date: Sat, 21 Jun 2025 12:32:11 +0800 Subject: [PATCH] =?UTF-8?q?perf(novel-crawl):=20=E5=8E=BB=E9=99=A4?= =?UTF-8?q?=E5=B0=8F=E8=AF=B4=E5=86=85=E5=AE=B9=E6=9C=AB=E5=B0=BE=E7=9A=84?= =?UTF-8?q?=E6=89=80=E6=9C=89=E6=8D=A2=E8=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/java2nb/novel/core/crawl/CrawlParser.java | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java index 95b0165..e0d5dbc 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java @@ -238,6 +238,8 @@ public class CrawlParser { } } } + // 去除小说内容末尾的所有换行 + content = removeTrailingBrTags(content); //插入章节目录和章节内容 BookIndex bookIndex = new BookIndex(); bookIndex.setIndexName(indexName); @@ -314,4 +316,12 @@ public class CrawlParser { return false; } + + /** + * 删除字符串末尾的所有<br>类似标签(允许各种空格) + */ + public static String removeTrailingBrTags(String str) { + return str.replaceAll("(?i)(?:\\s*<\\s*br\\s*/?\\s*>)++(?:\\s|\\u3000)*$", ""); + } + }