diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java index 95b0165..e0d5dbc 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java @@ -238,6 +238,8 @@ public class CrawlParser { } } } + // 去除小说内容末尾的所有换行 + content = removeTrailingBrTags(content); //插入章节目录和章节内容 BookIndex bookIndex = new BookIndex(); bookIndex.setIndexName(indexName); @@ -314,4 +316,12 @@ public class CrawlParser { return false; } + + /** + * 删除字符串末尾的所有
类似标签(允许各种空格) + */ + public static String removeTrailingBrTags(String str) { + return str.replaceAll("(?i)(?:\\s*<\\s*br\\s*/?\\s*>)++(?:\\s|\\u3000)*$", ""); + } + }