去除TXT小说下载的广告内容

This commit is contained in:
xiongxiaoyang 2019-12-20 10:24:55 +08:00
parent a5fa5766ac
commit ff05245493
2 changed files with 8 additions and 2 deletions

View File

@ -90,4 +90,9 @@ public class Constants {
* 没有内容的描述
* */
public static final String NO_CONTENT_DESC = "正在手打中,请稍等片刻,内容更新后,需要重新刷新页面,才能获取最新更新";
/**
* Regular expression for the advertisement block embedded in book content pages.
* It matches an outer {@code <div>} whose attributes contain "app.html", wrapping an
* inner {@code <div>} that holds two child {@code <div>}s: the first with plain text,
* the second with text followed by a {@code <span>} whose text is enclosed in the
* literal markers ">>" and "<<". Whitespace between the tags is tolerated.
* Intended to be passed to {@code String.replaceAll} so the matched block can be removed.
* */
public static final String CONTENT_AD_PATTERN = "<div[^>]+app\\.html[^>]+>\\s*<div[^>]+>\\s*<div[^>]+>[^<]+</div>\\s*<div[^>]+>[^<]+<span[^>]+>>>[^<]+<<</span>\\s*</div>\\s*</div>\\s*</div>";
}

View File

@ -252,7 +252,7 @@ public class BookController {
List<Integer> preAndNextIndexNum = bookService.queryPreAndNextIndexNum(bookId, indexNum);
modelMap.put("nextIndexNum", preAndNextIndexNum.get(0));
modelMap.put("preIndexNum", preAndNextIndexNum.get(1));
bookContent.setContent(bookContent.getContent().replaceAll("<div[^>]+app\\.html[^>]+>\\s*<div[^>]+>\\s*<div[^>]+>[^<]+</div>\\s*<div[^>]+>[^<]+<span[^>]+>>>[^<]+<<</span>\\s*</div>\\s*</div>\\s*</div>", ""));
bookContent.setContent(bookContent.getContent().replaceAll(Constants.CONTENT_AD_PATTERN, ""));
modelMap.put("bookContent", bookContent);
modelMap.put("indexName", indexName);
Book basicBook = bookService.queryBaseInfo(bookId);
@ -343,7 +343,8 @@ public class BookController {
String content = bookService.queryContentList(bookId, i);
out.write(index.getBytes(StandardCharsets.UTF_8));
out.write("\n".getBytes(StandardCharsets.UTF_8));
content = content.replaceAll("<br\\s*/*>", "\r\n")
content = content.replaceAll(Constants.CONTENT_AD_PATTERN, "")
.replaceAll("<br\\s*/*>", "\r\n")
.replaceAll("&nbsp;", " ")
.replaceAll("<a[^>]*>", "")
.replaceAll("</a>", "")