去除TXT小说下载的广告内容

This commit is contained in:
xiongxiaoyang 2019-12-20 10:24:55 +08:00
parent a5fa5766ac
commit ff05245493
2 changed files with 8 additions and 2 deletions

View File

@ -90,4 +90,9 @@ public class Constants {
* 没有内容的描述 * 没有内容的描述
* */ * */
public static final String NO_CONTENT_DESC = "正在手打中,请稍等片刻,内容更新后,需要重新刷新页面,才能获取最新更新"; public static final String NO_CONTENT_DESC = "正在手打中,请稍等片刻,内容更新后,需要重新刷新页面,才能获取最新更新";
/**
 * Regular expression for the advertisement block found in book content pages.
 * Matches an outer div whose tag attributes contain "app.html", wrapping a nested
 * div that holds a text-only div followed by a div containing text and a span
 * whose text is enclosed between ">>" and "<<".
 * Intended to be passed to String.replaceAll so the matched markup can be removed.
 * */
public static final String CONTENT_AD_PATTERN = "<div[^>]+app\\.html[^>]+>\\s*<div[^>]+>\\s*<div[^>]+>[^<]+</div>\\s*<div[^>]+>[^<]+<span[^>]+>>>[^<]+<<</span>\\s*</div>\\s*</div>\\s*</div>";
} }

View File

@ -252,7 +252,7 @@ public class BookController {
List<Integer> preAndNextIndexNum = bookService.queryPreAndNextIndexNum(bookId, indexNum); List<Integer> preAndNextIndexNum = bookService.queryPreAndNextIndexNum(bookId, indexNum);
modelMap.put("nextIndexNum", preAndNextIndexNum.get(0)); modelMap.put("nextIndexNum", preAndNextIndexNum.get(0));
modelMap.put("preIndexNum", preAndNextIndexNum.get(1)); modelMap.put("preIndexNum", preAndNextIndexNum.get(1));
bookContent.setContent(bookContent.getContent().replaceAll("<div[^>]+app\\.html[^>]+>\\s*<div[^>]+>\\s*<div[^>]+>[^<]+</div>\\s*<div[^>]+>[^<]+<span[^>]+>>>[^<]+<<</span>\\s*</div>\\s*</div>\\s*</div>", "")); bookContent.setContent(bookContent.getContent().replaceAll(Constants.CONTENT_AD_PATTERN, ""));
modelMap.put("bookContent", bookContent); modelMap.put("bookContent", bookContent);
modelMap.put("indexName", indexName); modelMap.put("indexName", indexName);
Book basicBook = bookService.queryBaseInfo(bookId); Book basicBook = bookService.queryBaseInfo(bookId);
@ -343,7 +343,8 @@ public class BookController {
String content = bookService.queryContentList(bookId, i); String content = bookService.queryContentList(bookId, i);
out.write(index.getBytes(StandardCharsets.UTF_8)); out.write(index.getBytes(StandardCharsets.UTF_8));
out.write("\n".getBytes(StandardCharsets.UTF_8)); out.write("\n".getBytes(StandardCharsets.UTF_8));
content = content.replaceAll("<br\\s*/*>", "\r\n") content = content.replaceAll(Constants.CONTENT_AD_PATTERN, "")
.replaceAll("<br\\s*/*>", "\r\n")
.replaceAll("&nbsp;", " ") .replaceAll("&nbsp;", " ")
.replaceAll("<a[^>]*>", "") .replaceAll("<a[^>]*>", "")
.replaceAll("</a>", "") .replaceAll("</a>", "")