diff --git a/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java b/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java index 23bfee5..4c3a7a2 100644 --- a/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java +++ b/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java @@ -522,88 +522,85 @@ public class BookCrawlServiceImpl implements BookCrawlService { if (picMather.find()) { String picSrc = picMather.group(1); - Pattern descPatten = compile("class=\"review\">([^<]+)

"); - Matcher descMatch = descPatten.matcher(body); - if (descMatch.find()) { - String desc = descMatch.group(1); + String desc = body.substring(body.indexOf("

") + "

".length()); + desc = desc.substring(0, desc.indexOf("

")); - BookDO book = new BookDO(); - book.setAuthor(author); - book.setCatid(catNum); - book.setBookDesc(desc); - book.setBookName(bookName); - book.setScore(score > 10 ? 8.0f : score); - book.setPicUrl(picSrc); - book.setBookStatus(status); - book.setUpdateTime(updateTime); + BookDO book = new BookDO(); + book.setAuthor(author); + book.setCatid(catNum); + book.setBookDesc(desc); + book.setBookName(bookName); + book.setScore(score > 10 ? 8.0f : score); + book.setPicUrl(picSrc); + book.setBookStatus(status); + book.setUpdateTime(updateTime); - List indexList = new ArrayList<>(); - List contentList = new ArrayList<>(); + List indexList = new ArrayList<>(); + List contentList = new ArrayList<>(); - //读取目录 - Pattern indexPatten = compile("查看完整目录"); - Matcher indexMatch = indexPatten.matcher(body); - if (indexMatch.find()) { - String indexUrl = baseUrl + indexMatch.group(1); - String body2 = getByTemplate(indexUrl); - if (body2 != null) { - Pattern indexListPatten = compile("([^/]+)"); - Matcher indexListMatch = indexListPatten.matcher(body2); + //读取目录 + Pattern indexPatten = compile("查看完整目录"); + Matcher indexMatch = indexPatten.matcher(body); + if (indexMatch.find()) { + String indexUrl = baseUrl + indexMatch.group(1); + String body2 = getByTemplate(indexUrl); + if (body2 != null) { + Pattern indexListPatten = compile("([^/]+)"); + Matcher indexListMatch = indexListPatten.matcher(body2); - boolean isFindIndex = indexListMatch.find(); + boolean isFindIndex = indexListMatch.find(); - int indexNum = 0; - //查询该书籍已存在目录号 - List hasIndexNum = queryIndexCountByBookNameAndBAuthor(bookName, author); + int indexNum = 0; + //查询该书籍已存在目录号 + List hasIndexNum = queryIndexCountByBookNameAndBAuthor(bookName, author); - while (isFindIndex) { - if (isInteruptBiquTaCrawl) { - return; - } - - if (!hasIndexNum.contains(indexNum)) { - - String contentUrl = baseUrl + indexListMatch.group(1); - String indexName = indexListMatch.group(2); - - - //查询章节内容 - String body3 = getByTemplate(contentUrl.replace("//m.","//www.")); - if (body3 != null) { - String start = "id=\"content\">"; - String end = "