diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BaseCrawlSource.java b/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BaseCrawlSource.java index 3096606..c6c92a3 100644 --- a/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BaseCrawlSource.java +++ b/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BaseCrawlSource.java @@ -17,4 +17,10 @@ public abstract class BaseCrawlSource { * 解析数据 * */ public abstract void parse(); + + + /** + * 更新书籍 + * */ + public abstract void update(); } diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java b/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java index 78ba6e5..95b7272 100644 --- a/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java +++ b/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java @@ -3,9 +3,12 @@ package xyz.zinglizingli.books.core.crawl; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.springframework.beans.factory.annotation.Autowired; +import xyz.zinglizingli.books.core.utils.Constants; +import xyz.zinglizingli.books.mapper.BookParseLogMapper; import xyz.zinglizingli.books.po.Book; import xyz.zinglizingli.books.po.BookContent; import xyz.zinglizingli.books.po.BookIndex; +import xyz.zinglizingli.books.po.BookParseLog; import xyz.zinglizingli.books.service.BookService; import xyz.zinglizingli.books.core.utils.CatUtil; import xyz.zinglizingli.common.utils.ExcutorUtils; @@ -34,177 +37,208 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource { @Override public void parse() { - String catBookListUrl = getListPageUrl().replace("{0}", "0").replace("{1}", "1"); - String forObject = RestTemplateUtil.getBodyByUtf8(catBookListUrl); - if (forObject != null) { - //解析第一页书籍的数据 - Pattern bookPatten = compile(getBookUrlPattern()); + for(int page = 1; page<= Constants.UPDATE_PAGES_ONCE; page++) { + String catBookListUrl = getListPageUrl().replace("{0}", "0").replace("{1}", page+""); + String forObject = RestTemplateUtil.getBodyByUtf8(catBookListUrl); + if (forObject != null) { + //解析第一页书籍的数据 + Pattern bookPatten = compile(getBookUrlPattern()); - Matcher bookMatcher = bookPatten.matcher(forObject); + Matcher bookMatcher = bookPatten.matcher(forObject); - boolean isFind = bookMatcher.find(); - Pattern scorePatten = compile(getScorePattern()); - Matcher scoreMatch = scorePatten.matcher(forObject); - boolean scoreFind = scoreMatch.find(); + boolean isFind = bookMatcher.find(); + Pattern scorePatten = compile(getScorePattern()); + Matcher scoreMatch = scorePatten.matcher(forObject); + boolean scoreFind = scoreMatch.find(); - Pattern bookNamePatten = compile(getBookNamePattern()); + Pattern bookNamePatten = compile(getBookNamePattern()); - Matcher bookNameMatch = bookNamePatten.matcher(forObject); + Matcher bookNameMatch = bookNamePatten.matcher(forObject); - boolean isBookNameMatch = bookNameMatch.find(); + boolean isBookNameMatch = bookNameMatch.find(); - while (isFind && scoreFind && isBookNameMatch) { - - try { - Float score = Float.parseFloat(scoreMatch.group(1)); - - if (score < getLowestScore()) { - continue; - } - - String bokNum = bookMatcher.group(1); - String bookUrl = getIndexUrl() + "/" + bokNum + "/"; - - String body = RestTemplateUtil.getBodyByUtf8(bookUrl); - if (body != null) { - - String bookName = bookNameMatch.group(1); - Pattern authorPatten = compile(getAuthorPattern()); - Matcher authoreMatch = authorPatten.matcher(body); - if (authoreMatch.find()) { - String author = authoreMatch.group(1); - - Pattern statusPatten = compile(getStatusPattern()); - Matcher statusMatch = statusPatten.matcher(body); - if (statusMatch.find()) { - String status = statusMatch.group(1); - - Pattern catPatten = compile(getCatPattern()); - Matcher catMatch = catPatten.matcher(body); - if (catMatch.find()) { - String catName = catMatch.group(1); - int catNum = CatUtil.getCatNum(catName); - - - Pattern updateTimePatten = compile(getUpdateTimePattern()); - Matcher updateTimeMatch = updateTimePatten.matcher(body); - if (updateTimeMatch.find()) { - String updateTimeStr = updateTimeMatch.group(1); - SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss"); - Date updateTime = format.parse(updateTimeStr); - Pattern picPatten = compile(getPicPattern()); - Matcher picMather = picPatten.matcher(body); - if (picMather.find()) { - String picSrc = picMather.group(1); - String desc = body.substring(body.indexOf("
") + "
".length()); - desc = desc.substring(0, desc.indexOf("
")); - - - Book book = new Book(); - book.setAuthor(author); - book.setCatid(catNum); - book.setBookDesc(desc); - book.setBookName(bookName); - book.setScore(score > 10 ? 8.0f : score); - book.setPicUrl(picSrc); - book.setBookStatus(status); - book.setUpdateTime(updateTime); - - List