diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java b/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java index 626b59a..6a81f3b 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java @@ -9,9 +9,11 @@ import com.java2nb.novel.entity.BookIndex; import com.java2nb.novel.entity.CrawlSource; import com.java2nb.novel.service.BookService; import com.java2nb.novel.service.CrawlService; +import com.java2nb.novel.utils.Constants; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.time.DateUtils; +import org.springframework.beans.factory.annotation.Value; import javax.servlet.ServletContextEvent; import javax.servlet.ServletContextListener; @@ -33,43 +35,54 @@ public class StarterListener implements ServletContextListener { private final CrawlService crawlService; + @Value("${crawl.update.thread}") + private int updateThreadCount; + @Override public void contextInitialized(ServletContextEvent sce) { log.info("程序启动,开始执行自动更新线程。。。"); - new Thread(() -> { - while (true) { - try { - //1.查询最新目录更新时间在一个月之内的前100条需要更新的数据 - Date currentDate = new Date(); - Date startDate = DateUtils.addDays(currentDate, -30); - List bookList = bookService.queryNeedUpdateBook(startDate, 100); - for (Book needUpdateBook : bookList) { - try { - //查询爬虫源规则 - CrawlSource source = crawlService.queryCrawlSource(needUpdateBook.getCrawlSourceId()); - RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class); - //解析小说基本信息 - Book book = CrawlParser.parseBook(ruleBean, needUpdateBook.getCrawlBookId()); - //这里只做老书更新 - book.setId(needUpdateBook.getId()); - book.setPicUrl(needUpdateBook.getPicUrl()); - //查询已存在的章节 - Map existBookIndexMap = bookService.queryExistBookIndexMap(needUpdateBook.getId()); - //解析章节目录 - Map indexAndContentList = CrawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(),book, ruleBean, existBookIndexMap); - bookService.updateBookAndIndexAndContent(book, (List) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY),existBookIndexMap); - }catch (Exception e){ - log.error(e.getMessage(), e); + for(int i = 0 ; i { + while (true) { + try { + //1.查询最新目录更新时间在一个月之内的前100条需要更新的数据 + Date currentDate = new Date(); + Date startDate = DateUtils.addDays(currentDate, -30); + List bookList ; + synchronized (this) { + bookList = bookService.queryNeedUpdateBook(startDate, 100); + } + for (Book needUpdateBook : bookList) { + try { + //查询爬虫源规则 + CrawlSource source = crawlService.queryCrawlSource(needUpdateBook.getCrawlSourceId()); + RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class); + //解析小说基本信息 + Book book = CrawlParser.parseBook(ruleBean, needUpdateBook.getCrawlBookId()); + //这里只做老书更新 + book.setId(needUpdateBook.getId()); + if(needUpdateBook.getPicUrl()!=null && needUpdateBook.getPicUrl().startsWith(Constants.LOCAL_PIC_PREFIX)) { + //本地图片则不更新 + book.setPicUrl(null); + } + //查询已存在的章节 + Map existBookIndexMap = bookService.queryExistBookIndexMap(needUpdateBook.getId()); + //解析章节目录 + Map indexAndContentList = CrawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(), book, ruleBean, existBookIndexMap); + bookService.updateBookAndIndexAndContent(book, (List) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY), existBookIndexMap); + } catch (Exception e) { + log.error(e.getMessage(), e); + } + } + Thread.sleep(1000 * 60 * 10); + } catch (Exception e) { + log.error(e.getMessage(), e); } - Thread.sleep(1000 * 60 * 10); - } catch (Exception e) { - log.error(e.getMessage(), e); } - - } - }).start(); + }).start(); + } } } diff --git a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/BookServiceImpl.java b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/BookServiceImpl.java index a8a34e5..02a627a 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/BookServiceImpl.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/BookServiceImpl.java @@ -180,10 +180,6 @@ public class BookServiceImpl implements BookService { if(Constants.VISIT_COUNT_DEFAULT.equals(book.getVisitCount())) { book.setVisitCount(null); } - if(book.getPicUrl()!=null && book.getPicUrl().startsWith(Constants.LOCAL_PIC_PREFIX)) { - //本地图片则不更新 - book.setPicUrl(null); - } bookMapper.updateByPrimaryKeySelective(book); } diff --git a/novel-crawl/src/main/resources/application.yml b/novel-crawl/src/main/resources/application.yml index c17bd85..394d285 100644 --- a/novel-crawl/src/main/resources/application.yml +++ b/novel-crawl/src/main/resources/application.yml @@ -1,3 +1,4 @@ +#端口号 server: port: 8081 @@ -5,9 +6,16 @@ spring: profiles: active: dev - +#登录用户名密码 admin: username: admin password: admin +#爬虫自动更新的线程数 +#建议小说数量不多或者正在运行新书入库爬虫的情况下设置为1即可 +#随着小说数量的增多可以逐渐增加,但建议不要超出CPU的线程数 +crawl: + update: + thread: 1 +