优化更新策略,支持同时启动多个爬虫程序来加快小说更新速率

This commit is contained in:
xxy 2020-05-07 23:34:13 +08:00
parent 9df69edc2c
commit 0e2e610d18
4 changed files with 21 additions and 4 deletions

View File

@ -51,7 +51,6 @@ public class StarterListener implements ServletContextListener {
//解析小说基本信息
Book book = CrawlParser.parseBook(ruleBean, needUpdateBook.getCrawlBookId());
//这里只做老书更新
book.setCrawlLastTime(currentDate);
book.setId(needUpdateBook.getId());
book.setPicUrl(needUpdateBook.getPicUrl());
//查询已存在的章节
@ -61,8 +60,6 @@ public class StarterListener implements ServletContextListener {
bookService.updateBookAndIndexAndContent(book, (List<BookIndex>) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List<BookContent>) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY),existBookIndexMap);
}catch (Exception e){
log.error(e.getMessage(), e);
//解析异常中断更新一下小说的最后解析时间
bookService.updateCrawlLastTime(needUpdateBook.getId());
}
}

View File

@ -25,4 +25,11 @@ public interface CrawlBookMapper extends BookMapper {
* @return 小说总字数
* */
Integer queryTotalWordCount(@Param("bookId") Long bookId);
/**
 * Batch-updates the last crawl time of the given books.
 *
 * @param books       the books whose last crawl time should be updated
 * @param currentDate the current time, stored as the new last crawl time
 */
void updateCrawlLastTime(@Param("books") List<Book> books,@Param("currentDate") Date currentDate);
}

View File

@ -99,7 +99,12 @@ public class BookServiceImpl implements BookService {
/**
 * Queries the books that need to be updated and, when any are found,
 * sets their last crawl time to the current time before returning them.
 *
 * <p>Only the stored crawl time is changed through the mapper; the returned
 * {@code Book} objects are handed back exactly as they were queried.
 *
 * <p>NOTE(review): the immediate stamping presumably exists so that several
 * crawler processes running side by side do not keep selecting the same books.
 * The query and the update are two separate mapper calls here, so confirm
 * whether the surrounding transaction/locking setup makes that safe.
 *
 * @param startDate passed through unchanged to the mapper query
 * @param limit     passed through unchanged to the mapper query
 * @return the books selected by the mapper query (possibly empty)
 */
@Override
public List<Book> queryNeedUpdateBook(Date startDate, int limit) {
    List<Book> books = bookMapper.queryNeedUpdateBook(startDate, limit);
    if (!books.isEmpty()) {
        // Stamp the selected books right away with the current time.
        // The guard also keeps the mapper from being called with an empty id list.
        bookMapper.updateCrawlLastTime(books, new Date());
    }
    return books;
}
@Override

View File

@ -19,5 +19,13 @@
on t1.id = t2.book_id and t1.id = #{bookId}
</select>
<!--
  Sets crawl_last_time to the supplied currentDate for every book whose id
  appears in the `books` collection.
  NOTE(review): an empty `books` collection would presumably leave a dangling
  "where id in" clause; verify that every caller passes a non-empty list.
-->
<update id="updateCrawlLastTime">
    update book
    set crawl_last_time = #{currentDate}
    where id in
    <foreach collection="books" item="book" separator="," open="(" close=")">
        #{book.id}
    </foreach>
</update>
</mapper>