mirror of
https://github.com/201206030/novel-plus.git
synced 2025-04-26 17:20:52 +00:00
优化更新策略,支持同时启动多个爬虫程序来加快小说更新速率
This commit is contained in:
parent
9df69edc2c
commit
0e2e610d18
@ -51,7 +51,6 @@ public class StarterListener implements ServletContextListener {
|
||||
//解析小说基本信息
|
||||
Book book = CrawlParser.parseBook(ruleBean, needUpdateBook.getCrawlBookId());
|
||||
//这里只做老书更新
|
||||
book.setCrawlLastTime(currentDate);
|
||||
book.setId(needUpdateBook.getId());
|
||||
book.setPicUrl(needUpdateBook.getPicUrl());
|
||||
//查询已存在的章节
|
||||
@ -61,8 +60,6 @@ public class StarterListener implements ServletContextListener {
|
||||
bookService.updateBookAndIndexAndContent(book, (List<BookIndex>) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List<BookContent>) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY),existBookIndexMap);
|
||||
}catch (Exception e){
|
||||
log.error(e.getMessage(), e);
|
||||
//解析异常中断,更新一下小说的最后解析时间
|
||||
bookService.updateCrawlLastTime(needUpdateBook.getId());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -25,4 +25,11 @@ public interface CrawlBookMapper extends BookMapper {
|
||||
* @return 小说总字数
|
||||
* */
|
||||
Integer queryTotalWordCount(@Param("bookId") Long bookId);
|
||||
|
||||
/**
|
||||
* Batch-updates the last crawl time of the given novels.
|
||||
* @param books the novels whose last crawl time should be updated
|
||||
* @param currentDate the current time
|
||||
* */
|
||||
void updateCrawlLastTime(@Param("books") List<Book> books,@Param("currentDate") Date currentDate);
|
||||
}
|
||||
|
@ -99,7 +99,12 @@ public class BookServiceImpl implements BookService {
|
||||
|
||||
/**
 * Fetches books via {@code bookMapper.queryNeedUpdateBook(startDate, limit)} and, when the
 * result is non-empty, sets the last crawl time of those books to the current time before
 * returning them.
 *
 * NOTE(review): this is a diff hunk rendered without +/- markers. The hunk header
 * (-99,7 +99,12) suggests the single-line {@code return bookMapper.queryNeedUpdateBook(...)}
 * statement is the pre-commit body and the statements after it are its replacement;
 * confirm against the repository.
 * NOTE(review): the query and the update are two separate mapper calls, and no transaction
 * or lock is visible in this hunk; verify that crawlers running at the same time cannot
 * pick up the same books.
 *
 * @param startDate passed unchanged to the mapper query
 * @param limit passed unchanged to the mapper query
 * @return the list returned by the mapper query
 */
@Override
|
||||
public List<Book> queryNeedUpdateBook(Date startDate, int limit) {
|
||||
return bookMapper.queryNeedUpdateBook(startDate, limit);
|
||||
List<Book> books = bookMapper.queryNeedUpdateBook(startDate, limit);
|
||||
if(books.size()>0) {
|
||||
//Set the last crawl time of the fetched books to the current time
|
||||
bookMapper.updateCrawlLastTime(books, new Date());
|
||||
}
|
||||
return books;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -19,5 +19,13 @@
|
||||
on t1.id = t2.book_id and t1.id = #{bookId}
|
||||
</select>
|
||||
|
||||
<!-- Sets book.crawl_last_time to #{currentDate} for every row whose id matches the id of an
     element of the "books" collection.
     NOTE(review): with an empty "books" collection the IN list would be empty and the statement
     is presumably invalid SQL; the caller visible in this commit only invokes it when the list
     is non-empty. Confirm for any other callers. -->
<update id="updateCrawlLastTime">
|
||||
update book set crawl_last_time = #{currentDate}
|
||||
where id in
|
||||
<foreach item="book" collection="books" open="(" separator="," close=")">
|
||||
#{book.id}
|
||||
</foreach>
|
||||
</update>
|
||||
|
||||
|
||||
</mapper>
|
Loading…
x
Reference in New Issue
Block a user