mirror of
https://github.com/201206030/novel-plus.git
synced 2025-04-27 01:30:51 +00:00
优化更新策略,支持同时启动多个爬虫程序来加快小说更新速率
This commit is contained in:
parent
9df69edc2c
commit
0e2e610d18
@ -51,7 +51,6 @@ public class StarterListener implements ServletContextListener {
|
|||||||
//解析小说基本信息
|
//解析小说基本信息
|
||||||
Book book = CrawlParser.parseBook(ruleBean, needUpdateBook.getCrawlBookId());
|
Book book = CrawlParser.parseBook(ruleBean, needUpdateBook.getCrawlBookId());
|
||||||
//这里只做老书更新
|
//这里只做老书更新
|
||||||
book.setCrawlLastTime(currentDate);
|
|
||||||
book.setId(needUpdateBook.getId());
|
book.setId(needUpdateBook.getId());
|
||||||
book.setPicUrl(needUpdateBook.getPicUrl());
|
book.setPicUrl(needUpdateBook.getPicUrl());
|
||||||
//查询已存在的章节
|
//查询已存在的章节
|
||||||
@ -61,8 +60,6 @@ public class StarterListener implements ServletContextListener {
|
|||||||
bookService.updateBookAndIndexAndContent(book, (List<BookIndex>) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List<BookContent>) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY),existBookIndexMap);
|
bookService.updateBookAndIndexAndContent(book, (List<BookIndex>) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List<BookContent>) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY),existBookIndexMap);
|
||||||
}catch (Exception e){
|
}catch (Exception e){
|
||||||
log.error(e.getMessage(), e);
|
log.error(e.getMessage(), e);
|
||||||
//解析异常中断,更新一下小说的最后解析时间
|
|
||||||
bookService.updateCrawlLastTime(needUpdateBook.getId());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -25,4 +25,11 @@ public interface CrawlBookMapper extends BookMapper {
|
|||||||
* @return 小说总字数
|
* @return 小说总字数
|
||||||
* */
|
* */
|
||||||
Integer queryTotalWordCount(@Param("bookId") Long bookId);
|
Integer queryTotalWordCount(@Param("bookId") Long bookId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Batch-updates the last crawl time of novels: every book in {@code books} (matched by id) gets its last crawl time set to {@code currentDate}.
|
||||||
|
* @param books the novels whose last crawl time should be updated
|
||||||
|
* @param currentDate the current time, written as the new last crawl time
|
||||||
|
* */
|
||||||
|
void updateCrawlLastTime(@Param("books") List<Book> books,@Param("currentDate") Date currentDate);
|
||||||
}
|
}
|
||||||
|
@ -99,7 +99,12 @@ public class BookServiceImpl implements BookService {
|
|||||||
|
|
||||||
// Diff hunk: the single-statement return below is the removed version; the statements after it are the replacement, which loads the books due for an update and then stamps their last crawl time before returning them.
@Override
|
@Override
|
||||||
public List<Book> queryNeedUpdateBook(Date startDate, int limit) {
|
public List<Book> queryNeedUpdateBook(Date startDate, int limit) {
|
||||||
return bookMapper.queryNeedUpdateBook(startDate, limit);
|
List<Book> books = bookMapper.queryNeedUpdateBook(startDate, limit);
|
||||||
|
if(books.size()>0) {
|
||||||
|
//Set the last crawl time of the selected books to the current time
//NOTE(review): the select above and this update are two separate mapper calls; no transaction or row lock is visible here, so two crawlers could presumably pick the same books in between - confirm
|
||||||
|
bookMapper.updateCrawlLastTime(books, new Date());
|
||||||
|
}
|
||||||
|
return books;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -19,5 +19,13 @@
|
|||||||
on t1.id = t2.book_id and t1.id = #{bookId}
|
on t1.id = t2.book_id and t1.id = #{bookId}
|
||||||
</select>
|
</select>
|
||||||
|
|
||||||
|
<!-- Sets crawl_last_time to the supplied currentDate for every book row whose id matches the id of an element of books. NOTE(review): with an empty books collection the IN clause would have no values; the caller visible in this diff only invokes the statement for a non-empty list, confirm for any other callers. -->
<update id="updateCrawlLastTime">
|
||||||
|
update book set crawl_last_time = #{currentDate}
|
||||||
|
where id in
|
||||||
|
<foreach item="book" collection="books" open="(" separator="," close=")">
|
||||||
|
#{book.id}
|
||||||
|
</foreach>
|
||||||
|
</update>
|
||||||
|
|
||||||
|
|
||||||
</mapper>
|
</mapper>
|
Loading…
x
Reference in New Issue
Block a user