diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java
index a08fd3d..1cb6aaf 100644
--- a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java
+++ b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java
@@ -12,6 +12,8 @@ import io.github.xxyopen.util.IdWorker;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.lang3.StringUtils;
+import org.springframework.data.redis.core.RedisTemplate;
+import org.springframework.data.redis.core.StringRedisTemplate;
 import org.springframework.stereotype.Component;
 
 import java.text.ParseException;
@@ -34,6 +36,13 @@ public class CrawlParser {
 
     private final CrawlHttpClient crawlHttpClient;
 
+    private final StringRedisTemplate stringRedisTemplate;
+
+    /**
+     * 爬虫源采集章节数量缓存key
+     */
+    private static final String CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY = "crawlSource:chapterCount:";
+
     /**
      * 爬虫任务进度
      */
@@ -53,6 +62,20 @@ public class CrawlParser {
         crawlTaskProgress.remove(taskId);
     }
 
+    /**
+     * 获取爬虫源采集的章节数量
+     */
+    public Long getCrawlSourceChapterCount(Integer sourceId) {
+        return Optional.ofNullable(
+            stringRedisTemplate.opsForValue().get(CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY + sourceId)).map(v -> {
+            try {
+                return Long.parseLong(v);
+            } catch (NumberFormatException e) {
+                return 0L;
+            }
+        }).orElse(0L);
+    }
+
     public void parseBook(RuleBean ruleBean, String bookId, CrawlBookHandler handler) throws InterruptedException {
 
         Book book = new Book();
@@ -182,7 +205,7 @@
         handler.handle(book);
     }
 
-    public boolean parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean,
+    public boolean parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean, Integer sourceId,
                                             Map<Integer, BookIndex> existBookIndexMap, CrawlBookChapterHandler handler,
                                             CrawlSingleTask task) throws InterruptedException {
 
@@ -314,10 +337,12 @@
                     bookIndex.setUpdateTime(currentDate);
 
                     if (task != null) {
-                        // 更新采集进度
+                        // 更新单本任务采集进度
                         crawlTaskProgress.put(task.getId(), indexList.size());
                     }
 
+                    // 更新爬虫源采集章节数量
+                    stringRedisTemplate.opsForValue().increment(CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY + sourceId);
 
                 }
 
diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java b/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java
index 4ca21cd..cfad5ee 100644
--- a/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java
+++ b/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java
@@ -74,7 +74,7 @@
                                 needUpdateBook.getId());
                             //解析章节目录
                             crawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(), book,
-                                ruleBean, existBookIndexMap,
+                                ruleBean, needUpdateBook.getCrawlSourceId(), existBookIndexMap,
                                 chapter -> bookService.updateBookAndIndexAndContent(book,
                                     chapter.getBookIndexList(), chapter.getBookContentList(), existBookIndexMap),
                                 null);
diff --git a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java
index 305191c..e65d7e9 100644
--- a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java
+++ b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java
@@ -104,10 +104,15 @@ public class CrawlServiceImpl implements CrawlService {
                 .build()
                 .render(RenderingStrategies.MYBATIS3);
         List<CrawlSource> crawlSources = crawlSourceMapper.selectMany(render);
-        crawlSources.forEach(crawlSource -> crawlSource.setSourceStatus(
-            Optional.ofNullable(crawlSourceStatusMap.get(crawlSource.getId())).orElse((byte) 0)));
         PageBean pageBean = PageBuilder.build(crawlSources);
-        pageBean.setList(BeanUtil.copyList(crawlSources, CrawlSourceVO.class));
+        List<CrawlSourceVO> crawlSourceVOS = BeanUtil.copyList(crawlSources, CrawlSourceVO.class);
+        crawlSourceVOS.forEach(crawlSource -> {
+                crawlSource.setSourceStatus(
+                    Optional.ofNullable(crawlSourceStatusMap.get(crawlSource.getId())).orElse((byte) 0));
+                crawlSource.setChapterCount(crawlParser.getCrawlSourceChapterCount(crawlSource.getId()));
+            }
+        );
+        pageBean.setList(crawlSourceVOS);
         return pageBean;
     }
 
@@ -386,7 +391,7 @@
             book.setCrawlLastTime(new Date());
             book.setId(idWorker.nextId());
             //解析章节目录
-            boolean parseIndexContentResult = crawlParser.parseBookIndexAndContent(bookId, book, ruleBean,
+            boolean parseIndexContentResult = crawlParser.parseBookIndexAndContent(bookId, book, ruleBean, sourceId,
                 new HashMap<>(0), chapter -> {
                     bookService.saveBookAndIndexAndContent(book, chapter.getBookIndexList(),
                         chapter.getBookContentList());
diff --git a/novel-crawl/src/main/java/com/java2nb/novel/vo/CrawlSourceVO.java b/novel-crawl/src/main/java/com/java2nb/novel/vo/CrawlSourceVO.java
index 45860fb..d15b636 100644
--- a/novel-crawl/src/main/java/com/java2nb/novel/vo/CrawlSourceVO.java
+++ b/novel-crawl/src/main/java/com/java2nb/novel/vo/CrawlSourceVO.java
@@ -20,7 +20,7 @@ public class CrawlSourceVO extends CrawlSource{
     @JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd HH:mm")
     private Date updateTime;
 
-
+    private Long chapterCount;
 
     @Override
     public String toString() {
diff --git a/novel-crawl/src/main/resources/templates/crawl/crawlSource_list.html b/novel-crawl/src/main/resources/templates/crawl/crawlSource_list.html
index 35ebdbb..6d68121 100644
--- a/novel-crawl/src/main/resources/templates/crawl/crawlSource_list.html
+++ b/novel-crawl/src/main/resources/templates/crawl/crawlSource_list.html
@@ -43,7 +43,7 @@
                             序号
-
+
                             爬虫源
@@ -52,6 +52,9 @@
                             更新时间
+
+                            采集数量
+
                             状态
@@ -111,11 +114,17 @@
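
The patch boils down to one pattern: a Redis string counter per crawl source, incremented each time a chapter batch is persisted and read back when the source list page is rendered, with missing or malformed values treated as zero. Below is a minimal, self-contained sketch of that pattern under the same Spring Data Redis and Lombok assumptions as the patched module; the key prefix mirrors the diff, but the class and method names here are illustrative and not part of the patch.

```java
import java.util.Optional;

import org.springframework.data.redis.core.StringRedisTemplate;
import org.springframework.stereotype.Component;

import lombok.RequiredArgsConstructor;

/**
 * Sketch of the per-source chapter counter pattern used in the patch.
 * Class and method names are hypothetical; only the key prefix and the
 * increment/read logic mirror the diff.
 */
@Component
@RequiredArgsConstructor
public class ChapterCountCacheSketch {

    /** Same key prefix as the patch: one Redis string counter per crawl source id. */
    private static final String CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY = "crawlSource:chapterCount:";

    private final StringRedisTemplate stringRedisTemplate;

    /** Called once per persisted chapter; INCR creates the key on first use. */
    public void recordChapter(Integer sourceId) {
        stringRedisTemplate.opsForValue().increment(CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY + sourceId);
    }

    /** Reads the counter, treating a missing or non-numeric value as zero. */
    public Long readChapterCount(Integer sourceId) {
        return Optional.ofNullable(
                stringRedisTemplate.opsForValue().get(CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY + sourceId))
            .map(v -> {
                try {
                    return Long.parseLong(v);
                } catch (NumberFormatException e) {
                    return 0L;
                }
            })
            .orElse(0L);
    }
}
```

Because Redis INCR is atomic, concurrent crawler threads can bump the same source's counter safely. Note that in the hunks shown the counter is only ever incremented, never reset, so the "采集数量" column reflects the total number of chapters collected since the key was first created rather than a per-run figure.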