From a07643bde029ab63d215e4782c1c1cc8cac99a55 Mon Sep 17 00:00:00 2001 From: xiongxiaoyang <1179705413@qq.com> Date: Tue, 13 May 2025 10:45:38 +0800 Subject: [PATCH] =?UTF-8?q?fix(novel-crawl):=20=E8=A7=A3=E5=86=B3=E5=A4=9A?= =?UTF-8?q?=E4=B8=AA=E7=88=AC=E8=99=AB=E8=BF=9B=E7=A8=8B=E9=97=B4=E7=9A=84?= =?UTF-8?q?=E7=88=AC=E8=99=AB=E6=BA=90=E7=8A=B6=E6=80=81=E5=86=B2=E7=AA=81?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../core/schedule/CrawlThreadMonitor.java | 61 ------------------- .../novel/service/impl/CrawlServiceImpl.java | 34 +++++------ 2 files changed, 17 insertions(+), 78 deletions(-) delete mode 100644 novel-crawl/src/main/java/com/java2nb/novel/core/schedule/CrawlThreadMonitor.java diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/schedule/CrawlThreadMonitor.java b/novel-crawl/src/main/java/com/java2nb/novel/core/schedule/CrawlThreadMonitor.java deleted file mode 100644 index a04ce49..0000000 --- a/novel-crawl/src/main/java/com/java2nb/novel/core/schedule/CrawlThreadMonitor.java +++ /dev/null @@ -1,61 +0,0 @@ -package com.java2nb.novel.core.schedule; - - -import com.java2nb.novel.core.cache.CacheKey; -import com.java2nb.novel.core.cache.CacheService; -import com.java2nb.novel.entity.CrawlSource; -import com.java2nb.novel.service.CrawlService; -import io.github.xxyopen.util.ThreadUtil; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.springframework.scheduling.annotation.Scheduled; -import org.springframework.stereotype.Service; - -import java.util.List; -import java.util.Set; - -/** - * 爬虫线程监控器,监控执行完成的爬虫源,并修改状态 - * - * @author Administrator - */ -@Service -@RequiredArgsConstructor -@Slf4j -public class CrawlThreadMonitor { - - private final CacheService cacheService; - - private final CrawlService crawlService; - - @Scheduled(fixedRate = 1000 * 60 * 5) - public void monitor() { - - //查询需要监控的正在运行的爬虫源 - List sources = crawlService.queryCrawlSourceByStatus((byte) 1); - - for (CrawlSource source : sources) { - Set runningCrawlThreadIds = (Set) cacheService.getObject(CacheKey.RUNNING_CRAWL_THREAD_KEY_PREFIX + source.getId()); - boolean sourceStop = true; - if (runningCrawlThreadIds != null) { - for (Long threadId : runningCrawlThreadIds) { - Thread thread = ThreadUtil.findThread(threadId); - - if (thread != null && thread.isAlive()) { - //有活跃线程,说明该爬虫源正在运行,数据库中状态正确,不需要修改 - sourceStop = false; - - } - - } - } - - if (sourceStop) { - crawlService.updateCrawlSourceStatus(source.getId(), (byte) 0); - } - - - } - - } -} diff --git a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java index 18d75d3..ca7b576 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java @@ -66,6 +66,8 @@ public class CrawlServiceImpl implements CrawlService { private final CrawlHttpClient crawlHttpClient; + private final Map crawlSourceStatusMap = new HashMap<>(); + @Override public void addCrawlSource(CrawlSource source) { @@ -104,6 +106,8 @@ public class CrawlServiceImpl implements CrawlService { .build() .render(RenderingStrategies.MYBATIS3); List crawlSources = crawlSourceMapper.selectMany(render); + crawlSources.forEach(crawlSource -> crawlSource.setSourceStatus( + Optional.ofNullable(crawlSourceStatusMap.get(crawlSource.getId())).orElse((byte) 0))); PageBean pageBean = PageBuilder.build(crawlSources); pageBean.setList(BeanUtil.copyList(crawlSources, CrawlSourceVO.class)); return pageBean; @@ -113,12 +117,12 @@ public class CrawlServiceImpl implements CrawlService { @Override public void openOrCloseCrawl(Integer sourceId, Byte sourceStatus) { - //判断是开启还是关闭,如果是关闭,则修改数据库状态后获取该爬虫正在运行的线程集合并全部停止 - //如果是开启,先查询数据库中状态,判断该爬虫源是否还在运行,如果在运行,则忽略, - // 如果没有则修改数据库状态,并启动线程爬取小说数据加入到runningCrawlThread中 + // 判断是开启还是关闭,如果是关闭,则获取该爬虫源正在运行的线程集合并全部中断 + // 如果是开启,先判断该爬虫源是否还在运行,如果在运行,则忽略,如果没有运行则启动线程爬取小说数据并加入到runningCrawlThread中 + // 最后,保存爬虫源状态 if (sourceStatus == (byte) 0) { - //关闭,直接修改数据库状态,并直接修改数据库状态后获取该爬虫正在运行的线程集合全部停止 - SpringUtil.getBean(CrawlService.class).updateCrawlSourceStatus(sourceId, sourceStatus); + // 关闭 + // 将该爬虫源正在运行的线程集合全部停止 Set runningCrawlThreadId = (Set) cacheService.getObject( CacheKey.RUNNING_CRAWL_THREAD_KEY_PREFIX + sourceId); if (runningCrawlThreadId != null) { @@ -132,16 +136,13 @@ public class CrawlServiceImpl implements CrawlService { } else { - //开启 - //查询爬虫源状态和规则 - CrawlSource source = queryCrawlSource(sourceId); - Byte realSourceStatus = source.getSourceStatus(); - + // 开启 + Byte realSourceStatus = Optional.ofNullable(crawlSourceStatusMap.get(sourceId)).orElse((byte) 0); if (realSourceStatus == (byte) 0) { - //该爬虫源已经停止运行了,修改数据库状态,并启动线程爬取小说数据加入到runningCrawlThread中 - SpringUtil.getBean(CrawlService.class).updateCrawlSourceStatus(sourceId, sourceStatus); + // 查询爬虫源规则 + CrawlSource source = queryCrawlSource(sourceId); + //该爬虫源已经停止运行了,启动线程爬取小说数据并将线程加入到runningCrawlThread中 RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class); - Set threadIds = new HashSet<>(); //按分类开始爬虫解析任务 for (int i = 1; i < 8; i++) { @@ -150,16 +151,15 @@ public class CrawlServiceImpl implements CrawlService { thread.start(); //thread加入到监控缓存中 threadIds.add(thread.getId()); - } cacheService.setObject(CacheKey.RUNNING_CRAWL_THREAD_KEY_PREFIX + sourceId, threadIds); - - } - } + // 保存爬虫源状态 + crawlSourceStatusMap.put(sourceId, sourceStatus); + } @Override