1
0
mirror of https://github.com/201206030/novel-plus.git synced 2025-06-03 13:38:31 +00:00

fix(novel-crawl): 解决爬虫进程间的冲突问题,支持同时启动多个爬虫进程

This commit is contained in:
xiongxiaoyang 2025-05-13 11:11:27 +08:00
parent a07643bde0
commit 42bcecc304
2 changed files with 4 additions and 8 deletions
novel-common/src/main/java/com/java2nb/novel/core/cache
novel-crawl/src/main/java/com/java2nb/novel/service/impl

@ -41,11 +41,6 @@ public interface CacheKey {
* */
String TEMPLATE_DIR_KEY = "templateDirKey";;
/**
* 正在运行的爬虫线程存储KEY前缀
* */
String RUNNING_CRAWL_THREAD_KEY_PREFIX = "runningCrawlTreadDataKeyPrefix";
/**
* 上一次搜索引擎更新的时间
* */

@ -68,6 +68,8 @@ public class CrawlServiceImpl implements CrawlService {
// Byte status values keyed by Integer — presumably the crawl source ID; confirm against callers.
private final Map<Integer, Byte> crawlSourceStatusMap = new HashMap<>();
// Thread IDs of the crawl threads started for each source, keyed by sourceId.
// Kept in a plain HashMap owned by this service instance.
private final Map<Integer, Set<Long>> runningCrawlThread = new HashMap<>();
@Override
public void addCrawlSource(CrawlSource source) {
@ -123,8 +125,7 @@ public class CrawlServiceImpl implements CrawlService {
if (sourceStatus == (byte) 0) {
// 关闭
// 将该爬虫源正在运行的线程集合全部停止
Set<Long> runningCrawlThreadId = (Set<Long>) cacheService.getObject(
CacheKey.RUNNING_CRAWL_THREAD_KEY_PREFIX + sourceId);
Set<Long> runningCrawlThreadId = runningCrawlThread.get(sourceId);
if (runningCrawlThreadId != null) {
for (Long ThreadId : runningCrawlThreadId) {
Thread thread = ThreadUtil.findThread(ThreadId);
@ -152,7 +153,7 @@ public class CrawlServiceImpl implements CrawlService {
//thread加入到监控缓存中
threadIds.add(thread.getId());
}
cacheService.setObject(CacheKey.RUNNING_CRAWL_THREAD_KEY_PREFIX + sourceId, threadIds);
runningCrawlThread.put(sourceId, threadIds);
}
}