mirror of
https://github.com/201206030/novel-plus.git
synced 2025-07-19 07:36:39 +00:00
feat(novel-crawl): 增加爬虫源采集章节数量监控功能
可以监测到爬虫源在当前环境下是否可用
This commit is contained in:
@ -12,6 +12,8 @@ import io.github.xxyopen.util.IdWorker;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.data.redis.core.RedisTemplate;
|
||||
import org.springframework.data.redis.core.StringRedisTemplate;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.text.ParseException;
|
||||
@ -34,6 +36,13 @@ public class CrawlParser {
|
||||
|
||||
private final CrawlHttpClient crawlHttpClient;
|
||||
|
||||
private final StringRedisTemplate stringRedisTemplate;
|
||||
|
||||
/**
|
||||
* 爬虫源采集章节数量缓存key
|
||||
*/
|
||||
private static final String CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY = "crawlSource:chapterCount:";
|
||||
|
||||
/**
|
||||
* 爬虫任务进度
|
||||
*/
|
||||
@ -53,6 +62,20 @@ public class CrawlParser {
|
||||
crawlTaskProgress.remove(taskId);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取爬虫源采集的章节数量
|
||||
*/
|
||||
public Long getCrawlSourceChapterCount(Integer sourceId) {
|
||||
return Optional.ofNullable(
|
||||
stringRedisTemplate.opsForValue().get(CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY + sourceId)).map(v -> {
|
||||
try {
|
||||
return Long.parseLong(v);
|
||||
} catch (NumberFormatException e) {
|
||||
return 0L;
|
||||
}
|
||||
}).orElse(0L);
|
||||
}
|
||||
|
||||
public void parseBook(RuleBean ruleBean, String bookId, CrawlBookHandler handler)
|
||||
throws InterruptedException {
|
||||
Book book = new Book();
|
||||
@ -182,7 +205,7 @@ public class CrawlParser {
|
||||
handler.handle(book);
|
||||
}
|
||||
|
||||
public boolean parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean,
|
||||
public boolean parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean, Integer sourceId,
|
||||
Map<Integer, BookIndex> existBookIndexMap, CrawlBookChapterHandler handler, CrawlSingleTask task)
|
||||
throws InterruptedException {
|
||||
|
||||
@ -314,10 +337,12 @@ public class CrawlParser {
|
||||
bookIndex.setUpdateTime(currentDate);
|
||||
|
||||
if (task != null) {
|
||||
// 更新采集进度
|
||||
// 更新单本任务采集进度
|
||||
crawlTaskProgress.put(task.getId(), indexList.size());
|
||||
}
|
||||
|
||||
// 更新爬虫源采集章节数量
|
||||
stringRedisTemplate.opsForValue().increment(CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY + sourceId);
|
||||
|
||||
}
|
||||
|
||||
|
@ -74,7 +74,7 @@ public class StarterListener implements ServletContextInitializer {
|
||||
needUpdateBook.getId());
|
||||
//解析章节目录
|
||||
crawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(), book,
|
||||
ruleBean, existBookIndexMap,
|
||||
ruleBean, needUpdateBook.getCrawlSourceId(), existBookIndexMap,
|
||||
chapter -> bookService.updateBookAndIndexAndContent(book,
|
||||
chapter.getBookIndexList(),
|
||||
chapter.getBookContentList(), existBookIndexMap), null);
|
||||
|
@ -104,10 +104,15 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
.build()
|
||||
.render(RenderingStrategies.MYBATIS3);
|
||||
List<CrawlSource> crawlSources = crawlSourceMapper.selectMany(render);
|
||||
crawlSources.forEach(crawlSource -> crawlSource.setSourceStatus(
|
||||
Optional.ofNullable(crawlSourceStatusMap.get(crawlSource.getId())).orElse((byte) 0)));
|
||||
PageBean<CrawlSource> pageBean = PageBuilder.build(crawlSources);
|
||||
pageBean.setList(BeanUtil.copyList(crawlSources, CrawlSourceVO.class));
|
||||
List<CrawlSourceVO> crawlSourceVOS = BeanUtil.copyList(crawlSources, CrawlSourceVO.class);
|
||||
crawlSourceVOS.forEach(crawlSource -> {
|
||||
crawlSource.setSourceStatus(
|
||||
Optional.ofNullable(crawlSourceStatusMap.get(crawlSource.getId())).orElse((byte) 0));
|
||||
crawlSource.setChapterCount(crawlParser.getCrawlSourceChapterCount(crawlSource.getId()));
|
||||
}
|
||||
);
|
||||
pageBean.setList(crawlSourceVOS);
|
||||
return pageBean;
|
||||
}
|
||||
|
||||
@ -386,7 +391,7 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
book.setCrawlLastTime(new Date());
|
||||
book.setId(idWorker.nextId());
|
||||
//解析章节目录
|
||||
boolean parseIndexContentResult = crawlParser.parseBookIndexAndContent(bookId, book, ruleBean,
|
||||
boolean parseIndexContentResult = crawlParser.parseBookIndexAndContent(bookId, book, ruleBean, sourceId,
|
||||
new HashMap<>(0), chapter -> {
|
||||
bookService.saveBookAndIndexAndContent(book, chapter.getBookIndexList(),
|
||||
chapter.getBookContentList());
|
||||
|
@ -20,7 +20,7 @@ public class CrawlSourceVO extends CrawlSource{
|
||||
@JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd HH:mm")
|
||||
private Date updateTime;
|
||||
|
||||
|
||||
private Long chapterCount;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
@ -43,7 +43,7 @@
|
||||
<th class="style">
|
||||
序号
|
||||
</th>
|
||||
<th class="chapter">
|
||||
<th class="name">
|
||||
爬虫源
|
||||
</th>
|
||||
<th class="name">
|
||||
@ -52,6 +52,9 @@
|
||||
<th class="name">
|
||||
更新时间
|
||||
</th>
|
||||
<th class="goread">
|
||||
采集数量
|
||||
</th>
|
||||
<th class="goread">
|
||||
状态
|
||||
</th>
|
||||
@ -111,11 +114,17 @@
|
||||
<script src="/javascript/header.js" type="text/javascript"></script>
|
||||
<script src="/javascript/user.js" type="text/javascript"></script>
|
||||
<script language="javascript" type="text/javascript">
|
||||
search(1, 10);
|
||||
let curr = 1;
|
||||
let limit = 10;
|
||||
|
||||
search();
|
||||
setInterval(function(){
|
||||
search();
|
||||
}, 10000);
|
||||
|
||||
var pageCrawlSourceList = null;
|
||||
|
||||
function search(curr, limit) {
|
||||
function search() {
|
||||
|
||||
$.ajax({
|
||||
type: "get",
|
||||
@ -134,13 +143,15 @@
|
||||
" <td class=\"style bookclass\">\n" +
|
||||
" [" + (i + 1) + "]\n" +
|
||||
" </td>\n" +
|
||||
" <td class=\"chapter\">\n" +
|
||||
" <td class=\"name\">\n" +
|
||||
" " + crawlSource.sourceName + "</td>\n" +
|
||||
" <td class=\"name\" valsc=\"291|2037554|1\">"
|
||||
+ crawlSource.createTime + "</td>\n" +
|
||||
" <td class=\"name\">\n" +
|
||||
" " + crawlSource.updateTime + "\n" +
|
||||
" </td>\n" +
|
||||
" <td class=\"goread\">\n" +
|
||||
" " + crawlSource.chapterCount + "章</td>\n" +
|
||||
" <td class=\"goread\" id='sourceStatus" + crawlSource.id + "'>" + (crawlSource.sourceStatus == 0 ? '停止运行' : '正在运行') +
|
||||
" </td>\n" +
|
||||
|
||||
@ -169,7 +180,9 @@
|
||||
|
||||
//首次不执行
|
||||
if (!first) {
|
||||
search(obj.curr, obj.limit);
|
||||
curr = obj.curr;
|
||||
limit = obj.limit;
|
||||
search();
|
||||
} else {
|
||||
|
||||
}
|
||||
@ -216,11 +229,11 @@
|
||||
if (status == 0) {
|
||||
//开启
|
||||
$("#sourceStatus" + sourceId).html("正在运行");
|
||||
$("#opt" + sourceId).html("<a href='javascript:openOrStopCrawl(" + sourceId + "," + 1 + ")'>关闭</a>");
|
||||
$("#opt" + sourceId).html("<a href='javascript:openOrStopCrawl(" + sourceId + "," + 1 + ")'>关闭 </a>"+"<a href='javascript:updateCrawlSource(" + sourceId + ")'>修改 </a>");
|
||||
} else {
|
||||
//关闭
|
||||
$("#sourceStatus" + sourceId).html("停止运行");
|
||||
$("#opt" + sourceId).html("<a href='javascript:openOrStopCrawl(" + sourceId + "," + 0 + ")'>开启</a>");
|
||||
$("#opt" + sourceId).html("<a href='javascript:openOrStopCrawl(" + sourceId + "," + 0 + ")'>开启 </a>"+"<a href='javascript:updateCrawlSource(" + sourceId + ")'>修改 </a>");
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user