Mirror of https://github.com/201206030/novel-plus.git (synced 2025-07-19 07:36:39 +00:00)
feat(novel-crawl): add chapter-count monitoring for crawl sources
Makes it possible to tell whether a crawl source is still usable in the current environment.
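The change spans CrawlParser, StarterListener, CrawlServiceImpl, CrawlSourceVO and the crawl-source list page: each chapter that a source successfully collects increments a per-source counter in Redis, the service layer reads that counter back when the source list is requested, and the page shows it next to the source status and refreshes itself every 10 seconds. The snippet below is a minimal sketch of that counter pattern, not code from the commit; the key prefix matches the constant added to CrawlParser, while the class and method names are illustrative and assume a StringRedisTemplate wired to the same Redis instance the crawler uses.

import java.util.Optional;

import org.springframework.data.redis.core.StringRedisTemplate;

// Illustrative sketch only; mirrors the counter pattern this commit adds to CrawlParser.
public class ChapterCountSketch {

    // Same key prefix as CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY in CrawlParser.
    private static final String KEY_PREFIX = "crawlSource:chapterCount:";

    /** Call once for every chapter successfully crawled from the given source. */
    static void recordChapter(StringRedisTemplate redis, Integer sourceId) {
        // Redis INCR is atomic and creates the key with value 1 if it does not exist,
        // so concurrent crawl threads cannot lose updates.
        redis.opsForValue().increment(KEY_PREFIX + sourceId);
    }

    /** Read the counter back, treating a missing or malformed value as 0. */
    static long readChapterCount(StringRedisTemplate redis, Integer sourceId) {
        return Optional.ofNullable(redis.opsForValue().get(KEY_PREFIX + sourceId))
            .map(v -> {
                try {
                    return Long.parseLong(v);
                } catch (NumberFormatException e) {
                    return 0L;
                }
            })
            .orElse(0L);
    }
}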
@@ -12,6 +12,8 @@ import io.github.xxyopen.util.IdWorker;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.lang3.StringUtils;
+import org.springframework.data.redis.core.RedisTemplate;
+import org.springframework.data.redis.core.StringRedisTemplate;
 import org.springframework.stereotype.Component;
 
 import java.text.ParseException;
@@ -34,6 +36,13 @@ public class CrawlParser {
 
     private final CrawlHttpClient crawlHttpClient;
 
+    private final StringRedisTemplate stringRedisTemplate;
+
+    /**
+     * 爬虫源采集章节数量缓存key
+     */
+    private static final String CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY = "crawlSource:chapterCount:";
+
     /**
      * 爬虫任务进度
      */
@@ -53,6 +62,20 @@ public class CrawlParser {
         crawlTaskProgress.remove(taskId);
     }
 
+    /**
+     * 获取爬虫源采集的章节数量
+     */
+    public Long getCrawlSourceChapterCount(Integer sourceId) {
+        return Optional.ofNullable(
+            stringRedisTemplate.opsForValue().get(CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY + sourceId)).map(v -> {
+            try {
+                return Long.parseLong(v);
+            } catch (NumberFormatException e) {
+                return 0L;
+            }
+        }).orElse(0L);
+    }
+
     public void parseBook(RuleBean ruleBean, String bookId, CrawlBookHandler handler)
         throws InterruptedException {
         Book book = new Book();
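A note on the getter just added: wrapping the raw Redis value in Optional and falling back to 0L on NumberFormatException means a missing key, a flushed Redis instance or a manually edited value all show up as a count of 0 instead of breaking the source list, which keeps the monitoring page robust.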
@@ -182,7 +205,7 @@ public class CrawlParser {
         handler.handle(book);
     }
 
-    public boolean parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean,
+    public boolean parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean, Integer sourceId,
         Map<Integer, BookIndex> existBookIndexMap, CrawlBookChapterHandler handler, CrawlSingleTask task)
         throws InterruptedException {
 
@@ -314,10 +337,12 @@ public class CrawlParser {
                 bookIndex.setUpdateTime(currentDate);
 
                 if (task != null) {
-                    // 更新采集进度
+                    // 更新单本任务采集进度
                     crawlTaskProgress.put(task.getId(), indexList.size());
                 }
 
+                // 更新爬虫源采集章节数量
+                stringRedisTemplate.opsForValue().increment(CRAWL_SOURCE_CHAPTER_COUNT_CACHE_KEY + sourceId);
 
             }
 
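The increment above sits next to the per-task progress update but outside the task null-check, so it runs once for every chapter that is actually parsed and handed to the persistence callback, both for single-book crawl tasks and for the background update job in StarterListener (which passes a null task). Since the counter only grows while a source keeps producing chapters, a value that stops moving points to rules that no longer match or a site that is unreachable from the current environment, which is the monitoring goal stated in the commit message.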
@@ -74,7 +74,7 @@ public class StarterListener implements ServletContextInitializer {
                         needUpdateBook.getId());
                     //解析章节目录
                     crawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(), book,
-                        ruleBean, existBookIndexMap,
+                        ruleBean, needUpdateBook.getCrawlSourceId(), existBookIndexMap,
                         chapter -> bookService.updateBookAndIndexAndContent(book,
                             chapter.getBookIndexList(),
                             chapter.getBookContentList(), existBookIndexMap), null);
@@ -104,10 +104,15 @@ public class CrawlServiceImpl implements CrawlService {
             .build()
             .render(RenderingStrategies.MYBATIS3);
         List<CrawlSource> crawlSources = crawlSourceMapper.selectMany(render);
-        crawlSources.forEach(crawlSource -> crawlSource.setSourceStatus(
-            Optional.ofNullable(crawlSourceStatusMap.get(crawlSource.getId())).orElse((byte) 0)));
         PageBean<CrawlSource> pageBean = PageBuilder.build(crawlSources);
-        pageBean.setList(BeanUtil.copyList(crawlSources, CrawlSourceVO.class));
+        List<CrawlSourceVO> crawlSourceVOS = BeanUtil.copyList(crawlSources, CrawlSourceVO.class);
+        crawlSourceVOS.forEach(crawlSource -> {
+                crawlSource.setSourceStatus(
+                    Optional.ofNullable(crawlSourceStatusMap.get(crawlSource.getId())).orElse((byte) 0));
+                crawlSource.setChapterCount(crawlParser.getCrawlSourceChapterCount(crawlSource.getId()));
+            }
+        );
+        pageBean.setList(crawlSourceVOS);
         return pageBean;
     }
 
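Note the design choice here: the chapter count is never stored with the CrawlSource entity or its mapper; CrawlServiceImpl attaches it to each CrawlSourceVO at read time via crawlParser.getCrawlSourceChapterCount(), so the feature needs no schema change, at the cost that the counts start over from 0 if the Redis data is lost.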
@@ -386,7 +391,7 @@ public class CrawlServiceImpl implements CrawlService {
         book.setCrawlLastTime(new Date());
         book.setId(idWorker.nextId());
         //解析章节目录
-        boolean parseIndexContentResult = crawlParser.parseBookIndexAndContent(bookId, book, ruleBean,
+        boolean parseIndexContentResult = crawlParser.parseBookIndexAndContent(bookId, book, ruleBean, sourceId,
             new HashMap<>(0), chapter -> {
                 bookService.saveBookAndIndexAndContent(book, chapter.getBookIndexList(),
                     chapter.getBookContentList());
@@ -20,7 +20,7 @@ public class CrawlSourceVO extends CrawlSource{
     @JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd HH:mm")
     private Date updateTime;
 
+    private Long chapterCount;
+
     @Override
     public String toString() {
@@ -43,7 +43,7 @@
             <th class="style">
                 序号
             </th>
-            <th class="chapter">
+            <th class="name">
                 爬虫源
             </th>
             <th class="name">
@@ -52,6 +52,9 @@
             <th class="name">
                 更新时间
             </th>
+            <th class="goread">
+                采集数量
+            </th>
             <th class="goread">
                 状态
             </th>
@@ -111,11 +114,17 @@
 <script src="/javascript/header.js" type="text/javascript"></script>
 <script src="/javascript/user.js" type="text/javascript"></script>
 <script language="javascript" type="text/javascript">
-    search(1, 10);
+    let curr = 1;
+    let limit = 10;
+
+    search();
+    setInterval(function(){
+        search();
+    }, 10000);
 
     var pageCrawlSourceList = null;
 
-    function search(curr, limit) {
+    function search() {
 
         $.ajax({
             type: "get",
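With this front-end change the list page keeps curr and limit in page-level variables, runs search() once on load and again every 10 seconds via setInterval, so the new 采集数量 (collected chapter count) column and the source status refresh without a manual reload while the current pagination is preserved; a later hunk updates curr and limit when the user switches pages.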
@@ -134,13 +143,15 @@
                 "    <td class=\"style bookclass\">\n" +
                 "        [" + (i + 1) + "]\n" +
                 "    </td>\n" +
-                "    <td class=\"chapter\">\n" +
+                "    <td class=\"name\">\n" +
                 "        " + crawlSource.sourceName + "</td>\n" +
                 "    <td class=\"name\" valsc=\"291|2037554|1\">"
                 + crawlSource.createTime + "</td>\n" +
                 "    <td class=\"name\">\n" +
                 "        " + crawlSource.updateTime + "\n" +
                 "    </td>\n" +
+                "    <td class=\"goread\">\n" +
+                "        " + crawlSource.chapterCount + "章</td>\n" +
                 "    <td class=\"goread\" id='sourceStatus" + crawlSource.id + "'>" + (crawlSource.sourceStatus == 0 ? '停止运行' : '正在运行') +
                 "    </td>\n" +
 
@@ -169,7 +180,9 @@
 
             //首次不执行
             if (!first) {
-                search(obj.curr, obj.limit);
+                curr = obj.curr;
+                limit = obj.limit;
+                search();
             } else {
 
             }
@@ -216,11 +229,11 @@
         if (status == 0) {
             //开启
             $("#sourceStatus" + sourceId).html("正在运行");
-            $("#opt" + sourceId).html("<a href='javascript:openOrStopCrawl(" + sourceId + "," + 1 + ")'>关闭</a>");
+            $("#opt" + sourceId).html("<a href='javascript:openOrStopCrawl(" + sourceId + "," + 1 + ")'>关闭 </a>"+"<a href='javascript:updateCrawlSource(" + sourceId + ")'>修改 </a>");
         } else {
             //关闭
             $("#sourceStatus" + sourceId).html("停止运行");
-            $("#opt" + sourceId).html("<a href='javascript:openOrStopCrawl(" + sourceId + "," + 0 + ")'>开启</a>");
+            $("#opt" + sourceId).html("<a href='javascript:openOrStopCrawl(" + sourceId + "," + 0 + ")'>开启 </a>"+"<a href='javascript:updateCrawlSource(" + sourceId + ")'>修改 </a>");
         }
 
 