fix(novel-crawl): 修复部分源无法停止的问题

This commit is contained in:
xiongxiaoyang 2025-05-12 17:48:24 +08:00
parent 4f474b91a8
commit 55d5deea74

View File

@ -256,53 +256,52 @@ public class CrawlServiceImpl implements CrawlService {
try { try {
String catIdRule = ruleBean.getCatIdRule().get("catId" + catId); String catIdRule = ruleBean.getCatIdRule().get("catId" + catId);
if (StringUtils.isNotBlank(catIdRule)) { if (StringUtils.isBlank(catIdRule) || Thread.currentThread().isInterrupted()) {
String catBookListUrl = ""; return;
if (StringUtils.isNotBlank(ruleBean.getBookListUrl())) { }
// 兼容老规则 String catBookListUrl = "";
// 拼接分类URL if (StringUtils.isNotBlank(ruleBean.getBookListUrl())) {
catBookListUrl = ruleBean.getBookListUrl() // 兼容老规则
.replace("{catId}", catIdRule) // 拼接分类URL
.replace("{page}", page + ""); catBookListUrl = ruleBean.getBookListUrl()
} else { .replace("{catId}", catIdRule)
// 新规则 .replace("{page}", page + "");
// 拼接分类URL } else {
catBookListUrl = catIdRule.replace("{page}", page + ""); // 新规则
} // 拼接分类URL
log.info("catBookListUrl{}", catBookListUrl); catBookListUrl = catIdRule.replace("{page}", page + "");
}
log.info("catBookListUrl{}", catBookListUrl);
String bookListHtml = crawlHttpClient.get(catBookListUrl, ruleBean.getCharset()); String bookListHtml = crawlHttpClient.get(catBookListUrl, ruleBean.getCharset());
if (bookListHtml != null) { if (bookListHtml != null) {
Pattern bookIdPatten = Pattern.compile(ruleBean.getBookIdPatten()); Pattern bookIdPatten = Pattern.compile(ruleBean.getBookIdPatten());
Matcher bookIdMatcher = bookIdPatten.matcher(bookListHtml); Matcher bookIdMatcher = bookIdPatten.matcher(bookListHtml);
boolean isFindBookId = bookIdMatcher.find(); boolean isFindBookId = bookIdMatcher.find();
while (isFindBookId) { while (isFindBookId) {
try { try {
//1.阻塞过程使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时 //1.阻塞过程使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时
//捕获中断异常InterruptedException来退出线程 //捕获中断异常InterruptedException来退出线程
//2.非阻塞过程中通过判断中断标志来退出线程 //2.非阻塞过程中通过判断中断标志来退出线程
if (Thread.currentThread().isInterrupted()) { if (Thread.currentThread().isInterrupted()) {
return; return;
}
String bookId = bookIdMatcher.group(1);
parseBookAndSave(catId, ruleBean, sourceId, bookId);
} catch (Exception e) {
log.error(e.getMessage(), e);
} }
isFindBookId = bookIdMatcher.find(); String bookId = bookIdMatcher.group(1);
parseBookAndSave(catId, ruleBean, sourceId, bookId);
} catch (Exception e) {
log.error(e.getMessage(), e);
} }
Pattern totalPagePatten = Pattern.compile(ruleBean.getTotalPagePatten()); isFindBookId = bookIdMatcher.find();
Matcher totalPageMatcher = totalPagePatten.matcher(bookListHtml); }
boolean isFindTotalPage = totalPageMatcher.find();
if (isFindTotalPage) {
totalPage = Integer.parseInt(totalPageMatcher.group(1)); Pattern totalPagePatten = Pattern.compile(ruleBean.getTotalPagePatten());
Matcher totalPageMatcher = totalPagePatten.matcher(bookListHtml);
} boolean isFindTotalPage = totalPageMatcher.find();
if (isFindTotalPage) {
totalPage = Integer.parseInt(totalPageMatcher.group(1));
} }
} }