mirror of
https://github.com/201206030/novel-plus.git
synced 2025-05-17 07:18:29 +00:00
fix(novel-crawl): 修复部分源无法停止的问题
This commit is contained in:
parent
4f474b91a8
commit
55d5deea74
@ -256,53 +256,52 @@ public class CrawlServiceImpl implements CrawlService {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
String catIdRule = ruleBean.getCatIdRule().get("catId" + catId);
|
String catIdRule = ruleBean.getCatIdRule().get("catId" + catId);
|
||||||
if (StringUtils.isNotBlank(catIdRule)) {
|
if (StringUtils.isBlank(catIdRule) || Thread.currentThread().isInterrupted()) {
|
||||||
String catBookListUrl = "";
|
return;
|
||||||
if (StringUtils.isNotBlank(ruleBean.getBookListUrl())) {
|
}
|
||||||
// 兼容老规则
|
String catBookListUrl = "";
|
||||||
// 拼接分类URL
|
if (StringUtils.isNotBlank(ruleBean.getBookListUrl())) {
|
||||||
catBookListUrl = ruleBean.getBookListUrl()
|
// 兼容老规则
|
||||||
.replace("{catId}", catIdRule)
|
// 拼接分类URL
|
||||||
.replace("{page}", page + "");
|
catBookListUrl = ruleBean.getBookListUrl()
|
||||||
} else {
|
.replace("{catId}", catIdRule)
|
||||||
// 新规则
|
.replace("{page}", page + "");
|
||||||
// 拼接分类URL
|
} else {
|
||||||
catBookListUrl = catIdRule.replace("{page}", page + "");
|
// 新规则
|
||||||
}
|
// 拼接分类URL
|
||||||
log.info("catBookListUrl:{}", catBookListUrl);
|
catBookListUrl = catIdRule.replace("{page}", page + "");
|
||||||
|
}
|
||||||
|
log.info("catBookListUrl:{}", catBookListUrl);
|
||||||
|
|
||||||
String bookListHtml = crawlHttpClient.get(catBookListUrl, ruleBean.getCharset());
|
String bookListHtml = crawlHttpClient.get(catBookListUrl, ruleBean.getCharset());
|
||||||
if (bookListHtml != null) {
|
if (bookListHtml != null) {
|
||||||
Pattern bookIdPatten = Pattern.compile(ruleBean.getBookIdPatten());
|
Pattern bookIdPatten = Pattern.compile(ruleBean.getBookIdPatten());
|
||||||
Matcher bookIdMatcher = bookIdPatten.matcher(bookListHtml);
|
Matcher bookIdMatcher = bookIdPatten.matcher(bookListHtml);
|
||||||
boolean isFindBookId = bookIdMatcher.find();
|
boolean isFindBookId = bookIdMatcher.find();
|
||||||
while (isFindBookId) {
|
while (isFindBookId) {
|
||||||
try {
|
try {
|
||||||
//1.阻塞过程(使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时)
|
//1.阻塞过程(使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时)
|
||||||
//捕获中断异常InterruptedException来退出线程。
|
//捕获中断异常InterruptedException来退出线程。
|
||||||
//2.非阻塞过程中通过判断中断标志来退出线程。
|
//2.非阻塞过程中通过判断中断标志来退出线程。
|
||||||
if (Thread.currentThread().isInterrupted()) {
|
if (Thread.currentThread().isInterrupted()) {
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
|
|
||||||
String bookId = bookIdMatcher.group(1);
|
|
||||||
parseBookAndSave(catId, ruleBean, sourceId, bookId);
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error(e.getMessage(), e);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
isFindBookId = bookIdMatcher.find();
|
String bookId = bookIdMatcher.group(1);
|
||||||
|
parseBookAndSave(catId, ruleBean, sourceId, bookId);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error(e.getMessage(), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
Pattern totalPagePatten = Pattern.compile(ruleBean.getTotalPagePatten());
|
isFindBookId = bookIdMatcher.find();
|
||||||
Matcher totalPageMatcher = totalPagePatten.matcher(bookListHtml);
|
}
|
||||||
boolean isFindTotalPage = totalPageMatcher.find();
|
|
||||||
if (isFindTotalPage) {
|
|
||||||
|
|
||||||
totalPage = Integer.parseInt(totalPageMatcher.group(1));
|
Pattern totalPagePatten = Pattern.compile(ruleBean.getTotalPagePatten());
|
||||||
|
Matcher totalPageMatcher = totalPagePatten.matcher(bookListHtml);
|
||||||
}
|
boolean isFindTotalPage = totalPageMatcher.find();
|
||||||
|
if (isFindTotalPage) {
|
||||||
|
|
||||||
|
totalPage = Integer.parseInt(totalPageMatcher.group(1));
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user