mirror of
https://github.com/201206030/novel-plus.git
synced 2025-06-24 04:46:37 +00:00
1.解决爬虫线程停止失败的bug,2.新增新笔趣阁源,兼容更多源站
This commit is contained in:
@@ -94,6 +94,8 @@ public class CrawlParser {
|
||||
|
||||
String desc = bookDetailHtml.substring(bookDetailHtml.indexOf(ruleBean.getDescStart()) + ruleBean.getDescStart().length());
|
||||
desc = desc.substring(0, desc.indexOf(ruleBean.getDescEnd()));
|
||||
//过滤掉简介中的a标签
|
||||
desc = desc.replaceAll("<a[^<]+</a>","");
|
||||
//设置书籍简介
|
||||
book.setBookDesc(desc);
|
||||
if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) {
|
||||
@@ -173,6 +175,7 @@ public class CrawlParser {
|
||||
String lastIndexName = null;
|
||||
|
||||
while (isFindIndex) {
|
||||
|
||||
BookIndex hasIndex = hasIndexs.get(indexNum);
|
||||
String indexName = indexNameMatch.group(1);
|
||||
|
||||
|
@@ -167,6 +167,11 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
boolean isFindBookId = bookIdMatcher.find();
|
||||
while (isFindBookId) {
|
||||
try {
|
||||
if(Thread.currentThread().isInterrupted()){
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
String bookId = bookIdMatcher.group(1);
|
||||
Book book = CrawlParser.parseBook(ruleBean, bookId);
|
||||
//这里只做新书入库,查询是否存在这本书
|
||||
|
Reference in New Issue
Block a user