1. 解决爬虫线程停止失败的bug;2. 新增新笔趣阁源,兼容更多源站

This commit is contained in:
xiongxiaoyang 2020-05-24 00:54:27 +08:00
parent 80b933db8d
commit a0fb8e481a
5 changed files with 19 additions and 4 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 62 KiB

View File

@ -94,6 +94,8 @@ public class CrawlParser {
String desc = bookDetailHtml.substring(bookDetailHtml.indexOf(ruleBean.getDescStart()) + ruleBean.getDescStart().length());
desc = desc.substring(0, desc.indexOf(ruleBean.getDescEnd()));
//过滤掉简介中的a标签
desc = desc.replaceAll("<a[^<]+</a>","");
//设置书籍简介
book.setBookDesc(desc);
if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) {
@ -173,6 +175,7 @@ public class CrawlParser {
String lastIndexName = null;
while (isFindIndex) {
BookIndex hasIndex = hasIndexs.get(indexNum);
String indexName = indexNameMatch.group(1);

View File

@ -167,6 +167,11 @@ public class CrawlServiceImpl implements CrawlService {
boolean isFindBookId = bookIdMatcher.find();
while (isFindBookId) {
try {
if(Thread.currentThread().isInterrupted()){
return;
}
String bookId = bookIdMatcher.group(1);
Book book = CrawlParser.parseBook(ruleBean, bookId);
//这里只做新书入库查询是否存在这本书

View File

@ -179,9 +179,13 @@
for (var i = 0; i < bookList.length; i++) {
var book = bookList[i];
var end = book.bookDesc.indexOf("<");
/*var end = book.bookDesc.indexOf("<");
if(end != -1) {
book.bookDesc = book.bookDesc.substring(0,end);
}*/
if(book.bookDesc){
book.bookDesc = book.bookDesc.replace(/<[^>]+>/g,"").replace(/\s+/g,"");
}
bookListHtml += ("<div class=\"layui-row\" style=\"margin-bottom:10px;padding:10px;background: #f2f2f2\">\n" +

View File

@ -276,6 +276,10 @@
for (var i = 0; i < 6; i++) {
var hotRecBook = hotRecBooks[i];
if(hotRecBook.bookDesc){
hotRecBook.bookDesc = hotRecBook.bookDesc.replace(/<[^>]+>/g,"").replace(/\s+/g,"");
}
hotRecBooksHtml += ("<div style=\"margin-bottom: 5px\" class=\"layui-col-xs12 layui-col-sm6 layui-col-md4 layui-col-lg4\">\n" +
" <a href=\"/book/"+hotRecBook.bookId+".html\">\n" +
" <div class=\"layui-col-xs5 layui-col-sm4 layui-col-md4 layui-col-lg4\" >\n" +
@ -323,9 +327,8 @@
for (var i = 0; i < 10; i++) {
var updateRankBook = updateRankBooks[i];
var end = updateRankBook.bookDesc.indexOf("<");
if(end != -1) {
updateRankBook.bookDesc = updateRankBook.bookDesc.substring(0,end);
if(updateRankBook.bookDesc){
updateRankBook.bookDesc = updateRankBook.bookDesc.replace(/<[^>]+>/g,"").replace(/\s+/g,"");
}
updateRankBookHtml += ("<div style=\"padding-bottom: 30px\"\n" +