mirror of
https://github.com/201206030/novel-plus.git
synced 2025-04-26 17:20:52 +00:00
1.解决爬虫线程停止失败的bug;2.新增新笔趣阁源,兼容更多源站
This commit is contained in:
parent
80b933db8d
commit
a0fb8e481a
Binary file not shown.
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 62 KiB |
@ -94,6 +94,8 @@ public class CrawlParser {
|
||||
|
||||
String desc = bookDetailHtml.substring(bookDetailHtml.indexOf(ruleBean.getDescStart()) + ruleBean.getDescStart().length());
|
||||
desc = desc.substring(0, desc.indexOf(ruleBean.getDescEnd()));
|
||||
//过滤掉简介中的a标签
|
||||
desc = desc.replaceAll("<a[^<]+</a>","");
|
||||
//设置书籍简介
|
||||
book.setBookDesc(desc);
|
||||
if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) {
|
||||
@ -173,6 +175,7 @@ public class CrawlParser {
|
||||
String lastIndexName = null;
|
||||
|
||||
while (isFindIndex) {
|
||||
|
||||
BookIndex hasIndex = hasIndexs.get(indexNum);
|
||||
String indexName = indexNameMatch.group(1);
|
||||
|
||||
|
@ -167,6 +167,11 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
boolean isFindBookId = bookIdMatcher.find();
|
||||
while (isFindBookId) {
|
||||
try {
|
||||
if(Thread.currentThread().isInterrupted()){
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
String bookId = bookIdMatcher.group(1);
|
||||
Book book = CrawlParser.parseBook(ruleBean, bookId);
|
||||
//这里只做新书入库,查询是否存在这本书
|
||||
|
@ -179,9 +179,13 @@
|
||||
for (var i = 0; i < bookList.length; i++) {
|
||||
var book = bookList[i];
|
||||
|
||||
var end = book.bookDesc.indexOf("<");
|
||||
/*var end = book.bookDesc.indexOf("<");
|
||||
if(end != -1) {
|
||||
book.bookDesc = book.bookDesc.substring(0,end);
|
||||
}*/
|
||||
|
||||
if(book.bookDesc){
|
||||
book.bookDesc = book.bookDesc.replace(/<[^>]+>/g,"").replace(/\s+/g,"");
|
||||
}
|
||||
|
||||
bookListHtml += ("<div class=\"layui-row\" style=\"margin-bottom:10px;padding:10px;background: #f2f2f2\">\n" +
|
||||
|
@ -276,6 +276,10 @@
|
||||
for (var i = 0; i < 6; i++) {
|
||||
var hotRecBook = hotRecBooks[i];
|
||||
|
||||
if(hotRecBook.bookDesc){
|
||||
hotRecBook.bookDesc = hotRecBook.bookDesc.replace(/<[^>]+>/g,"").replace(/\s+/g,"");
|
||||
}
|
||||
|
||||
hotRecBooksHtml += ("<div style=\"margin-bottom: 5px\" class=\"layui-col-xs12 layui-col-sm6 layui-col-md4 layui-col-lg4\">\n" +
|
||||
" <a href=\"/book/"+hotRecBook.bookId+".html\">\n" +
|
||||
" <div class=\"layui-col-xs5 layui-col-sm4 layui-col-md4 layui-col-lg4\" >\n" +
|
||||
@ -323,9 +327,8 @@
|
||||
for (var i = 0; i < 10; i++) {
|
||||
|
||||
var updateRankBook = updateRankBooks[i];
|
||||
var end = updateRankBook.bookDesc.indexOf("<");
|
||||
if(end != -1) {
|
||||
updateRankBook.bookDesc = updateRankBook.bookDesc.substring(0,end);
|
||||
if(updateRankBook.bookDesc){
|
||||
updateRankBook.bookDesc = updateRankBook.bookDesc.replace(/<[^>]+>/g,"").replace(/\s+/g,"");
|
||||
}
|
||||
|
||||
updateRankBookHtml += ("<div style=\"padding-bottom: 30px\"\n" +
|
||||
|
Loading…
x
Reference in New Issue
Block a user