mirror of
https://github.com/201206030/novel-plus.git
synced 2025-04-27 01:30:51 +00:00
1.解决爬虫线程停止失败的bug,2新增新笔趣阁源,兼容更多源站
This commit is contained in:
parent
80b933db8d
commit
a0fb8e481a
Binary file not shown.
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 62 KiB |
@ -94,6 +94,8 @@ public class CrawlParser {
|
|||||||
|
|
||||||
String desc = bookDetailHtml.substring(bookDetailHtml.indexOf(ruleBean.getDescStart()) + ruleBean.getDescStart().length());
|
String desc = bookDetailHtml.substring(bookDetailHtml.indexOf(ruleBean.getDescStart()) + ruleBean.getDescStart().length());
|
||||||
desc = desc.substring(0, desc.indexOf(ruleBean.getDescEnd()));
|
desc = desc.substring(0, desc.indexOf(ruleBean.getDescEnd()));
|
||||||
|
//过滤掉简介中的a标签
|
||||||
|
desc = desc.replaceAll("<a[^<]+</a>","");
|
||||||
//设置书籍简介
|
//设置书籍简介
|
||||||
book.setBookDesc(desc);
|
book.setBookDesc(desc);
|
||||||
if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) {
|
if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) {
|
||||||
@ -173,6 +175,7 @@ public class CrawlParser {
|
|||||||
String lastIndexName = null;
|
String lastIndexName = null;
|
||||||
|
|
||||||
while (isFindIndex) {
|
while (isFindIndex) {
|
||||||
|
|
||||||
BookIndex hasIndex = hasIndexs.get(indexNum);
|
BookIndex hasIndex = hasIndexs.get(indexNum);
|
||||||
String indexName = indexNameMatch.group(1);
|
String indexName = indexNameMatch.group(1);
|
||||||
|
|
||||||
|
@ -167,6 +167,11 @@ public class CrawlServiceImpl implements CrawlService {
|
|||||||
boolean isFindBookId = bookIdMatcher.find();
|
boolean isFindBookId = bookIdMatcher.find();
|
||||||
while (isFindBookId) {
|
while (isFindBookId) {
|
||||||
try {
|
try {
|
||||||
|
if(Thread.currentThread().isInterrupted()){
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
String bookId = bookIdMatcher.group(1);
|
String bookId = bookIdMatcher.group(1);
|
||||||
Book book = CrawlParser.parseBook(ruleBean, bookId);
|
Book book = CrawlParser.parseBook(ruleBean, bookId);
|
||||||
//这里只做新书入库,查询是否存在这本书
|
//这里只做新书入库,查询是否存在这本书
|
||||||
|
@ -179,9 +179,13 @@
|
|||||||
for (var i = 0; i < bookList.length; i++) {
|
for (var i = 0; i < bookList.length; i++) {
|
||||||
var book = bookList[i];
|
var book = bookList[i];
|
||||||
|
|
||||||
var end = book.bookDesc.indexOf("<");
|
/*var end = book.bookDesc.indexOf("<");
|
||||||
if(end != -1) {
|
if(end != -1) {
|
||||||
book.bookDesc = book.bookDesc.substring(0,end);
|
book.bookDesc = book.bookDesc.substring(0,end);
|
||||||
|
}*/
|
||||||
|
|
||||||
|
if(book.bookDesc){
|
||||||
|
book.bookDesc = book.bookDesc.replace(/<[^>]+>/g,"").replace(/\s+/g,"");
|
||||||
}
|
}
|
||||||
|
|
||||||
bookListHtml += ("<div class=\"layui-row\" style=\"margin-bottom:10px;padding:10px;background: #f2f2f2\">\n" +
|
bookListHtml += ("<div class=\"layui-row\" style=\"margin-bottom:10px;padding:10px;background: #f2f2f2\">\n" +
|
||||||
|
@ -276,6 +276,10 @@
|
|||||||
for (var i = 0; i < 6; i++) {
|
for (var i = 0; i < 6; i++) {
|
||||||
var hotRecBook = hotRecBooks[i];
|
var hotRecBook = hotRecBooks[i];
|
||||||
|
|
||||||
|
if(hotRecBook.bookDesc){
|
||||||
|
hotRecBook.bookDesc = hotRecBook.bookDesc.replace(/<[^>]+>/g,"").replace(/\s+/g,"");
|
||||||
|
}
|
||||||
|
|
||||||
hotRecBooksHtml += ("<div style=\"margin-bottom: 5px\" class=\"layui-col-xs12 layui-col-sm6 layui-col-md4 layui-col-lg4\">\n" +
|
hotRecBooksHtml += ("<div style=\"margin-bottom: 5px\" class=\"layui-col-xs12 layui-col-sm6 layui-col-md4 layui-col-lg4\">\n" +
|
||||||
" <a href=\"/book/"+hotRecBook.bookId+".html\">\n" +
|
" <a href=\"/book/"+hotRecBook.bookId+".html\">\n" +
|
||||||
" <div class=\"layui-col-xs5 layui-col-sm4 layui-col-md4 layui-col-lg4\" >\n" +
|
" <div class=\"layui-col-xs5 layui-col-sm4 layui-col-md4 layui-col-lg4\" >\n" +
|
||||||
@ -323,9 +327,8 @@
|
|||||||
for (var i = 0; i < 10; i++) {
|
for (var i = 0; i < 10; i++) {
|
||||||
|
|
||||||
var updateRankBook = updateRankBooks[i];
|
var updateRankBook = updateRankBooks[i];
|
||||||
var end = updateRankBook.bookDesc.indexOf("<");
|
if(updateRankBook.bookDesc){
|
||||||
if(end != -1) {
|
updateRankBook.bookDesc = updateRankBook.bookDesc.replace(/<[^>]+>/g,"").replace(/\s+/g,"");
|
||||||
updateRankBook.bookDesc = updateRankBook.bookDesc.substring(0,end);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
updateRankBookHtml += ("<div style=\"padding-bottom: 30px\"\n" +
|
updateRankBookHtml += ("<div style=\"padding-bottom: 30px\"\n" +
|
||||||
|
Loading…
x
Reference in New Issue
Block a user