v2.3.0发布

This commit is contained in:
xiongxiaoyang 2020-04-15 10:14:23 +08:00
parent 56645720b3
commit 290522ef6d

View File

@ -31,15 +31,8 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
@Override @Override
public void parse() { public void parse() {
Map<Integer,Date> cat2Date = bookService.queryLastUpdateTime(); for(int page = 1; page<= Constants.UPDATE_PAGES_ONCE; page++) {
Map<Integer,Date> newCat2Date = new HashMap<>(); String catBookListUrl = getListPageUrl().replace("{0}", "0").replace("{1}", page+"");
for(int i=1;i<=7;i++) {
Date lastUpdateTime = cat2Date.get(i);
Date updateTime = null;
int page = 1;
do{
String catBookListUrl = getListPageUrl().replace("{0}", i+"").replace("{1}", page + "");
page++;
String forObject = RestTemplateUtil.getBodyByUtf8(catBookListUrl); String forObject = RestTemplateUtil.getBodyByUtf8(catBookListUrl);
if (forObject != null) { if (forObject != null) {
//解析第一页书籍的数据 //解析第一页书籍的数据
@ -56,13 +49,9 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
Matcher bookNameMatch = bookNamePatten.matcher(forObject); Matcher bookNameMatch = bookNamePatten.matcher(forObject);
Pattern authorPatten = compile(getAuthorPattern());
Matcher authorMatch = authorPatten.matcher(forObject);
boolean isBookNameMatch = bookNameMatch.find(); boolean isBookNameMatch = bookNameMatch.find();
while (isFind && scoreFind && isBookNameMatch && authorMatch.find() && (updateTime==null || updateTime.getTime()>lastUpdateTime.getTime())) { while (isFind && scoreFind && isBookNameMatch) {
try { try {
Float score = Float.parseFloat(scoreMatch.group(1)); Float score = Float.parseFloat(scoreMatch.group(1));
@ -76,36 +65,7 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
String bookName = bookNameMatch.group(1); String bookName = bookNameMatch.group(1);
String author = authorMatch.group(1);
Boolean hasBook = bookService.hasBook(bookName, author);
if (hasBook) {
bookService.addBookParseLog(bookUrl, bookName, score); bookService.addBookParseLog(bookUrl, bookName, score);
}
String body = RestTemplateUtil.getBodyByUtf8(bookUrl);
if (body != null) {
Pattern updateTimePatten = compile(getUpdateTimePattern());
Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1);
SimpleDateFormat format ;
if(updateTimeStr.length()>10){
format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
}else{
format = new SimpleDateFormat("yy-MM-dd");
}
updateTime = format.parse(updateTimeStr);
if(!newCat2Date.containsKey(i)) {
newCat2Date.put(i, updateTime);
}
}
}
} catch (Exception e) { } catch (Exception e) {
@ -122,9 +82,7 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
} }
} }
}while (updateTime == null || updateTime.getTime()>lastUpdateTime.getTime());
} }
bookService.updateBookUpdateTimeLog(newCat2Date);
} }
@ -163,7 +121,7 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
Pattern updateTimePatten = compile(getUpdateTimePattern()); Pattern updateTimePatten = compile(getUpdateTimePattern());
Matcher updateTimeMatch = updateTimePatten.matcher(body); Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) { /*if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1); String updateTimeStr = updateTimeMatch.group(1);
SimpleDateFormat format ; SimpleDateFormat format ;
if(updateTimeStr.length()>10){ if(updateTimeStr.length()>10){
@ -172,7 +130,7 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
}else{ }else{
format = new SimpleDateFormat("yy-MM-dd"); format = new SimpleDateFormat("yy-MM-dd");
} }
Date updateTime = format.parse(updateTimeStr); Date updateTime = format.parse(updateTimeStr);*/
Pattern picPatten = compile(getPicPattern()); Pattern picPatten = compile(getPicPattern());
Matcher picMather = picPatten.matcher(body); Matcher picMather = picPatten.matcher(body);
if (picMather.find()) { if (picMather.find()) {
@ -189,7 +147,7 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
book.setScore(score > 10 ? 8.0f : score); book.setScore(score > 10 ? 8.0f : score);
book.setPicUrl(picSrc); book.setPicUrl(picSrc);
book.setBookStatus(status); book.setBookStatus(status);
book.setUpdateTime(updateTime); book.setUpdateTime(new Date());
List<BookIndex> indexList = new ArrayList<>(); List<BookIndex> indexList = new ArrayList<>();
List<BookContent> contentList = new ArrayList<>(); List<BookContent> contentList = new ArrayList<>();
@ -261,7 +219,7 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
} }
} //}
} }
} }