更新策略调整

This commit is contained in:
xiongxiaoyang 2020-01-13 11:53:38 +08:00
parent 0c6c7ba8f3
commit f403870d48
3 changed files with 25 additions and 5 deletions

View File

@ -55,9 +55,13 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
Pattern authorPatten = compile(getAuthorPattern());
Matcher authorMatch = authorPatten.matcher(forObject);
boolean isBookNameMatch = bookNameMatch.find();
while (isFind && scoreFind && isBookNameMatch) {
while (isFind && scoreFind && isBookNameMatch && authorMatch.find()) {
try {
Float score = Float.parseFloat(scoreMatch.group(1));
@ -71,7 +75,14 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
String bookName = bookNameMatch.group(1);
String author = authorMatch.group(1);
Boolean hasBook = bookService.hasBook(bookName, author);
if(hasBook) {
bookService.addBookParseLog(bookUrl, bookName, score);
}
} catch (Exception e) {

View File

@ -482,4 +482,13 @@ public class BookService {
bookParseLogMapper.deleteByExample(example);
}
}
/**
 * Checks whether a book with the given name and author exists.
 *
 * <p>Builds a {@code BookExample} whose criteria require an exact match on both
 * the book name and the author, then asks the mapper how many rows match.
 *
 * @param bookName book name to match exactly
 * @param author author name to match exactly
 * @return {@code true} if the mapper counts at least one matching row,
 *         {@code false} otherwise
 */
public Boolean hasBook(String bookName, String author) {
    BookExample bookQuery = new BookExample();
    bookQuery.createCriteria()
            .andBookNameEqualTo(bookName)
            .andAuthorEqualTo(author);
    // Any positive count means a matching record is present.
    boolean exists = bookMapper.countByExample(bookQuery) > 0;
    return exists;
}
}

View File

@ -3,9 +3,9 @@ server:
spring:
datasource:
url: jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai
username: root
password: test123456
url: jdbc:mysql://47.106.243.172:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai
username: books
password: books!8888
# url: jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai
# username: root
# password: test123456