更新策略优化

This commit is contained in:
xiongxiaoyang
2020-01-17 10:46:22 +08:00
parent 05dcf7056b
commit 2967a94e59
12 changed files with 797 additions and 64 deletions

View File

@ -5,20 +5,14 @@ import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import xyz.zinglizingli.books.core.utils.Constants;
import xyz.zinglizingli.books.mapper.BookParseLogMapper;
import xyz.zinglizingli.books.po.Book;
import xyz.zinglizingli.books.po.BookContent;
import xyz.zinglizingli.books.po.BookIndex;
import xyz.zinglizingli.books.po.BookParseLog;
import xyz.zinglizingli.books.po.*;
import xyz.zinglizingli.books.service.BookService;
import xyz.zinglizingli.books.core.utils.CatUtil;
import xyz.zinglizingli.common.utils.ExcutorUtils;
import xyz.zinglizingli.common.utils.RestTemplateUtil;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -37,69 +31,94 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
@Override
public void parse() {
for(int page = 1; page<= Constants.UPDATE_PAGES_ONCE; page++) {
String catBookListUrl = getListPageUrl().replace("{0}", "0").replace("{1}", page+"");
String forObject = RestTemplateUtil.getBodyByUtf8(catBookListUrl);
if (forObject != null) {
//解析第一页书籍的数据
Pattern bookPatten = compile(getBookUrlPattern());
Map<Integer,Date> cat2Date = bookService.queryLastUpdateTime();
Map<Integer,Date> newCat2Date = new HashMap<>();
for(int i=1;i<=7;i++) {
Date lastUpdateTime = cat2Date.get(i);
Date updateTime = lastUpdateTime;
int page = 1;
do{
String catBookListUrl = getListPageUrl().replace("{0}", "0").replace("{1}", page + "");
page++;
String forObject = RestTemplateUtil.getBodyByUtf8(catBookListUrl);
if (forObject != null) {
//解析第一页书籍的数据
Pattern bookPatten = compile(getBookUrlPattern());
Matcher bookMatcher = bookPatten.matcher(forObject);
Matcher bookMatcher = bookPatten.matcher(forObject);
boolean isFind = bookMatcher.find();
Pattern scorePatten = compile(getScorePattern());
Matcher scoreMatch = scorePatten.matcher(forObject);
boolean scoreFind = scoreMatch.find();
boolean isFind = bookMatcher.find();
Pattern scorePatten = compile(getScorePattern());
Matcher scoreMatch = scorePatten.matcher(forObject);
boolean scoreFind = scoreMatch.find();
Pattern bookNamePatten = compile(getBookNamePattern());
Pattern bookNamePatten = compile(getBookNamePattern());
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
Pattern authorPatten = compile(getAuthorPattern());
Pattern authorPatten = compile(getAuthorPattern());
Matcher authorMatch = authorPatten.matcher(forObject);
Matcher authorMatch = authorPatten.matcher(forObject);
boolean isBookNameMatch = bookNameMatch.find();
boolean isBookNameMatch = bookNameMatch.find();
while (isFind && scoreFind && isBookNameMatch && authorMatch.find()) {
while (isFind && scoreFind && isBookNameMatch && authorMatch.find() && updateTime.getTime()>=lastUpdateTime.getTime()) {
try {
Float score = Float.parseFloat(scoreMatch.group(1));
try {
Float score = Float.parseFloat(scoreMatch.group(1));
if (score < getLowestScore()) {
continue;
}
if (score < getLowestScore()) {
continue;
}
String bokNum = bookMatcher.group(1);
String bookUrl = getIndexUrl() + "/" + bokNum + "/";
String bokNum = bookMatcher.group(1);
String bookUrl = getIndexUrl() + "/" + bokNum + "/";
String bookName = bookNameMatch.group(1);
String bookName = bookNameMatch.group(1);
String author = authorMatch.group(1);
String author = authorMatch.group(1);
Boolean hasBook = bookService.hasBook(bookName, author);
Boolean hasBook = bookService.hasBook(bookName, author);
if(hasBook) {
if (hasBook) {
bookService.addBookParseLog(bookUrl, bookName, score);
bookService.addBookParseLog(bookUrl, bookName, score);
}
String body = RestTemplateUtil.getBodyByUtf8(bookUrl);
if (body != null) {
Pattern updateTimePatten = compile(getUpdateTimePattern());
Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1);
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
updateTime = format.parse(updateTimeStr);
if(!newCat2Date.containsKey(i)) {
newCat2Date.put(i, updateTime);
}
}
}
} catch (Exception e) {
log.error(e.getMessage(), e);
} finally {
bookMatcher.find();
isFind = bookMatcher.find();
scoreFind = scoreMatch.find();
isBookNameMatch = bookNameMatch.find();
}
} catch (Exception e) {
log.error(e.getMessage(), e);
} finally {
bookMatcher.find();
isFind = bookMatcher.find();
scoreFind = scoreMatch.find();
isBookNameMatch = bookNameMatch.find();
}
}
}
}while (updateTime.getTime()>=lastUpdateTime.getTime());
}
bookService.updateBookUpdateTimeLog(newCat2Date);
}

View File

@ -66,7 +66,7 @@ public class StartListener implements ServletContextListener {
log.info("updateBooks执行中。。。。。。。。。。。。");
crawlSource.update();
Thread.sleep(1000 * 60 * 10);
Thread.sleep(new Float(1000 * 60 * bookUpdatePeriod).longValue());
} catch (Exception e) {
log.error(e.getMessage(), e);
}

View File

@ -0,0 +1,30 @@
package xyz.zinglizingli.books.mapper;
import java.util.List;
import org.apache.ibatis.annotations.Param;
import xyz.zinglizingli.books.po.BookUpdateTimeLog;
import xyz.zinglizingli.books.po.BookUpdateTimeLogExample;
/**
 * MyBatis mapper contract for {@link BookUpdateTimeLog} records.
 *
 * <p>The SQL behind these methods is not defined in this file. The names follow the usual
 * MyBatis Generator layout (count / select / insert / update / delete, each either by primary
 * key or by a {@link BookUpdateTimeLogExample} filter) - NOTE(review): confirm the exact
 * statement semantics against the mapper XML.
 */
public interface BookUpdateTimeLogMapper {

    // ---- read ----

    /** @return an int result for the statement bound to {@code countByExample} */
    int countByExample(BookUpdateTimeLogExample example);

    /** @return the records produced for the given example filter */
    List<BookUpdateTimeLog> selectByExample(BookUpdateTimeLogExample example);

    /** @return the record produced for the given primary key */
    BookUpdateTimeLog selectByPrimaryKey(Integer id);

    // ---- create ----

    /** @return an int result, presumably the affected row count */
    int insert(BookUpdateTimeLog record);

    /** @return an int result, presumably the affected row count */
    int insertSelective(BookUpdateTimeLog record);

    // ---- update ----

    /** Both arguments are exposed to the statement under the names "record" and "example". */
    int updateByExampleSelective(@Param("record") BookUpdateTimeLog record, @Param("example") BookUpdateTimeLogExample example);

    /** Both arguments are exposed to the statement under the names "record" and "example". */
    int updateByExample(@Param("record") BookUpdateTimeLog record, @Param("example") BookUpdateTimeLogExample example);

    /** @return an int result, presumably the affected row count */
    int updateByPrimaryKeySelective(BookUpdateTimeLog record);

    /** @return an int result, presumably the affected row count */
    int updateByPrimaryKey(BookUpdateTimeLog record);

    // ---- delete ----

    /** @return an int result, presumably the affected row count */
    int deleteByExample(BookUpdateTimeLogExample example);

    /** @return an int result, presumably the affected row count */
    int deleteByPrimaryKey(Integer id);
}

View File

@ -0,0 +1,35 @@
package xyz.zinglizingli.books.po;
import java.util.Date;
/**
 * Mutable data holder tying a book category id to a last-update timestamp.
 *
 * <p>All three properties start out as {@code null} and are exposed through plain
 * getter/setter pairs; no validation or copying is performed.
 */
public class BookUpdateTimeLog {

    /** Identifier of this log record. */
    private Integer id;

    /** Identifier of the book category the timestamp belongs to. */
    private Integer bookCatId;

    /** Last update time stored for the category. */
    private Date lastUpdateTime;

    // ---- id ----

    public void setId(Integer id) {
        this.id = id;
    }

    public Integer getId() {
        return this.id;
    }

    // ---- bookCatId ----

    public void setBookCatId(Integer bookCatId) {
        this.bookCatId = bookCatId;
    }

    public Integer getBookCatId() {
        return this.bookCatId;
    }

    // ---- lastUpdateTime ----

    public void setLastUpdateTime(Date lastUpdateTime) {
        // The reference is stored as-is (no defensive copy), matching the getter below.
        this.lastUpdateTime = lastUpdateTime;
    }

    public Date getLastUpdateTime() {
        return this.lastUpdateTime;
    }
}

View File

@ -0,0 +1,381 @@
package xyz.zinglizingli.books.po;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
 * Filter/ordering description handed to the {@code ...ByExample} methods of
 * {@code BookUpdateTimeLogMapper}.
 *
 * <p>An example holds an optional order-by clause, a distinct flag and a list of
 * {@link Criteria} groups. Each group collects {@link Criterion} entries, one per
 * {@code andXxx(...)} call. How the groups and entries are turned into SQL is decided
 * outside this class (presumably groups are OR-ed and entries AND-ed, as the names
 * suggest - confirm against the mapper XML).
 */
public class BookUpdateTimeLogExample {

    protected String orderByClause;

    protected boolean distinct;

    protected List<Criteria> oredCriteria;

    /** Creates an example with no criteria groups, no ordering and {@code distinct == false}. */
    public BookUpdateTimeLogExample() {
        oredCriteria = new ArrayList<>();
    }

    public String getOrderByClause() {
        return orderByClause;
    }

    public void setOrderByClause(String orderByClause) {
        this.orderByClause = orderByClause;
    }

    public boolean isDistinct() {
        return distinct;
    }

    public void setDistinct(boolean distinct) {
        this.distinct = distinct;
    }

    /** @return the live list of criteria groups registered on this example */
    public List<Criteria> getOredCriteria() {
        return oredCriteria;
    }

    /** Registers an externally built criteria group. */
    public void or(Criteria criteria) {
        oredCriteria.add(criteria);
    }

    /** Creates a new criteria group, registers it and returns it for chaining. */
    public Criteria or() {
        Criteria group = createCriteriaInternal();
        oredCriteria.add(group);
        return group;
    }

    /**
     * Creates a new criteria group. The group is registered only when this example has no
     * group yet; later calls return a fresh, unregistered group.
     */
    public Criteria createCriteria() {
        Criteria group = createCriteriaInternal();
        if (oredCriteria.isEmpty()) {
            oredCriteria.add(group);
        }
        return group;
    }

    /** Factory hook used by {@link #or()} and {@link #createCriteria()}. */
    protected Criteria createCriteriaInternal() {
        return new Criteria();
    }

    /** Drops all criteria groups and resets ordering and the distinct flag. */
    public void clear() {
        oredCriteria.clear();
        orderByClause = null;
        distinct = false;
    }

    /**
     * Base class carrying the criterion list plus one builder method per
     * column/operator combination ({@code id}, {@code book_cat_id}, {@code last_update_time}).
     */
    protected abstract static class GeneratedCriteria {

        protected List<Criterion> criteria;

        protected GeneratedCriteria() {
            criteria = new ArrayList<>();
        }

        /** @return {@code true} once at least one criterion has been added */
        public boolean isValid() {
            return !criteria.isEmpty();
        }

        public List<Criterion> getAllCriteria() {
            return criteria;
        }

        public List<Criterion> getCriteria() {
            return criteria;
        }

        /** Adds a value-less criterion such as {@code "id is null"}. */
        protected void addCriterion(String condition) {
            if (condition == null) {
                throw new RuntimeException("Value for condition cannot be null");
            }
            criteria.add(new Criterion(condition));
        }

        /** Adds a criterion with one value (a {@link List} value is flagged as a list criterion). */
        protected void addCriterion(String condition, Object value, String property) {
            if (value == null) {
                throw new RuntimeException("Value for " + property + " cannot be null");
            }
            criteria.add(new Criterion(condition, value));
        }

        /** Adds a two-value (between) criterion. */
        protected void addCriterion(String condition, Object value1, Object value2, String property) {
            if (value1 == null || value2 == null) {
                throw new RuntimeException("Between values for " + property + " cannot be null");
            }
            criteria.add(new Criterion(condition, value1, value2));
        }

        // The three helpers below add a criterion through the matching addCriterion overload
        // and hand back this object typed as Criteria so calls can be chained.

        private Criteria append(String condition) {
            addCriterion(condition);
            return (Criteria) this;
        }

        private Criteria append(String condition, Object value, String property) {
            addCriterion(condition, value, property);
            return (Criteria) this;
        }

        private Criteria append(String condition, Object value1, Object value2, String property) {
            addCriterion(condition, value1, value2, property);
            return (Criteria) this;
        }

        // ---- column: id ----

        public Criteria andIdIsNull() {
            return append("id is null");
        }

        public Criteria andIdIsNotNull() {
            return append("id is not null");
        }

        public Criteria andIdEqualTo(Integer value) {
            return append("id =", value, "id");
        }

        public Criteria andIdNotEqualTo(Integer value) {
            return append("id <>", value, "id");
        }

        public Criteria andIdGreaterThan(Integer value) {
            return append("id >", value, "id");
        }

        public Criteria andIdGreaterThanOrEqualTo(Integer value) {
            return append("id >=", value, "id");
        }

        public Criteria andIdLessThan(Integer value) {
            return append("id <", value, "id");
        }

        public Criteria andIdLessThanOrEqualTo(Integer value) {
            return append("id <=", value, "id");
        }

        public Criteria andIdIn(List<Integer> values) {
            return append("id in", values, "id");
        }

        public Criteria andIdNotIn(List<Integer> values) {
            return append("id not in", values, "id");
        }

        public Criteria andIdBetween(Integer value1, Integer value2) {
            return append("id between", value1, value2, "id");
        }

        public Criteria andIdNotBetween(Integer value1, Integer value2) {
            return append("id not between", value1, value2, "id");
        }

        // ---- column: book_cat_id ----

        public Criteria andBookCatIdIsNull() {
            return append("book_cat_id is null");
        }

        public Criteria andBookCatIdIsNotNull() {
            return append("book_cat_id is not null");
        }

        public Criteria andBookCatIdEqualTo(Integer value) {
            return append("book_cat_id =", value, "bookCatId");
        }

        public Criteria andBookCatIdNotEqualTo(Integer value) {
            return append("book_cat_id <>", value, "bookCatId");
        }

        public Criteria andBookCatIdGreaterThan(Integer value) {
            return append("book_cat_id >", value, "bookCatId");
        }

        public Criteria andBookCatIdGreaterThanOrEqualTo(Integer value) {
            return append("book_cat_id >=", value, "bookCatId");
        }

        public Criteria andBookCatIdLessThan(Integer value) {
            return append("book_cat_id <", value, "bookCatId");
        }

        public Criteria andBookCatIdLessThanOrEqualTo(Integer value) {
            return append("book_cat_id <=", value, "bookCatId");
        }

        public Criteria andBookCatIdIn(List<Integer> values) {
            return append("book_cat_id in", values, "bookCatId");
        }

        public Criteria andBookCatIdNotIn(List<Integer> values) {
            return append("book_cat_id not in", values, "bookCatId");
        }

        public Criteria andBookCatIdBetween(Integer value1, Integer value2) {
            return append("book_cat_id between", value1, value2, "bookCatId");
        }

        public Criteria andBookCatIdNotBetween(Integer value1, Integer value2) {
            return append("book_cat_id not between", value1, value2, "bookCatId");
        }

        // ---- column: last_update_time ----

        public Criteria andLastUpdateTimeIsNull() {
            return append("last_update_time is null");
        }

        public Criteria andLastUpdateTimeIsNotNull() {
            return append("last_update_time is not null");
        }

        public Criteria andLastUpdateTimeEqualTo(Date value) {
            return append("last_update_time =", value, "lastUpdateTime");
        }

        public Criteria andLastUpdateTimeNotEqualTo(Date value) {
            return append("last_update_time <>", value, "lastUpdateTime");
        }

        public Criteria andLastUpdateTimeGreaterThan(Date value) {
            return append("last_update_time >", value, "lastUpdateTime");
        }

        public Criteria andLastUpdateTimeGreaterThanOrEqualTo(Date value) {
            return append("last_update_time >=", value, "lastUpdateTime");
        }

        public Criteria andLastUpdateTimeLessThan(Date value) {
            return append("last_update_time <", value, "lastUpdateTime");
        }

        public Criteria andLastUpdateTimeLessThanOrEqualTo(Date value) {
            return append("last_update_time <=", value, "lastUpdateTime");
        }

        public Criteria andLastUpdateTimeIn(List<Date> values) {
            return append("last_update_time in", values, "lastUpdateTime");
        }

        public Criteria andLastUpdateTimeNotIn(List<Date> values) {
            return append("last_update_time not in", values, "lastUpdateTime");
        }

        public Criteria andLastUpdateTimeBetween(Date value1, Date value2) {
            return append("last_update_time between", value1, value2, "lastUpdateTime");
        }

        public Criteria andLastUpdateTimeNotBetween(Date value1, Date value2) {
            return append("last_update_time not between", value1, value2, "lastUpdateTime");
        }
    }

    /** Concrete criteria group; adds nothing beyond {@link GeneratedCriteria}. */
    public static class Criteria extends GeneratedCriteria {

        protected Criteria() {
            super();
        }
    }

    /**
     * One condition fragment plus its value(s). Exactly one of the four kind flags
     * (no value / single value / between / list) is set by the constructor used.
     */
    public static class Criterion {

        private String condition;

        private Object value;

        private Object secondValue;

        private boolean noValue;

        private boolean singleValue;

        private boolean betweenValue;

        private boolean listValue;

        private String typeHandler;

        /** Value-less criterion. */
        protected Criterion(String condition) {
            this.condition = condition;
            this.typeHandler = null;
            this.noValue = true;
        }

        /** Single-value criterion; a {@link List} value marks it as a list criterion instead. */
        protected Criterion(String condition, Object value, String typeHandler) {
            this.condition = condition;
            this.value = value;
            this.typeHandler = typeHandler;
            boolean isList = value instanceof List<?>;
            this.listValue = isList;
            this.singleValue = !isList;
        }

        /** Single-value criterion without a type handler. */
        protected Criterion(String condition, Object value) {
            // The bare null resolves to the (String, Object, String) constructor, i.e. typeHandler.
            this(condition, value, null);
        }

        /** Between criterion. */
        protected Criterion(String condition, Object value, Object secondValue, String typeHandler) {
            this.condition = condition;
            this.value = value;
            this.secondValue = secondValue;
            this.typeHandler = typeHandler;
            this.betweenValue = true;
        }

        /** Between criterion without a type handler. */
        protected Criterion(String condition, Object value, Object secondValue) {
            this(condition, value, secondValue, null);
        }

        public String getCondition() {
            return condition;
        }

        public Object getValue() {
            return value;
        }

        public Object getSecondValue() {
            return secondValue;
        }

        public boolean isNoValue() {
            return noValue;
        }

        public boolean isSingleValue() {
            return singleValue;
        }

        public boolean isBetweenValue() {
            return betweenValue;
        }

        public boolean isListValue() {
            return listValue;
        }

        public String getTypeHandler() {
            return typeHandler;
        }
    }
}

View File

@ -3,30 +3,20 @@ package xyz.zinglizingli.books.service;
import com.github.pagehelper.PageHelper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.Charsets;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.utils.DateUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import tk.mybatis.orderbyhelper.OrderByHelper;
import xyz.zinglizingli.books.core.constant.CacheKeyConstans;
import xyz.zinglizingli.books.core.enums.PicSaveType;
import xyz.zinglizingli.books.core.utils.Constants;
import xyz.zinglizingli.books.mapper.*;
import xyz.zinglizingli.books.po.*;
import xyz.zinglizingli.books.core.utils.Constants;
import xyz.zinglizingli.common.cache.CommonCacheUtil;
import xyz.zinglizingli.common.utils.FileUtil;
import xyz.zinglizingli.common.utils.SpringUtil;
import xyz.zinglizingli.common.utils.UUIDUtils;
import xyz.zinglizingli.common.cache.CommonCacheUtil;
import xyz.zinglizingli.common.utils.RestTemplateUtil;
import java.io.*;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Collectors;
@ -51,6 +41,8 @@ public class BookService {
private final BookParseLogMapper bookParseLogMapper;
private final BookUpdateTimeLogMapper bookUpdateTimeLogMapper;
private final CommonCacheUtil cacheUtil;
@ -491,4 +483,31 @@ public class BookService {
example.createCriteria().andBookNameEqualTo(bookName).andAuthorEqualTo(author);
return bookMapper.countByExample(example)>0;
}
/**
 * Queries the category -> last update time mapping.
 *
 * <p>Loads every row through {@code bookUpdateTimeLogMapper} with an unfiltered example and
 * indexes the rows by category id. When several rows share a category id, the time of the
 * later row in the returned list wins.
 *
 * <p>Rows without a last update time are skipped. The previous
 * {@code Collectors.toMap(...)} implementation threw a {@link NullPointerException} for such
 * rows because it rejects {@code null} values.
 *
 * @return a mutable map from book category id to its recorded last update time; categories
 *         that have no row, or no recorded time, are absent
 */
public Map<Integer, Date> queryLastUpdateTime() {
    List<BookUpdateTimeLog> rows = bookUpdateTimeLogMapper.selectByExample(new BookUpdateTimeLogExample());
    Map<Integer, Date> lastUpdateByCat = new HashMap<>();
    for (BookUpdateTimeLog row : rows) {
        Date lastUpdateTime = row.getLastUpdateTime();
        if (lastUpdateTime == null) {
            // Nothing recorded for this row; leave any earlier value for the category in place.
            continue;
        }
        lastUpdateByCat.put(row.getBookCatId(), lastUpdateTime);
    }
    return lastUpdateByCat;
}
/**
 * Updates the category update-time log.
 *
 * <p>For every entry of the given map, calls
 * {@code bookUpdateTimeLogMapper.updateByExampleSelective} with an entity that carries only
 * the new last update time and an example that filters on the entry's category id.
 * An empty map results in no mapper calls.
 *
 * @param cat2Date book category id mapped to the last update time to store
 */
public void updateBookUpdateTimeLog(Map<Integer, Date> cat2Date) {
    if (cat2Date.isEmpty()) {
        return;
    }
    for (Map.Entry<Integer, Date> catEntry : cat2Date.entrySet()) {
        // Only lastUpdateTime is populated on the entity handed to the mapper.
        BookUpdateTimeLog changes = new BookUpdateTimeLog();
        changes.setLastUpdateTime(catEntry.getValue());

        BookUpdateTimeLogExample byCategory = new BookUpdateTimeLogExample();
        byCategory.createCriteria().andBookCatIdEqualTo(catEntry.getKey());

        bookUpdateTimeLogMapper.updateByExampleSelective(changes, byCategory);
    }
}
}