更新策略调整

This commit is contained in:
xiongxiaoyang 2020-01-13 11:28:59 +08:00
parent a35ac89d89
commit 0c6c7ba8f3
10 changed files with 1063 additions and 161 deletions

View File

@ -17,4 +17,10 @@ public abstract class BaseCrawlSource {
* 解析数据
* */
public abstract void parse();
/**
 * Updates books.
 * Declared abstract: this class provides no implementation of its own.
 */
public abstract void update();
}

View File

@ -3,9 +3,12 @@ package xyz.zinglizingli.books.core.crawl;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import xyz.zinglizingli.books.core.utils.Constants;
import xyz.zinglizingli.books.mapper.BookParseLogMapper;
import xyz.zinglizingli.books.po.Book;
import xyz.zinglizingli.books.po.BookContent;
import xyz.zinglizingli.books.po.BookIndex;
import xyz.zinglizingli.books.po.BookParseLog;
import xyz.zinglizingli.books.service.BookService;
import xyz.zinglizingli.books.core.utils.CatUtil;
import xyz.zinglizingli.common.utils.ExcutorUtils;
@ -34,177 +37,208 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
@Override
public void parse() {
String catBookListUrl = getListPageUrl().replace("{0}", "0").replace("{1}", "1");
String forObject = RestTemplateUtil.getBodyByUtf8(catBookListUrl);
if (forObject != null) {
//解析第一页书籍的数据
Pattern bookPatten = compile(getBookUrlPattern());
for(int page = 1; page<= Constants.UPDATE_PAGES_ONCE; page++) {
String catBookListUrl = getListPageUrl().replace("{0}", "0").replace("{1}", page+"");
String forObject = RestTemplateUtil.getBodyByUtf8(catBookListUrl);
if (forObject != null) {
//解析第一页书籍的数据
Pattern bookPatten = compile(getBookUrlPattern());
Matcher bookMatcher = bookPatten.matcher(forObject);
Matcher bookMatcher = bookPatten.matcher(forObject);
boolean isFind = bookMatcher.find();
Pattern scorePatten = compile(getScorePattern());
Matcher scoreMatch = scorePatten.matcher(forObject);
boolean scoreFind = scoreMatch.find();
boolean isFind = bookMatcher.find();
Pattern scorePatten = compile(getScorePattern());
Matcher scoreMatch = scorePatten.matcher(forObject);
boolean scoreFind = scoreMatch.find();
Pattern bookNamePatten = compile(getBookNamePattern());
Pattern bookNamePatten = compile(getBookNamePattern());
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
boolean isBookNameMatch = bookNameMatch.find();
boolean isBookNameMatch = bookNameMatch.find();
while (isFind && scoreFind && isBookNameMatch) {
try {
Float score = Float.parseFloat(scoreMatch.group(1));
if (score < getLowestScore()) {
continue;
}
String bokNum = bookMatcher.group(1);
String bookUrl = getIndexUrl() + "/" + bokNum + "/";
String body = RestTemplateUtil.getBodyByUtf8(bookUrl);
if (body != null) {
String bookName = bookNameMatch.group(1);
Pattern authorPatten = compile(getAuthorPattern());
Matcher authoreMatch = authorPatten.matcher(body);
if (authoreMatch.find()) {
String author = authoreMatch.group(1);
Pattern statusPatten = compile(getStatusPattern());
Matcher statusMatch = statusPatten.matcher(body);
if (statusMatch.find()) {
String status = statusMatch.group(1);
Pattern catPatten = compile(getCatPattern());
Matcher catMatch = catPatten.matcher(body);
if (catMatch.find()) {
String catName = catMatch.group(1);
int catNum = CatUtil.getCatNum(catName);
Pattern updateTimePatten = compile(getUpdateTimePattern());
Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1);
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
Date updateTime = format.parse(updateTimeStr);
Pattern picPatten = compile(getPicPattern());
Matcher picMather = picPatten.matcher(body);
if (picMather.find()) {
String picSrc = picMather.group(1);
String desc = body.substring(body.indexOf("<p class=\"review\">") + "<p class=\"review\">".length());
desc = desc.substring(0, desc.indexOf("</p>"));
Book book = new Book();
book.setAuthor(author);
book.setCatid(catNum);
book.setBookDesc(desc);
book.setBookName(bookName);
book.setScore(score > 10 ? 8.0f : score);
book.setPicUrl(picSrc);
book.setBookStatus(status);
book.setUpdateTime(updateTime);
List<BookIndex> indexList = new ArrayList<>();
List<BookContent> contentList = new ArrayList<>();
//读取目录
Pattern indexPatten = compile(getCatalogUrlPattern());
Matcher indexMatch = indexPatten.matcher(body);
if (indexMatch.find()) {
String indexUrl = getIndexUrl() + indexMatch.group(1);
String body2 = RestTemplateUtil.getBodyByUtf8(indexUrl);
if (body2 != null) {
Pattern indexListPatten = compile(getCatalogPattern());
Matcher indexListMatch = indexListPatten.matcher(body2);
boolean isFindIndex = indexListMatch.find();
int indexNum = 0;
//查询该书籍已存在目录号
Map<Integer, BookIndex> hasIndexs = bookService.queryIndexByBookNameAndAuthor(bookName, author);
//更新和插入分别开此处只做更新
if (hasIndexs.size() > 0) {
while (isFindIndex) {
BookIndex hasIndex = hasIndexs.get(indexNum);
String indexName = indexListMatch.group(2);
if (hasIndex == null || !StringUtils.deleteWhitespace(hasIndex.getIndexName()).equals(StringUtils.deleteWhitespace(indexName))) {
String contentUrl = getIndexUrl() + indexListMatch.group(1);
//查询章节内容
String body3 = RestTemplateUtil.getBodyByUtf8(contentUrl.replace("//m.", "//www.").replace("//wap.", "//www."));
if (body3 != null) {
String start = "id=\"content\">";
String end = "<script>";
String content = body3.substring(body3.indexOf(start) + start.length());
content = "<div class=\"article-content font16\" id=\"ChapterBody\" data-class=\"font16\">" + content.substring(0, content.indexOf(end)) + "</div>";
//TODO插入章节目录和章节内容
BookIndex bookIndex = new BookIndex();
bookIndex.setIndexName(indexName);
bookIndex.setIndexNum(indexNum);
indexList.add(bookIndex);
BookContent bookContent = new BookContent();
bookContent.setContent(content);
bookContent.setIndexNum(indexNum);
contentList.add(bookContent);
} else {
break;
}
}
indexNum++;
isFindIndex = indexListMatch.find();
}
if (indexList.size() == contentList.size() && indexList.size() > 0) {
bookService.saveBookAndIndexAndContent(book, indexList, contentList);
}
}
}
}
}
}
}
}
while (isFind && scoreFind && isBookNameMatch) {
try {
Float score = Float.parseFloat(scoreMatch.group(1));
if (score < getLowestScore()) {
continue;
}
String bokNum = bookMatcher.group(1);
String bookUrl = getIndexUrl() + "/" + bokNum + "/";
String bookName = bookNameMatch.group(1);
bookService.addBookParseLog(bookUrl, bookName, score);
} catch (Exception e) {
log.error(e.getMessage(), e);
} finally {
bookMatcher.find();
isFind = bookMatcher.find();
scoreFind = scoreMatch.find();
isBookNameMatch = bookNameMatch.find();
}
} catch (Exception e) {
e.printStackTrace();
} finally {
bookMatcher.find();
isFind = bookMatcher.find();
scoreFind = scoreMatch.find();
isBookNameMatch = bookNameMatch.find();
}
}
}
}
/**
 * Refreshes stored books from the queued parse logs.
 *
 * For every log returned by {@code bookService.queryBookParseLogs()} the book page is fetched,
 * its metadata is extracted with the configured patterns, and chapters whose title is missing
 * or differs from the stored one are downloaded and saved. Only books that already have stored
 * chapter indexes are written; a log is removed once its catalog page was fetched, even if
 * nothing had to be saved. Any exception for one log is logged and that log stays queued.
 */
@Override
public void update() {
    List<BookParseLog> pendingLogs = bookService.queryBookParseLogs();
    // Ids of logs that were processed far enough to be deleted afterwards.
    List<Long> handledLogIds = new ArrayList<>();
    for (BookParseLog pendingLog : pendingLogs) {
        try {
            Float score = pendingLog.getScore();
            String bookUrl = pendingLog.getBookUrl();
            String bookName = pendingLog.getBookName();

            // Book detail page; each failed step below skips this log and leaves it queued.
            String detailPage = RestTemplateUtil.getBodyByUtf8(bookUrl);
            if (detailPage == null) {
                continue;
            }

            Matcher authorMatcher = compile(getAuthorPattern()).matcher(detailPage);
            if (!authorMatcher.find()) {
                continue;
            }
            String author = authorMatcher.group(1);

            Matcher statusMatcher = compile(getStatusPattern()).matcher(detailPage);
            if (!statusMatcher.find()) {
                continue;
            }
            String status = statusMatcher.group(1);

            Matcher categoryMatcher = compile(getCatPattern()).matcher(detailPage);
            if (!categoryMatcher.find()) {
                continue;
            }
            String catName = categoryMatcher.group(1);
            int catNum = CatUtil.getCatNum(catName);

            Matcher updateTimeMatcher = compile(getUpdateTimePattern()).matcher(detailPage);
            if (!updateTimeMatcher.find()) {
                continue;
            }
            String updateTimeStr = updateTimeMatcher.group(1);
            SimpleDateFormat updateTimeFormat = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
            Date updateTime = updateTimeFormat.parse(updateTimeStr);

            Matcher coverMatcher = compile(getPicPattern()).matcher(detailPage);
            if (!coverMatcher.find()) {
                continue;
            }
            String picSrc = coverMatcher.group(1);

            // Description is the text between the review paragraph's opening tag and the next </p>.
            String reviewOpenTag = "<p class=\"review\">";
            String desc = detailPage.substring(detailPage.indexOf(reviewOpenTag) + reviewOpenTag.length());
            desc = desc.substring(0, desc.indexOf("</p>"));

            Book book = new Book();
            book.setAuthor(author);
            book.setCatid(catNum);
            book.setBookDesc(desc);
            book.setBookName(bookName);
            // Scores above 10 are replaced by 8.0.
            book.setScore(score > 10 ? 8.0f : score);
            book.setPicUrl(picSrc);
            book.setBookStatus(status);
            book.setUpdateTime(updateTime);

            List<BookIndex> newIndexes = new ArrayList<>();
            List<BookContent> newContents = new ArrayList<>();

            // Read the catalog page
            Matcher catalogUrlMatcher = compile(getCatalogUrlPattern()).matcher(detailPage);
            if (!catalogUrlMatcher.find()) {
                continue;
            }
            String catalogUrl = getIndexUrl() + catalogUrlMatcher.group(1);
            String catalogPage = RestTemplateUtil.getBodyByUtf8(catalogUrl);
            if (catalogPage == null) {
                continue;
            }

            Matcher chapterMatcher = compile(getCatalogPattern()).matcher(catalogPage);
            boolean hasChapter = chapterMatcher.find();
            int chapterNo = 0;
            // Chapter indexes already stored for this book, keyed by index number
            Map<Integer, BookIndex> storedIndexes = bookService.queryIndexByBookNameAndAuthor(bookName, author);
            // Update and insert are handled separately; this method only updates existing books
            if (!storedIndexes.isEmpty()) {
                while (hasChapter) {
                    BookIndex stored = storedIndexes.get(chapterNo);
                    String chapterTitle = chapterMatcher.group(2);
                    boolean needsFetch = stored == null
                            || !StringUtils.deleteWhitespace(stored.getIndexName()).equals(StringUtils.deleteWhitespace(chapterTitle));
                    if (needsFetch) {
                        String contentUrl = getIndexUrl() + chapterMatcher.group(1);
                        // Fetch the chapter content
                        String chapterPage = RestTemplateUtil.getBodyByUtf8(contentUrl.replace("//m.", "//www.").replace("//wap.", "//www."));
                        if (chapterPage == null) {
                            // Stop at the first chapter that cannot be fetched; what was collected is still saved.
                            break;
                        }
                        String start = "id=\"content\">";
                        String end = "<script>";
                        String content = chapterPage.substring(chapterPage.indexOf(start) + start.length());
                        content = "<div class=\"article-content font16\" id=\"ChapterBody\" data-class=\"font16\">" + content.substring(0, content.indexOf(end)) + "</div>";
                        // TODO: insert chapter index and chapter content
                        BookIndex bookIndex = new BookIndex();
                        bookIndex.setIndexName(chapterTitle);
                        bookIndex.setIndexNum(chapterNo);
                        newIndexes.add(bookIndex);
                        BookContent bookContent = new BookContent();
                        bookContent.setContent(content);
                        bookContent.setIndexNum(chapterNo);
                        newContents.add(bookContent);
                    }
                    chapterNo++;
                    hasChapter = chapterMatcher.find();
                }
                boolean haveMatchingBatches = newIndexes.size() == newContents.size() && newIndexes.size() > 0;
                if (haveMatchingBatches) {
                    bookService.saveBookAndIndexAndContent(book, newIndexes, newContents);
                }
            }
            handledLogIds.add(pendingLog.getId());
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }
    bookService.deleteBookParseLogs(handledLogIds);
}
}

View File

@ -105,4 +105,9 @@ public class Constants {
* SEO配置保存的key
* */
public static final String SEO_CONFIG_KEY = "seoConfig";
/**
 * Number of pages crawled per update run.
 */
public static final int UPDATE_PAGES_ONCE = 10;
}

View File

@ -0,0 +1,30 @@
package xyz.zinglizingli.books.mapper;
import java.util.List;
import org.apache.ibatis.annotations.Param;
import xyz.zinglizingli.books.po.BookParseLog;
import xyz.zinglizingli.books.po.BookParseLogExample;
/**
 * MyBatis mapper for the {@code book_parse_log} table.
 * The SQL for every method lives in the mapper XML registered under this interface's name.
 */
public interface BookParseLogMapper {
/** Counts the rows matching the criteria in {@code example}. */
int countByExample(BookParseLogExample example);
/** Deletes the rows matching the criteria in {@code example}; returns the affected row count. */
int deleteByExample(BookParseLogExample example);
/** Deletes the row with the given primary key. */
int deleteByPrimaryKey(Long id);
/** Inserts a row writing all five columns, including those whose property is {@code null}. */
int insert(BookParseLog record);
/** Inserts a row writing only the columns whose property is non-null. */
int insertSelective(BookParseLog record);
/** Selects the rows matching {@code example}, honouring its distinct flag and order-by clause. */
List<BookParseLog> selectByExample(BookParseLogExample example);
/** Selects the row with the given primary key. */
BookParseLog selectByPrimaryKey(Long id);
/** Updates the non-null properties of {@code record} on every row matching {@code example}. */
int updateByExampleSelective(@Param("record") BookParseLog record, @Param("example") BookParseLogExample example);
/** Overwrites all five columns with the values of {@code record} on every row matching {@code example}. */
int updateByExample(@Param("record") BookParseLog record, @Param("example") BookParseLogExample example);
/** Updates the non-null, non-key properties of {@code record} on the row identified by its id. */
int updateByPrimaryKeySelective(BookParseLog record);
/** Overwrites all non-key columns on the row identified by the id of {@code record}. */
int updateByPrimaryKey(BookParseLog record);
}

View File

@ -0,0 +1,55 @@
package xyz.zinglizingli.books.po;
import java.util.Date;
/**
 * Plain data holder for one parse-log entry: the URL and name of a book page,
 * its score and the time the entry was created.
 * String setters store a trimmed copy; {@code null} is stored as-is.
 */
public class BookParseLog {
    private Long id;
    private String bookUrl;
    private String bookName;
    private Float score;
    private Date createTime;

    /** @return the primary key, or {@code null} if not set */
    public Long getId() {
        return this.id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    /** @return the book page URL, or {@code null} if not set */
    public String getBookUrl() {
        return this.bookUrl;
    }

    /** Stores the URL with leading and trailing whitespace removed. */
    public void setBookUrl(String bookUrl) {
        if (bookUrl == null) {
            this.bookUrl = null;
        } else {
            this.bookUrl = bookUrl.trim();
        }
    }

    /** @return the book name, or {@code null} if not set */
    public String getBookName() {
        return this.bookName;
    }

    /** Stores the name with leading and trailing whitespace removed. */
    public void setBookName(String bookName) {
        if (bookName == null) {
            this.bookName = null;
        } else {
            this.bookName = bookName.trim();
        }
    }

    /** @return the score, or {@code null} if not set */
    public Float getScore() {
        return this.score;
    }

    public void setScore(Float score) {
        this.score = score;
    }

    /** @return the creation time (the same instance that was set), or {@code null} */
    public Date getCreateTime() {
        return this.createTime;
    }

    public void setCreateTime(Date createTime) {
        this.createTime = createTime;
    }
}

View File

@ -0,0 +1,521 @@
package xyz.zinglizingli.books.po;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
 * Query-by-example holder for parse-log queries.
 *
 * An example carries an optional order-by clause, a distinct flag and a list of
 * {@link Criteria} groups. Each group collects {@link Criterion} entries through the
 * fluent {@code andXxx} methods; every such method appends one entry and returns the group.
 */
public class BookParseLogExample {
    protected String orderByClause;

    protected boolean distinct;

    protected List<Criteria> oredCriteria;

    public BookParseLogExample() {
        oredCriteria = new ArrayList<>();
    }

    public void setOrderByClause(String orderByClause) {
        this.orderByClause = orderByClause;
    }

    public String getOrderByClause() {
        return orderByClause;
    }

    public void setDistinct(boolean distinct) {
        this.distinct = distinct;
    }

    public boolean isDistinct() {
        return distinct;
    }

    public List<Criteria> getOredCriteria() {
        return oredCriteria;
    }

    /** Appends an existing criteria group. */
    public void or(Criteria criteria) {
        oredCriteria.add(criteria);
    }

    /** Creates a new criteria group, appends it and returns it. */
    public Criteria or() {
        Criteria added = createCriteriaInternal();
        oredCriteria.add(added);
        return added;
    }

    /**
     * Creates a new criteria group. It is registered only when no group exists yet;
     * otherwise the returned group is detached from this example.
     */
    public Criteria createCriteria() {
        Criteria created = createCriteriaInternal();
        if (oredCriteria.isEmpty()) {
            oredCriteria.add(created);
        }
        return created;
    }

    protected Criteria createCriteriaInternal() {
        return new Criteria();
    }

    /** Removes all criteria groups and resets the order-by clause and distinct flag. */
    public void clear() {
        oredCriteria.clear();
        orderByClause = null;
        distinct = false;
    }

    /**
     * Base of {@link Criteria}: holds the criterion list and the per-column fluent methods.
     */
    protected abstract static class GeneratedCriteria {
        protected List<Criterion> criteria;

        protected GeneratedCriteria() {
            criteria = new ArrayList<>();
        }

        /** @return {@code true} once at least one criterion has been added */
        public boolean isValid() {
            return !criteria.isEmpty();
        }

        public List<Criterion> getAllCriteria() {
            return criteria;
        }

        public List<Criterion> getCriteria() {
            return criteria;
        }

        protected void addCriterion(String condition) {
            if (condition == null) {
                throw new RuntimeException("Value for condition cannot be null");
            }
            criteria.add(new Criterion(condition));
        }

        protected void addCriterion(String condition, Object value, String property) {
            if (value == null) {
                throw new RuntimeException("Value for " + property + " cannot be null");
            }
            criteria.add(new Criterion(condition, value));
        }

        protected void addCriterion(String condition, Object value1, Object value2, String property) {
            if (value1 == null || value2 == null) {
                throw new RuntimeException("Between values for " + property + " cannot be null");
            }
            criteria.add(new Criterion(condition, value1, value2));
        }

        /** Adds a value-less condition and returns this group. */
        private Criteria bare(String condition) {
            addCriterion(condition);
            return (Criteria) this;
        }

        /** Adds a single-value (or list-value) condition and returns this group. */
        private Criteria with(String condition, Object value, String property) {
            addCriterion(condition, value, property);
            return (Criteria) this;
        }

        /** Adds a two-value range condition and returns this group. */
        private Criteria between(String condition, Object low, Object high, String property) {
            addCriterion(condition, low, high, property);
            return (Criteria) this;
        }

        // ---- id ----

        public Criteria andIdIsNull() {
            return bare("id is null");
        }

        public Criteria andIdIsNotNull() {
            return bare("id is not null");
        }

        public Criteria andIdEqualTo(Long value) {
            return with("id =", value, "id");
        }

        public Criteria andIdNotEqualTo(Long value) {
            return with("id <>", value, "id");
        }

        public Criteria andIdGreaterThan(Long value) {
            return with("id >", value, "id");
        }

        public Criteria andIdGreaterThanOrEqualTo(Long value) {
            return with("id >=", value, "id");
        }

        public Criteria andIdLessThan(Long value) {
            return with("id <", value, "id");
        }

        public Criteria andIdLessThanOrEqualTo(Long value) {
            return with("id <=", value, "id");
        }

        public Criteria andIdIn(List<Long> values) {
            return with("id in", values, "id");
        }

        public Criteria andIdNotIn(List<Long> values) {
            return with("id not in", values, "id");
        }

        public Criteria andIdBetween(Long value1, Long value2) {
            return between("id between", value1, value2, "id");
        }

        public Criteria andIdNotBetween(Long value1, Long value2) {
            return between("id not between", value1, value2, "id");
        }

        // ---- book_url ----

        public Criteria andBookUrlIsNull() {
            return bare("book_url is null");
        }

        public Criteria andBookUrlIsNotNull() {
            return bare("book_url is not null");
        }

        public Criteria andBookUrlEqualTo(String value) {
            return with("book_url =", value, "bookUrl");
        }

        public Criteria andBookUrlNotEqualTo(String value) {
            return with("book_url <>", value, "bookUrl");
        }

        public Criteria andBookUrlGreaterThan(String value) {
            return with("book_url >", value, "bookUrl");
        }

        public Criteria andBookUrlGreaterThanOrEqualTo(String value) {
            return with("book_url >=", value, "bookUrl");
        }

        public Criteria andBookUrlLessThan(String value) {
            return with("book_url <", value, "bookUrl");
        }

        public Criteria andBookUrlLessThanOrEqualTo(String value) {
            return with("book_url <=", value, "bookUrl");
        }

        public Criteria andBookUrlLike(String value) {
            return with("book_url like", value, "bookUrl");
        }

        public Criteria andBookUrlNotLike(String value) {
            return with("book_url not like", value, "bookUrl");
        }

        public Criteria andBookUrlIn(List<String> values) {
            return with("book_url in", values, "bookUrl");
        }

        public Criteria andBookUrlNotIn(List<String> values) {
            return with("book_url not in", values, "bookUrl");
        }

        public Criteria andBookUrlBetween(String value1, String value2) {
            return between("book_url between", value1, value2, "bookUrl");
        }

        public Criteria andBookUrlNotBetween(String value1, String value2) {
            return between("book_url not between", value1, value2, "bookUrl");
        }

        // ---- book_name ----

        public Criteria andBookNameIsNull() {
            return bare("book_name is null");
        }

        public Criteria andBookNameIsNotNull() {
            return bare("book_name is not null");
        }

        public Criteria andBookNameEqualTo(String value) {
            return with("book_name =", value, "bookName");
        }

        public Criteria andBookNameNotEqualTo(String value) {
            return with("book_name <>", value, "bookName");
        }

        public Criteria andBookNameGreaterThan(String value) {
            return with("book_name >", value, "bookName");
        }

        public Criteria andBookNameGreaterThanOrEqualTo(String value) {
            return with("book_name >=", value, "bookName");
        }

        public Criteria andBookNameLessThan(String value) {
            return with("book_name <", value, "bookName");
        }

        public Criteria andBookNameLessThanOrEqualTo(String value) {
            return with("book_name <=", value, "bookName");
        }

        public Criteria andBookNameLike(String value) {
            return with("book_name like", value, "bookName");
        }

        public Criteria andBookNameNotLike(String value) {
            return with("book_name not like", value, "bookName");
        }

        public Criteria andBookNameIn(List<String> values) {
            return with("book_name in", values, "bookName");
        }

        public Criteria andBookNameNotIn(List<String> values) {
            return with("book_name not in", values, "bookName");
        }

        public Criteria andBookNameBetween(String value1, String value2) {
            return between("book_name between", value1, value2, "bookName");
        }

        public Criteria andBookNameNotBetween(String value1, String value2) {
            return between("book_name not between", value1, value2, "bookName");
        }

        // ---- score ----

        public Criteria andScoreIsNull() {
            return bare("score is null");
        }

        public Criteria andScoreIsNotNull() {
            return bare("score is not null");
        }

        public Criteria andScoreEqualTo(Float value) {
            return with("score =", value, "score");
        }

        public Criteria andScoreNotEqualTo(Float value) {
            return with("score <>", value, "score");
        }

        public Criteria andScoreGreaterThan(Float value) {
            return with("score >", value, "score");
        }

        public Criteria andScoreGreaterThanOrEqualTo(Float value) {
            return with("score >=", value, "score");
        }

        public Criteria andScoreLessThan(Float value) {
            return with("score <", value, "score");
        }

        public Criteria andScoreLessThanOrEqualTo(Float value) {
            return with("score <=", value, "score");
        }

        public Criteria andScoreIn(List<Float> values) {
            return with("score in", values, "score");
        }

        public Criteria andScoreNotIn(List<Float> values) {
            return with("score not in", values, "score");
        }

        public Criteria andScoreBetween(Float value1, Float value2) {
            return between("score between", value1, value2, "score");
        }

        public Criteria andScoreNotBetween(Float value1, Float value2) {
            return between("score not between", value1, value2, "score");
        }

        // ---- create_time ----

        public Criteria andCreateTimeIsNull() {
            return bare("create_time is null");
        }

        public Criteria andCreateTimeIsNotNull() {
            return bare("create_time is not null");
        }

        public Criteria andCreateTimeEqualTo(Date value) {
            return with("create_time =", value, "createTime");
        }

        public Criteria andCreateTimeNotEqualTo(Date value) {
            return with("create_time <>", value, "createTime");
        }

        public Criteria andCreateTimeGreaterThan(Date value) {
            return with("create_time >", value, "createTime");
        }

        public Criteria andCreateTimeGreaterThanOrEqualTo(Date value) {
            return with("create_time >=", value, "createTime");
        }

        public Criteria andCreateTimeLessThan(Date value) {
            return with("create_time <", value, "createTime");
        }

        public Criteria andCreateTimeLessThanOrEqualTo(Date value) {
            return with("create_time <=", value, "createTime");
        }

        public Criteria andCreateTimeIn(List<Date> values) {
            return with("create_time in", values, "createTime");
        }

        public Criteria andCreateTimeNotIn(List<Date> values) {
            return with("create_time not in", values, "createTime");
        }

        public Criteria andCreateTimeBetween(Date value1, Date value2) {
            return between("create_time between", value1, value2, "createTime");
        }

        public Criteria andCreateTimeNotBetween(Date value1, Date value2) {
            return between("create_time not between", value1, value2, "createTime");
        }
    }

    /** Concrete criteria group; adds nothing beyond {@link GeneratedCriteria}. */
    public static class Criteria extends GeneratedCriteria {
        protected Criteria() {
            super();
        }
    }

    /**
     * One condition: its SQL fragment, up to two values, and a flag telling which of the
     * four shapes (no value, single value, between, list) it has.
     */
    public static class Criterion {
        private String condition;
        private Object value;
        private Object secondValue;
        private boolean noValue;
        private boolean singleValue;
        private boolean betweenValue;
        private boolean listValue;
        private String typeHandler;

        protected Criterion(String condition) {
            this.condition = condition;
            this.typeHandler = null;
            this.noValue = true;
        }

        protected Criterion(String condition, Object value, String typeHandler) {
            this.condition = condition;
            this.value = value;
            this.typeHandler = typeHandler;
            // A List value marks the criterion as a list condition; anything else is a single value.
            boolean isList = value instanceof List<?>;
            this.listValue = isList;
            this.singleValue = !isList;
        }

        protected Criterion(String condition, Object value) {
            this(condition, value, null);
        }

        protected Criterion(String condition, Object value, Object secondValue, String typeHandler) {
            this.condition = condition;
            this.value = value;
            this.secondValue = secondValue;
            this.typeHandler = typeHandler;
            this.betweenValue = true;
        }

        protected Criterion(String condition, Object value, Object secondValue) {
            this(condition, value, secondValue, null);
        }

        public String getCondition() {
            return condition;
        }

        public Object getValue() {
            return value;
        }

        public Object getSecondValue() {
            return secondValue;
        }

        public boolean isNoValue() {
            return noValue;
        }

        public boolean isSingleValue() {
            return singleValue;
        }

        public boolean isBetweenValue() {
            return betweenValue;
        }

        public boolean isListValue() {
            return listValue;
        }

        public String getTypeHandler() {
            return typeHandler;
        }
    }
}

View File

@ -49,6 +49,8 @@ public class BookService {
private final UserRefBookMapper userRefBookMapper;
private final BookParseLogMapper bookParseLogMapper;
private final CommonCacheUtil cacheUtil;
@ -111,13 +113,13 @@ public class BookService {
newBookIndexList.add(bookIndexItem);
newContentList.add(bookContentItem);
}
//一次最多只允许插入100条记录,否则影响服务器响应
if (isUpdate && i % 100 == 0 && newBookIndexList.size() > 0) {
//一次最多只允许插入50条记录,否则影响服务器响应
if (isUpdate && i % 50 == 0 && newBookIndexList.size() > 0) {
bookService.insertIndexListAndContentList(newBookIndexList, newContentList);
newBookIndexList = new ArrayList<>();
newContentList = new ArrayList<>();
try {
Thread.sleep(1000 * 60 * 1);
Thread.sleep(1000 * 30);
} catch (InterruptedException e) {
log.error(e.getMessage(), e);
throw new RuntimeException(e.getMessage());
@ -442,4 +444,42 @@ public class BookService {
bookExample.createCriteria().andPicUrlLike('%'+fileName+'%');
return bookMapper.countByExample(bookExample);
}
/**
 * Adds a parse log for a book page unless a log with the same URL already exists.
 *
 * @param bookUrl  URL of the book page; also the key used for the duplicate check
 * @param bookName name of the book
 * @param score    score of the book
 */
public void addBookParseLog(String bookUrl, String bookName, Float score) {
    BookParseLogExample sameUrl = new BookParseLogExample();
    sameUrl.createCriteria().andBookUrlEqualTo(bookUrl);
    boolean alreadyLogged = bookParseLogMapper.countByExample(sameUrl) != 0;
    if (alreadyLogged) {
        return;
    }
    BookParseLog entry = new BookParseLog();
    entry.setBookUrl(bookUrl);
    entry.setBookName(bookName);
    entry.setScore(score);
    entry.setCreateTime(new Date());
    bookParseLogMapper.insertSelective(entry);
}
/**
 * Queries parse logs ordered by creation time, newest first.
 * The query runs under {@code PageHelper.startPage(1, 100)}.
 *
 * @return the parse logs selected by the mapper
 */
public List<BookParseLog> queryBookParseLogs() {
    BookParseLogExample newestFirst = new BookParseLogExample();
    newestFirst.setOrderByClause("create_time desc");
    PageHelper.startPage(1, 100);
    return bookParseLogMapper.selectByExample(newestFirst);
}
/**
 * Deletes the parse logs that were updated successfully.
 * Does nothing when the id list is empty.
 *
 * @param successLogIds ids of the parse logs to delete
 */
public void deleteBookParseLogs(List<Long> successLogIds) {
    if (successLogIds.isEmpty()) {
        return;
    }
    BookParseLogExample byIds = new BookParseLogExample();
    byIds.createCriteria().andIdIn(successLogIds);
    bookParseLogMapper.deleteByExample(byIds);
}
}

View File

@ -66,7 +66,7 @@ public class IndexController {
if (newBooks == null) {
//查询最近更新数据
newBooks = bookService.search(1, 20, null, null, null, null, null, null, null, "update_time", "DESC");
commonCacheUtil.setObject(CacheKeyConstans.NEWST_BOOK_LIST_KEY, newBooks, 60 * 30);
commonCacheUtil.setObject(CacheKeyConstans.NEWST_BOOK_LIST_KEY, newBooks, 60 * 10);
}
modelMap.put("recBooks", recBooks);
modelMap.put("hotBooks", hotBooks);

View File

@ -7,9 +7,9 @@
<property name="suppressAllComments" value="true" />
</commentGenerator>
<jdbcConnection
connectionURL="jdbc:mysql://localhost:3306/books?useUnicode=true&amp;characterEncoding=utf-8"
driverClass="com.mysql.jdbc.Driver" password="books"
userId="books" />
connectionURL="jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&amp;characterEncoding=utf-8"
driverClass="com.mysql.jdbc.Driver" password="test123456"
userId="root" />
<!-- 默认false把JDBC DECIMAL 和 NUMERIC 类型解析为 Integer 为 true时把JDBC DECIMAL
和 NUMERIC 类型解析为java.math.BigDecimal -->
@ -42,7 +42,7 @@
<property name="enableSubPackages" value="false" />
</javaClientGenerator>
<table schema="books" tableName="book_content"/>
<table tableName="book_parse_log"/>
<!-- 指定数据库表 -->
<!--<table schema="jly" tableName="job_position" domainObjectName="JobPositionTest"/>-->

View File

@ -0,0 +1,211 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd" >
<mapper namespace="xyz.zinglizingli.books.mapper.BookParseLogMapper" >
<!-- Maps the five book_parse_log columns onto the properties of the BookParseLog PO; id is the key column. -->
<resultMap id="BaseResultMap" type="xyz.zinglizingli.books.po.BookParseLog" >
<id column="id" property="id" jdbcType="BIGINT" />
<result column="book_url" property="bookUrl" jdbcType="VARCHAR" />
<result column="book_name" property="bookName" jdbcType="VARCHAR" />
<result column="score" property="score" jdbcType="REAL" />
<result column="create_time" property="createTime" jdbcType="TIMESTAMP" />
</resultMap>
<!--
  WHERE clause built from the example object passed as the statement parameter.
  Each valid criteria group becomes one parenthesised block; groups are joined with "or".
  Inside a group every criterion is prefixed with "and" (the leading one is trimmed).
  The condition text is inserted with ${...}; values are bound with #{...}.
  The four branches correspond to the criterion flags noValue, singleValue, betweenValue and listValue.
-->
<sql id="Example_Where_Clause" >
<where >
<foreach collection="oredCriteria" item="criteria" separator="or" >
<if test="criteria.valid" >
<trim prefix="(" suffix=")" prefixOverrides="and" >
<foreach collection="criteria.criteria" item="criterion" >
<choose >
<when test="criterion.noValue" >
and ${criterion.condition}
</when>
<when test="criterion.singleValue" >
and ${criterion.condition} #{criterion.value}
</when>
<when test="criterion.betweenValue" >
and ${criterion.condition} #{criterion.value} and #{criterion.secondValue}
</when>
<when test="criterion.listValue" >
and ${criterion.condition}
<foreach collection="criterion.value" item="listItem" open="(" close=")" separator="," >
#{listItem}
</foreach>
</when>
</choose>
</foreach>
</trim>
</if>
</foreach>
</where>
</sql>
<!--
  Same WHERE construction as Example_Where_Clause, but the criteria are read from
  example.oredCriteria, i.e. from the parameter named "example".
  Included by the two update-by-example statements, which take parameterType="map".
-->
<sql id="Update_By_Example_Where_Clause" >
<where >
<foreach collection="example.oredCriteria" item="criteria" separator="or" >
<if test="criteria.valid" >
<trim prefix="(" suffix=")" prefixOverrides="and" >
<foreach collection="criteria.criteria" item="criterion" >
<choose >
<when test="criterion.noValue" >
and ${criterion.condition}
</when>
<when test="criterion.singleValue" >
and ${criterion.condition} #{criterion.value}
</when>
<when test="criterion.betweenValue" >
and ${criterion.condition} #{criterion.value} and #{criterion.secondValue}
</when>
<when test="criterion.listValue" >
and ${criterion.condition}
<foreach collection="criterion.value" item="listItem" open="(" close=")" separator="," >
#{listItem}
</foreach>
</when>
</choose>
</foreach>
</trim>
</if>
</foreach>
</where>
</sql>
<!-- Column list shared by the select statements. -->
<sql id="Base_Column_List" >
id, book_url, book_name, score, create_time
</sql>
<!--
  Selects rows matching the example, optionally with "distinct" and an "order by".
  NOTE(review): orderByClause is inserted with ${...}, so it is pasted into the SQL as text;
  callers should only pass fixed, trusted clauses.
-->
<select id="selectByExample" resultMap="BaseResultMap" parameterType="xyz.zinglizingli.books.po.BookParseLogExample" >
select
<if test="distinct" >
distinct
</if>
<include refid="Base_Column_List" />
from book_parse_log
<if test="_parameter != null" >
<include refid="Example_Where_Clause" />
</if>
<if test="orderByClause != null" >
order by ${orderByClause}
</if>
</select>
<!-- Selects the single row whose id equals the given key. -->
<select id="selectByPrimaryKey" resultMap="BaseResultMap" parameterType="java.lang.Long" >
select
<include refid="Base_Column_List" />
from book_parse_log
where id = #{id,jdbcType=BIGINT}
</select>
<!-- Deletes the row whose id equals the given key. -->
<delete id="deleteByPrimaryKey" parameterType="java.lang.Long" >
delete from book_parse_log
where id = #{id,jdbcType=BIGINT}
</delete>
<!-- Deletes the rows matching the example; with a null parameter no WHERE clause is emitted. -->
<delete id="deleteByExample" parameterType="xyz.zinglizingli.books.po.BookParseLogExample" >
delete from book_parse_log
<if test="_parameter != null" >
<include refid="Example_Where_Clause" />
</if>
</delete>
<!-- Inserts a row writing all five columns, whether or not the corresponding property is null. -->
<insert id="insert" parameterType="xyz.zinglizingli.books.po.BookParseLog" >
insert into book_parse_log (id, book_url, book_name,
score, create_time)
values (#{id,jdbcType=BIGINT}, #{bookUrl,jdbcType=VARCHAR}, #{bookName,jdbcType=VARCHAR},
#{score,jdbcType=REAL}, #{createTime,jdbcType=TIMESTAMP})
</insert>
<!--
  Inserts a row listing only the columns whose property is non-null.
  The first trim builds the column list, the second the matching values list;
  both strip the trailing comma.
-->
<insert id="insertSelective" parameterType="xyz.zinglizingli.books.po.BookParseLog" >
insert into book_parse_log
<trim prefix="(" suffix=")" suffixOverrides="," >
<if test="id != null" >
id,
</if>
<if test="bookUrl != null" >
book_url,
</if>
<if test="bookName != null" >
book_name,
</if>
<if test="score != null" >
score,
</if>
<if test="createTime != null" >
create_time,
</if>
</trim>
<trim prefix="values (" suffix=")" suffixOverrides="," >
<if test="id != null" >
#{id,jdbcType=BIGINT},
</if>
<if test="bookUrl != null" >
#{bookUrl,jdbcType=VARCHAR},
</if>
<if test="bookName != null" >
#{bookName,jdbcType=VARCHAR},
</if>
<if test="score != null" >
#{score,jdbcType=REAL},
</if>
<if test="createTime != null" >
#{createTime,jdbcType=TIMESTAMP},
</if>
</trim>
</insert>
<!-- Counts the rows matching the example; with a null parameter all rows are counted. -->
<select id="countByExample" parameterType="xyz.zinglizingli.books.po.BookParseLogExample" resultType="java.lang.Integer" >
select count(*) from book_parse_log
<if test="_parameter != null" >
<include refid="Example_Where_Clause" />
</if>
</select>
<!--
  Updates the rows matching the "example" parameter, setting only the columns whose
  property on the "record" parameter is non-null (id included).
-->
<update id="updateByExampleSelective" parameterType="map" >
update book_parse_log
<set >
<if test="record.id != null" >
id = #{record.id,jdbcType=BIGINT},
</if>
<if test="record.bookUrl != null" >
book_url = #{record.bookUrl,jdbcType=VARCHAR},
</if>
<if test="record.bookName != null" >
book_name = #{record.bookName,jdbcType=VARCHAR},
</if>
<if test="record.score != null" >
score = #{record.score,jdbcType=REAL},
</if>
<if test="record.createTime != null" >
create_time = #{record.createTime,jdbcType=TIMESTAMP},
</if>
</set>
<if test="_parameter != null" >
<include refid="Update_By_Example_Where_Clause" />
</if>
</update>
<!--
  Updates the rows matching the "example" parameter, overwriting all five columns
  (id included) with the values of the "record" parameter, null or not.
-->
<update id="updateByExample" parameterType="map" >
update book_parse_log
set id = #{record.id,jdbcType=BIGINT},
book_url = #{record.bookUrl,jdbcType=VARCHAR},
book_name = #{record.bookName,jdbcType=VARCHAR},
score = #{record.score,jdbcType=REAL},
create_time = #{record.createTime,jdbcType=TIMESTAMP}
<if test="_parameter != null" >
<include refid="Update_By_Example_Where_Clause" />
</if>
</update>
<!-- Updates the row with the record's id, setting only the non-key columns whose property is non-null. -->
<update id="updateByPrimaryKeySelective" parameterType="xyz.zinglizingli.books.po.BookParseLog" >
update book_parse_log
<set >
<if test="bookUrl != null" >
book_url = #{bookUrl,jdbcType=VARCHAR},
</if>
<if test="bookName != null" >
book_name = #{bookName,jdbcType=VARCHAR},
</if>
<if test="score != null" >
score = #{score,jdbcType=REAL},
</if>
<if test="createTime != null" >
create_time = #{createTime,jdbcType=TIMESTAMP},
</if>
</set>
where id = #{id,jdbcType=BIGINT}
</update>
<!-- Updates the row with the record's id, overwriting all four non-key columns, null or not. -->
<update id="updateByPrimaryKey" parameterType="xyz.zinglizingli.books.po.BookParseLog" >
update book_parse_log
set book_url = #{bookUrl,jdbcType=VARCHAR},
book_name = #{bookName,jdbcType=VARCHAR},
score = #{score,jdbcType=REAL},
create_time = #{createTime,jdbcType=TIMESTAMP}
where id = #{id,jdbcType=BIGINT}
</update>
</mapper>