Add configuration for the auto-update thread count

xxy 2020-05-08 07:49:41 +08:00
parent 0e2e610d18
commit c9428bf0e7
3 changed files with 52 additions and 35 deletions

StarterListener.java

@@ -9,9 +9,11 @@
 import com.java2nb.novel.entity.BookIndex;
 import com.java2nb.novel.entity.CrawlSource;
 import com.java2nb.novel.service.BookService;
 import com.java2nb.novel.service.CrawlService;
+import com.java2nb.novel.utils.Constants;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.lang3.time.DateUtils;
+import org.springframework.beans.factory.annotation.Value;
 import javax.servlet.ServletContextEvent;
 import javax.servlet.ServletContextListener;
@@ -33,43 +35,54 @@ public class StarterListener implements ServletContextListener {
     private final CrawlService crawlService;
+    @Value("${crawl.update.thread}")
+    private int updateThreadCount;
     @Override
     public void contextInitialized(ServletContextEvent sce) {
         log.info("Application started, launching the auto-update threads...");
-        new Thread(() -> {
-            while (true) {
-                try {
-                    // 1. Query the first 100 books whose latest chapter-index update falls within the past month
-                    Date currentDate = new Date();
-                    Date startDate = DateUtils.addDays(currentDate, -30);
-                    List<Book> bookList = bookService.queryNeedUpdateBook(startDate, 100);
-                    for (Book needUpdateBook : bookList) {
-                        try {
-                            // Query the crawl-source rule
-                            CrawlSource source = crawlService.queryCrawlSource(needUpdateBook.getCrawlSourceId());
-                            RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class);
-                            // Parse the basic book information
-                            Book book = CrawlParser.parseBook(ruleBean, needUpdateBook.getCrawlBookId());
-                            // Only existing books are updated here
-                            book.setId(needUpdateBook.getId());
-                            book.setPicUrl(needUpdateBook.getPicUrl());
-                            // Query the chapters that already exist
-                            Map<Integer, BookIndex> existBookIndexMap = bookService.queryExistBookIndexMap(needUpdateBook.getId());
-                            // Parse the chapter index
-                            Map<Integer, List> indexAndContentList = CrawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(), book, ruleBean, existBookIndexMap);
-                            bookService.updateBookAndIndexAndContent(book, (List<BookIndex>) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List<BookContent>) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY), existBookIndexMap);
-                        } catch (Exception e) {
-                            log.error(e.getMessage(), e);
-                        }
-                    }
-                    Thread.sleep(1000 * 60 * 10);
-                } catch (Exception e) {
-                    log.error(e.getMessage(), e);
-                }
-            }
-        }).start();
+        for (int i = 0; i < updateThreadCount; i++) {
+            new Thread(() -> {
+                while (true) {
+                    try {
+                        // 1. Query the first 100 books whose latest chapter-index update falls within the past month
+                        Date currentDate = new Date();
+                        Date startDate = DateUtils.addDays(currentDate, -30);
+                        List<Book> bookList;
+                        synchronized (this) {
+                            bookList = bookService.queryNeedUpdateBook(startDate, 100);
+                        }
+                        for (Book needUpdateBook : bookList) {
+                            try {
+                                // Query the crawl-source rule
+                                CrawlSource source = crawlService.queryCrawlSource(needUpdateBook.getCrawlSourceId());
+                                RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class);
+                                // Parse the basic book information
+                                Book book = CrawlParser.parseBook(ruleBean, needUpdateBook.getCrawlBookId());
+                                // Only existing books are updated here
+                                book.setId(needUpdateBook.getId());
+                                if (needUpdateBook.getPicUrl() != null && needUpdateBook.getPicUrl().startsWith(Constants.LOCAL_PIC_PREFIX)) {
+                                    // The cover is a locally stored image, so leave it untouched
+                                    book.setPicUrl(null);
+                                }
+                                // Query the chapters that already exist
+                                Map<Integer, BookIndex> existBookIndexMap = bookService.queryExistBookIndexMap(needUpdateBook.getId());
+                                // Parse the chapter index
+                                Map<Integer, List> indexAndContentList = CrawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(), book, ruleBean, existBookIndexMap);
+                                bookService.updateBookAndIndexAndContent(book, (List<BookIndex>) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List<BookContent>) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY), existBookIndexMap);
+                            } catch (Exception e) {
+                                log.error(e.getMessage(), e);
+                            }
+                        }
+                        Thread.sleep(1000 * 60 * 10);
+                    } catch (Exception e) {
+                        log.error(e.getMessage(), e);
+                    }
+                }
+            }).start();
+        }
     }
 }
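
The change above replaces the single hard-coded update thread with updateThreadCount polling threads, each of which re-crawls a batch of up to 100 recently updated books and then sleeps for ten minutes; the synchronized block serializes only the queryNeedUpdateBook call. For comparison, the same fan-out-and-poll pattern can be sketched with a ScheduledExecutorService instead of raw threads (a swapped-in technique for illustration, not the project's code; pollOnce is a hypothetical placeholder for the crawl-and-persist pass):

    import java.util.concurrent.Executors;
    import java.util.concurrent.ScheduledExecutorService;
    import java.util.concurrent.TimeUnit;

    public class UpdateScheduler {
        public static void main(String[] args) {
            int updateThreadCount = 1; // mirrors the new crawl.update.thread property
            ScheduledExecutorService pool = Executors.newScheduledThreadPool(updateThreadCount);
            for (int i = 0; i < updateThreadCount; i++) {
                // Each worker runs pollOnce, then waits 10 minutes before its next pass,
                // matching the Thread.sleep(1000 * 60 * 10) in the listener.
                pool.scheduleWithFixedDelay(UpdateScheduler::pollOnce, 0, 10, TimeUnit.MINUTES);
            }
        }

        private static void pollOnce() {
            // Placeholder for: query up to 100 books needing update, re-crawl each
            // one according to its source rule, and persist any new chapters.
            System.out.println(Thread.currentThread().getName() + " polling for books to update");
        }
    }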

BookServiceImpl.java

@@ -180,10 +180,6 @@ public class BookServiceImpl implements BookService {
         if (Constants.VISIT_COUNT_DEFAULT.equals(book.getVisitCount())) {
             book.setVisitCount(null);
         }
-        if (book.getPicUrl() != null && book.getPicUrl().startsWith(Constants.LOCAL_PIC_PREFIX)) {
-            // The cover is a locally stored image, so leave it untouched
-            book.setPicUrl(null);
-        }
         bookMapper.updateByPrimaryKeySelective(book);
     }
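
The four deleted lines are not lost: the same local-cover guard now runs in StarterListener before the book reaches this method. Nulling picUrl only skips the column because updateByPrimaryKeySelective follows the usual MyBatis Generator contract of omitting null fields from the generated SET clause. A minimal sketch of that contract, reusing the project's Book and BookMapper types (the values are made up for illustration):

    // Only non-null fields are written; null fields keep their stored values.
    Book book = new Book();
    book.setId(1L);           // primary key for the WHERE clause
    book.setPicUrl(null);     // omitted from SET: the stored local cover path survives
    book.setVisitCount(null); // omitted from SET: the stored visit count survives
    bookMapper.updateByPrimaryKeySelective(book);
    // Resulting SQL is roughly: UPDATE book SET <non-null columns only> WHERE id = ?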

application.yml

@@ -1,3 +1,4 @@
+# Port number
 server:
   port: 8081
@@ -5,9 +6,16 @@ spring:
   profiles:
     active: dev
 # Admin login username and password
 admin:
   username: admin
   password: admin
+# Number of threads for the crawler auto-update
+# With only a few novels, or while the new-book ingestion crawler is running, 1 is enough
+# This can be raised as the number of novels grows, but preferably not beyond the CPU's thread count
+crawl:
+  update:
+    thread: 1
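
As wired in StarterListener, @Value("${crawl.update.thread}") treats the new property as mandatory, so a missing entry fails the application at startup. If a fallback were preferred (an assumption, not what this commit does), Spring's default-value syntax would provide one:

    // Hypothetical variant: fall back to a single update thread when the
    // crawl.update.thread property is absent from the configuration.
    @Value("${crawl.update.thread:1}")
    private int updateThreadCount;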