mirror of https://github.com/201206030/novel-plus.git

Commit: Add single-book crawl task (增加单本采集任务)
@@ -3,11 +3,12 @@ package com.java2nb.novel.controller;
 import com.github.pagehelper.PageInfo;
 import com.java2nb.novel.core.bean.ResultBean;
 import com.java2nb.novel.core.utils.BeanUtil;
+import com.java2nb.novel.entity.CrawlSingleTask;
 import com.java2nb.novel.entity.CrawlSource;
 import com.java2nb.novel.service.CrawlService;
+import com.java2nb.novel.vo.CrawlSingleTaskVO;
 import com.java2nb.novel.vo.CrawlSourceVO;
 import lombok.RequiredArgsConstructor;
 import org.springframework.stereotype.Controller;
 import org.springframework.web.bind.annotation.PostMapping;
 import org.springframework.web.bind.annotation.RequestMapping;
 import org.springframework.web.bind.annotation.RequestParam;
@@ -56,6 +57,38 @@ public class CrawlController {
         return ResultBean.ok();
     }
 
+    /**
+     * Add a single-book crawl task
+     */
+    @PostMapping("addCrawlSingleTask")
+    public ResultBean addCrawlSingleTask(CrawlSingleTask singleTask) {
+        crawlService.addCrawlSingleTask(singleTask);
+        return ResultBean.ok();
+    }
+
+    /**
+     * Paged query of single-book crawl tasks
+     */
+    @PostMapping("listCrawlSingleTaskByPage")
+    public ResultBean listCrawlSingleTaskByPage(@RequestParam(value = "curr", defaultValue = "1") int page,
+                                                @RequestParam(value = "limit", defaultValue = "10") int pageSize) {
+        return ResultBean.ok(new PageInfo<>(
+                BeanUtil.copyList(crawlService.listCrawlSingleTaskByPage(page, pageSize), CrawlSingleTaskVO.class)));
+    }
+
+    /**
+     * Delete a crawl task
+     */
+    @PostMapping("delCrawlSingleTask")
+    public ResultBean delCrawlSingleTask(Long id) {
+        crawlService.delCrawlSingleTask(id);
+        return ResultBean.ok();
+    }
+
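Note: a quick way to smoke-test the new endpoints. The host, port and the
"/crawl/" prefix are assumptions -- the controller's class-level
@RequestMapping is not shown in this diff. The form fields (sourceId,
sourceBookId, catId, bookName, authorName) are the CrawlSingleTask properties
used elsewhere in this commit.

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class CrawlSingleTaskSmokeTest {
        public static void main(String[] args) throws Exception {
            HttpClient client = HttpClient.newHttpClient();
            // create a task: crawl book 12345 from source 1 into category 7
            HttpRequest add = HttpRequest.newBuilder()
                    .uri(URI.create("http://localhost:8080/crawl/addCrawlSingleTask"))
                    .header("Content-Type", "application/x-www-form-urlencoded")
                    .POST(HttpRequest.BodyPublishers.ofString(
                            "sourceId=1&sourceBookId=12345&catId=7&bookName=demo&authorName=demo"))
                    .build();
            System.out.println(client.send(add, HttpResponse.BodyHandlers.ofString()).body());
            // the same pattern works for listCrawlSingleTaskByPage (curr, limit)
            // and delCrawlSingleTask (id)
        }
    }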
@@ -3,10 +3,7 @@ package com.java2nb.novel.core.listener;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.java2nb.novel.core.crawl.CrawlParser;
 import com.java2nb.novel.core.crawl.RuleBean;
-import com.java2nb.novel.entity.Book;
-import com.java2nb.novel.entity.BookContent;
-import com.java2nb.novel.entity.BookIndex;
-import com.java2nb.novel.entity.CrawlSource;
+import com.java2nb.novel.entity.*;
 import com.java2nb.novel.service.BookService;
 import com.java2nb.novel.service.CrawlService;
 import com.java2nb.novel.utils.Constants;
@@ -40,15 +37,15 @@ public class StarterListener implements ServletContextListener {
 
     @Override
     public void contextInitialized(ServletContextEvent sce) {
-        log.info("程序启动,开始执行自动更新线程。。。");
-        for(int i = 0 ; i<updateThreadCount; i++) {
+        for (int i = 0; i < updateThreadCount; i++) {
             new Thread(() -> {
+                log.info("程序启动,开始执行自动更新线程。。。");  // "application started, launching auto-update thread..."
                 while (true) {
                     try {
                         // 1. fetch the first 100 books needing update whose latest chapter update is within the last month
                         Date currentDate = new Date();
                         Date startDate = DateUtils.addDays(currentDate, -30);
-                        List<Book> bookList ;
+                        List<Book> bookList;
                         synchronized (this) {
                             bookList = bookService.queryNeedUpdateBook(startDate, 100);
                         }
@@ -61,7 +58,7 @@ public class StarterListener implements ServletContextListener {
                         Book book = CrawlParser.parseBook(ruleBean, needUpdateBook.getCrawlBookId());
                         // only existing books are updated here
                         book.setId(needUpdateBook.getId());
-                        if(needUpdateBook.getPicUrl()!=null && needUpdateBook.getPicUrl().contains(Constants.LOCAL_PIC_PREFIX)) {
+                        if (needUpdateBook.getPicUrl() != null && needUpdateBook.getPicUrl().contains(Constants.LOCAL_PIC_PREFIX)) {
                             // local cover image: do not overwrite
                             book.setPicUrl(null);
                         }
@@ -83,6 +80,42 @@ public class StarterListener implements ServletContextListener {
 
                 }
             }).start();
 
         }
 
+        new Thread(() -> {
+            log.info("程序启动,开始执行单本采集任务线程。。。");  // "application started, launching single-book crawl task thread..."
+            while (true) {
+                CrawlSingleTask task = null;
+                byte crawlStatus = 0;
+                try {
+                    // fetch the next crawl task
+                    task = crawlService.getCrawlSingleTask();
+
+                    if (task != null) {
+                        // look up the crawl rule for the task's source
+                        CrawlSource source = crawlService.queryCrawlSource(task.getSourceId());
+                        RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class);
+
+                        if (crawlService.parseBookAndSave(task.getCatId(), ruleBean, task.getSourceId(), task.getSourceBookId())) {
+                            // crawl succeeded
+                            crawlStatus = 1;
+                        }
+                    }
+
+                    Thread.sleep(1000 * 60);
+
+                } catch (Exception e) {
+                    log.error(e.getMessage(), e);
+                }
+                if (task != null) {
+                    crawlService.updateCrawlSingleTask(task, crawlStatus);
+                }
+            }
+        }).start();
     }
 }
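Note: the new worker thread encodes a small status protocol for
CrawlSingleTask.taskStatus. The numeric values below are taken from the code
(getCrawlSingleTask selects taskStatus == 2, crawlStatus starts at 0 and is
set to 1 on success, and updateCrawlSingleTask records the final status on
success or after 5 attempts); the constant names are hypothetical.

    // Assumed semantics, inferred from this commit:
    public final class CrawlTaskStatus {
        public static final byte FAILED = 0;   // recorded after the 5th failed attempt
        public static final byte SUCCESS = 1;  // parseBookAndSave returned true
        public static final byte WAITING = 2;  // what getCrawlSingleTask() polls for
        private CrawlTaskStatus() {
        }
    }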
@@ -1,6 +1,7 @@
 package com.java2nb.novel.service;
 
 import com.java2nb.novel.core.crawl.RuleBean;
+import com.java2nb.novel.entity.CrawlSingleTask;
 import com.java2nb.novel.entity.CrawlSource;
 
 import java.util.List;
@@ -39,6 +40,16 @@ public interface CrawlService {
      */
     void updateCrawlSourceStatus(Integer sourceId, Byte sourceStatus);
 
+    /**
+     * Crawl a novel and save it
+     * @param catId    category ID
+     * @param ruleBean crawl rule
+     * @param sourceId crawl source ID
+     * @param bookId   source book ID
+     * @return true on success, false on failure
+     */
+    boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId);
+
     /**
      * Query crawl sources by status
      * @param sourceStatus status: 0 = disabled, 1 = enabled
@@ -61,4 +72,37 @@ public interface CrawlService {
      * @return source info
      */
     CrawlSource queryCrawlSource(Integer sourceId);
+
+    /**
+     * Add a single-book crawl task
+     * @param singleTask the task to create
+     */
+    void addCrawlSingleTask(CrawlSingleTask singleTask);
+
+    /**
+     * Paged query of single-book crawl tasks
+     * @param page     current page number
+     * @param pageSize page size
+     * @return list of single-book crawl tasks
+     */
+    List<CrawlSingleTask> listCrawlSingleTaskByPage(int page, int pageSize);
+
+    /**
+     * Delete a crawl task
+     * @param id task ID
+     */
+    void delCrawlSingleTask(Long id);
+
+    /**
+     * Fetch the next crawl task
+     * @return a crawl task, or null if none is waiting
+     */
+    CrawlSingleTask getCrawlSingleTask();
+
+    /**
+     * Update a single-book crawl task
+     * @param task   the crawl task
+     * @param status crawl status
+     */
+    void updateCrawlSingleTask(CrawlSingleTask task, Byte status);
 }
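Note: together these methods define the polling contract used by the worker
thread in StarterListener above. A minimal sketch of a caller, assuming a
CrawlService instance named crawlService and Jackson's ObjectMapper are in
scope:

    CrawlSingleTask task = crawlService.getCrawlSingleTask();   // oldest waiting task, or null
    if (task != null) {
        CrawlSource source = crawlService.queryCrawlSource(task.getSourceId());
        RuleBean rule = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class);
        boolean ok = crawlService.parseBookAndSave(
                task.getCatId(), rule, task.getSourceId(), task.getSourceBookId());
        // bumps the attempt counter; finalizes the status on success or after 5 tries
        crawlService.updateCrawlSingleTask(task, ok ? (byte) 1 : (byte) 0);
    }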
@@ -6,12 +6,12 @@ import com.java2nb.novel.core.cache.CacheKey;
 import com.java2nb.novel.core.cache.CacheService;
 import com.java2nb.novel.core.crawl.CrawlParser;
 import com.java2nb.novel.core.crawl.RuleBean;
+import com.java2nb.novel.core.enums.ResponseStatus;
+import com.java2nb.novel.core.exception.BusinessException;
 import com.java2nb.novel.core.utils.IdWorker;
 import com.java2nb.novel.core.utils.SpringUtil;
 import com.java2nb.novel.core.utils.ThreadUtil;
-import com.java2nb.novel.entity.Book;
-import com.java2nb.novel.entity.BookContent;
-import com.java2nb.novel.entity.BookIndex;
+import com.java2nb.novel.entity.*;
 import com.java2nb.novel.entity.CrawlSource;
 import com.java2nb.novel.mapper.*;
 import com.java2nb.novel.service.BookService;
@@ -33,8 +33,7 @@ import static com.java2nb.novel.core.utils.HttpUtil.getByHttpClient;
 import static com.java2nb.novel.mapper.BookDynamicSqlSupport.crawlBookId;
 import static com.java2nb.novel.mapper.BookDynamicSqlSupport.crawlSourceId;
 import static com.java2nb.novel.mapper.CrawlSourceDynamicSqlSupport.*;
-import static org.mybatis.dynamic.sql.SqlBuilder.isEqualTo;
-import static org.mybatis.dynamic.sql.SqlBuilder.update;
+import static org.mybatis.dynamic.sql.SqlBuilder.*;
 import static org.mybatis.dynamic.sql.select.SelectDSL.select;
 
 /**
@@ -48,6 +47,8 @@ public class CrawlServiceImpl implements CrawlService {
 
     private final CrawlSourceMapper crawlSourceMapper;
 
+    private final CrawlSingleTaskMapper crawlSingleTaskMapper;
+
     private final BookService bookService;
 
@@ -140,6 +141,62 @@ public class CrawlServiceImpl implements CrawlService {
         return crawlSourceMapper.selectMany(render).get(0);
     }
 
+    @Override
+    public void addCrawlSingleTask(CrawlSingleTask singleTask) {
+        // refuse duplicates: same book name and author name
+        if (bookService.queryIsExistByBookNameAndAuthorName(singleTask.getBookName(), singleTask.getAuthorName())) {
+            throw new BusinessException(ResponseStatus.BOOK_EXISTS);
+        }
+        singleTask.setCreateTime(new Date());
+        crawlSingleTaskMapper.insertSelective(singleTask);
+    }
+
+    @Override
+    public List<CrawlSingleTask> listCrawlSingleTaskByPage(int page, int pageSize) {
+        PageHelper.startPage(page, pageSize);
+        SelectStatementProvider render = select(CrawlSingleTaskDynamicSqlSupport.crawlSingleTask.allColumns())
+                .from(CrawlSingleTaskDynamicSqlSupport.crawlSingleTask)
+                .orderBy(CrawlSingleTaskDynamicSqlSupport.createTime.descending())
+                .build()
+                .render(RenderingStrategies.MYBATIS3);
+        return crawlSingleTaskMapper.selectMany(render);
+    }
+
+    @Override
+    public void delCrawlSingleTask(Long id) {
+        crawlSingleTaskMapper.deleteByPrimaryKey(id);
+    }
+
+    @Override
+    public CrawlSingleTask getCrawlSingleTask() {
+        // oldest task that is still waiting (taskStatus == 2)
+        List<CrawlSingleTask> list = crawlSingleTaskMapper.selectMany(select(CrawlSingleTaskDynamicSqlSupport.crawlSingleTask.allColumns())
+                .from(CrawlSingleTaskDynamicSqlSupport.crawlSingleTask)
+                .where(CrawlSingleTaskDynamicSqlSupport.taskStatus, isEqualTo((byte) 2))
+                .orderBy(CrawlSingleTaskDynamicSqlSupport.createTime)
+                .limit(1)
+                .build()
+                .render(RenderingStrategies.MYBATIS3));
+
+        return list.size() > 0 ? list.get(0) : null;
+    }
+
+    @Override
+    public void updateCrawlSingleTask(CrawlSingleTask task, Byte status) {
+        byte excCount = task.getExcCount();
+        excCount += 1;
+        task.setExcCount(excCount);
+        if (status == 1 || excCount == 5) {
+            // on success, or once 5 attempts have been made, record the final status and stop crawling
+            task.setTaskStatus(status);
+        }
+        crawlSingleTaskMapper.updateByPrimaryKeySelective(task);
+    }
+
     /**
      * Parse the category list
      */
@@ -173,35 +230,7 @@ public class CrawlServiceImpl implements CrawlService {
 
                 String bookId = bookIdMatcher.group(1);
-                Book book = CrawlParser.parseBook(ruleBean, bookId);
-                // only new books are inserted here; check whether the book already exists
-                Book existBook = bookService.queryBookByBookNameAndAuthorName(book.getBookName(), book.getAuthorName());
-                // if the novel does not exist it can be parsed and saved, but it is flagged as
-                // being saved and may not be saved again for another 30 minutes
-                if (existBook == null) {
-                    // the book is new; save it
-                    book.setCatId(catId);
-                    // look up the category name by category ID
-                    book.setCatName(bookService.queryCatNameByCatId(catId));
-                    if (catId == 7) {
-                        // female-oriented channel
-                        book.setWorkDirection((byte) 1);
-                    } else {
-                        // male-oriented channel
-                        book.setWorkDirection((byte) 0);
-                    }
-                    book.setCrawlBookId(bookId);
-                    book.setCrawlSourceId(sourceId);
-                    book.setCrawlLastTime(new Date());
-                    book.setId(new IdWorker().nextId());
-                    // parse the chapter index and contents
-                    Map<Integer, List> indexAndContentList = CrawlParser.parseBookIndexAndContent(bookId, book, ruleBean, new HashMap<>(0));
-
-                    bookService.saveBookAndIndexAndContent(book, (List<BookIndex>) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List<BookContent>) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY));
-
-                } else {
-                    // only refresh the book's crawl-related fields
-                    bookService.updateCrawlProperties(existBook.getId(), sourceId, bookId);
-                }
+                parseBookAndSave(catId, ruleBean, sourceId, bookId);
             } catch (Exception e) {
                 log.error(e.getMessage(), e);
             }
@@ -232,6 +261,43 @@ public class CrawlServiceImpl implements CrawlService {
 
     }
 
+    @Override
+    public boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId) {
+        Book book = CrawlParser.parseBook(ruleBean, bookId);
+        if (book.getBookName() == null || book.getAuthorName() == null) {
+            return false;
+        }
+        // only new books are inserted here; check whether the book already exists
+        Book existBook = bookService.queryBookByBookNameAndAuthorName(book.getBookName(), book.getAuthorName());
+        // if the novel does not exist it can be parsed and saved, but it is flagged as
+        // being saved and may not be saved again for another 30 minutes
+        if (existBook == null) {
+            // the book is new; save it
+            book.setCatId(catId);
+            // look up the category name by category ID
+            book.setCatName(bookService.queryCatNameByCatId(catId));
+            if (catId == 7) {
+                // female-oriented channel
+                book.setWorkDirection((byte) 1);
+            } else {
+                // male-oriented channel
+                book.setWorkDirection((byte) 0);
+            }
+            book.setCrawlBookId(bookId);
+            book.setCrawlSourceId(sourceId);
+            book.setCrawlLastTime(new Date());
+            book.setId(new IdWorker().nextId());
+            // parse the chapter index and contents
+            Map<Integer, List> indexAndContentList = CrawlParser.parseBookIndexAndContent(bookId, book, ruleBean, new HashMap<>(0));
+
+            bookService.saveBookAndIndexAndContent(book, (List<BookIndex>) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List<BookContent>) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY));
+
+        } else {
+            // only refresh the book's crawl-related fields
+            bookService.updateCrawlProperties(existBook.getId(), sourceId, bookId);
+        }
+        return true;
+    }
+
     @Override
     public void updateCrawlSourceStatus(Integer sourceId, Byte sourceStatus) {
         CrawlSource source = new CrawlSource();
@@ -0,0 +1,26 @@
+package com.java2nb.novel.vo;
+
+import com.fasterxml.jackson.annotation.JsonFormat;
+import com.java2nb.novel.entity.CrawlSingleTask;
+import com.java2nb.novel.entity.CrawlSource;
+import lombok.Data;
+
+import java.util.Date;
+
+/**
+ * @author Administrator
+ */
+@Data
+public class CrawlSingleTaskVO extends CrawlSingleTask {
+
+    @JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd HH:mm")
+    private Date createTime;
+
+
+    @Override
+    public String toString() {
+        return super.toString();
+    }
+}
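Note: CrawlSingleTaskVO extends an entity class and uses Lombok's @Data, so
the generated equals/hashCode ignore the inherited fields by default, and the
manual toString() that merely delegates to super is redundant. If value
equality across inherited fields matters, a conventional alternative (a
suggestion, not part of this commit) is:

    @Data
    @EqualsAndHashCode(callSuper = true)  // include fields inherited from CrawlSingleTask
    @ToString(callSuper = true)           // makes the manual toString() override unnecessary
    public class CrawlSingleTaskVO extends CrawlSingleTask {
        // ...
    }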