From c9c714e71e7740ed699c0bd5be14b620b3fdbc92 Mon Sep 17 00:00:00 2001 From: xiongxiaoyang <773861846@qq.com> Date: Mon, 15 Jun 2020 15:08:15 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=8D=95=E6=9C=AC=E9=87=87?= =?UTF-8?q?=E9=9B=86=E4=BB=BB=E5=8A=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../novel/core/enums/ResponseStatus.java | 10 +- .../java2nb/novel/entity/CrawlSingleTask.java | 84 ------- .../CrawlSingleTaskDynamicSqlSupport.java | 49 ---- .../novel/mapper/CrawlSingleTaskMapper.java | 200 --------------- .../resources/mybatis/generatorConfig.xml | 2 +- .../novel/controller/CrawlController.java | 35 ++- .../novel/core/listener/StarterListener.java | 49 +++- .../java2nb/novel/service/CrawlService.java | 44 ++++ .../novel/service/impl/CrawlServiceImpl.java | 134 +++++++--- .../java2nb/novel/vo/CrawlSingleTaskVO.java | 26 ++ .../src/main/resources/static/css/user.css | 4 +- .../templates/crawl/crawlSingleTask_add.html | 188 ++++++++++++++ .../templates/crawl/crawlSingleTask_list.html | 230 ++++++++++++++++++ .../templates/crawl/crawlSource_add.html | 1 + .../templates/crawl/crawlSource_list.html | 1 + sql/20200615.sql | 40 +++ sql/novel_plus.sql | 22 ++ 17 files changed, 738 insertions(+), 381 deletions(-) delete mode 100644 novel-common/src/main/java/com/java2nb/novel/entity/CrawlSingleTask.java delete mode 100644 novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskDynamicSqlSupport.java delete mode 100644 novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskMapper.java create mode 100644 novel-crawl/src/main/java/com/java2nb/novel/vo/CrawlSingleTaskVO.java create mode 100644 novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_add.html create mode 100644 novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_list.html create mode 100644 sql/20200615.sql diff --git a/novel-common/src/main/java/com/java2nb/novel/core/enums/ResponseStatus.java b/novel-common/src/main/java/com/java2nb/novel/core/enums/ResponseStatus.java index f0a74da..e3fb8bd 100644 --- a/novel-common/src/main/java/com/java2nb/novel/core/enums/ResponseStatus.java +++ b/novel-common/src/main/java/com/java2nb/novel/core/enums/ResponseStatus.java @@ -55,9 +55,14 @@ public enum ResponseStatus { * */ INVITE_CODE_INVALID(4001, "邀请码无效!"), AUTHOR_STATUS_FORBIDDEN(4002, "作者状态异常,暂不能管理小说!") - , BOOKNAME_EXISTS(4003,"已发布过同名小说!") + , BOOKNAME_EXISTS(4003,"已发布过同名小说!"), - , + /** + * 小说相关错误 + */ + BOOK_EXISTS(5001,"该小说已存在") + + , /** * 搜索引擎相关错误 * */ @@ -68,6 +73,7 @@ public enum ResponseStatus { * 其他通用错误 * */ PASSWORD_ERROR(88001,"密码错误!"); + private int code; private String msg; diff --git a/novel-common/src/main/java/com/java2nb/novel/entity/CrawlSingleTask.java b/novel-common/src/main/java/com/java2nb/novel/entity/CrawlSingleTask.java deleted file mode 100644 index ad6bb59..0000000 --- a/novel-common/src/main/java/com/java2nb/novel/entity/CrawlSingleTask.java +++ /dev/null @@ -1,84 +0,0 @@ -package com.java2nb.novel.entity; - -import java.util.Date; -import javax.annotation.Generated; - -public class CrawlSingleTask { - @Generated("org.mybatis.generator.api.MyBatisGenerator") - private Long id; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - private Integer sourceId; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - private String sourceBookId; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - private Byte taskStatus; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - private Byte excCount; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - private Date createTime; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public Long getId() { - return id; - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public void setId(Long id) { - this.id = id; - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public Integer getSourceId() { - return sourceId; - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public void setSourceId(Integer sourceId) { - this.sourceId = sourceId; - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public String getSourceBookId() { - return sourceBookId; - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public void setSourceBookId(String sourceBookId) { - this.sourceBookId = sourceBookId == null ? null : sourceBookId.trim(); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public Byte getTaskStatus() { - return taskStatus; - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public void setTaskStatus(Byte taskStatus) { - this.taskStatus = taskStatus; - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public Byte getExcCount() { - return excCount; - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public void setExcCount(Byte excCount) { - this.excCount = excCount; - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public Date getCreateTime() { - return createTime; - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public void setCreateTime(Date createTime) { - this.createTime = createTime; - } -} \ No newline at end of file diff --git a/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskDynamicSqlSupport.java b/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskDynamicSqlSupport.java deleted file mode 100644 index 34b00d5..0000000 --- a/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskDynamicSqlSupport.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.java2nb.novel.mapper; - -import java.sql.JDBCType; -import java.util.Date; -import javax.annotation.Generated; -import org.mybatis.dynamic.sql.SqlColumn; -import org.mybatis.dynamic.sql.SqlTable; - -public final class CrawlSingleTaskDynamicSqlSupport { - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public static final CrawlSingleTask crawlSingleTask = new CrawlSingleTask(); - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public static final SqlColumn id = crawlSingleTask.id; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public static final SqlColumn sourceId = crawlSingleTask.sourceId; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public static final SqlColumn sourceBookId = crawlSingleTask.sourceBookId; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public static final SqlColumn taskStatus = crawlSingleTask.taskStatus; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public static final SqlColumn excCount = crawlSingleTask.excCount; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public static final SqlColumn createTime = crawlSingleTask.createTime; - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - public static final class CrawlSingleTask extends SqlTable { - public final SqlColumn id = column("id", JDBCType.BIGINT); - - public final SqlColumn sourceId = column("source_id", JDBCType.INTEGER); - - public final SqlColumn sourceBookId = column("source_book_id", JDBCType.VARCHAR); - - public final SqlColumn taskStatus = column("task_status", JDBCType.TINYINT); - - public final SqlColumn excCount = column("exc_count", JDBCType.TINYINT); - - public final SqlColumn createTime = column("create_time", JDBCType.TIMESTAMP); - - public CrawlSingleTask() { - super("crawl_single_task"); - } - } -} \ No newline at end of file diff --git a/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskMapper.java b/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskMapper.java deleted file mode 100644 index 7a37f22..0000000 --- a/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskMapper.java +++ /dev/null @@ -1,200 +0,0 @@ -package com.java2nb.novel.mapper; - -import static com.java2nb.novel.mapper.CrawlSingleTaskDynamicSqlSupport.*; -import static org.mybatis.dynamic.sql.SqlBuilder.*; - -import com.java2nb.novel.entity.CrawlSingleTask; -import java.util.Collection; -import java.util.List; -import java.util.Optional; -import javax.annotation.Generated; -import org.apache.ibatis.annotations.DeleteProvider; -import org.apache.ibatis.annotations.InsertProvider; -import org.apache.ibatis.annotations.Mapper; -import org.apache.ibatis.annotations.Result; -import org.apache.ibatis.annotations.ResultMap; -import org.apache.ibatis.annotations.Results; -import org.apache.ibatis.annotations.SelectProvider; -import org.apache.ibatis.annotations.UpdateProvider; -import org.apache.ibatis.type.JdbcType; -import org.mybatis.dynamic.sql.BasicColumn; -import org.mybatis.dynamic.sql.delete.DeleteDSLCompleter; -import org.mybatis.dynamic.sql.delete.render.DeleteStatementProvider; -import org.mybatis.dynamic.sql.insert.render.InsertStatementProvider; -import org.mybatis.dynamic.sql.insert.render.MultiRowInsertStatementProvider; -import org.mybatis.dynamic.sql.select.CountDSLCompleter; -import org.mybatis.dynamic.sql.select.SelectDSLCompleter; -import org.mybatis.dynamic.sql.select.render.SelectStatementProvider; -import org.mybatis.dynamic.sql.update.UpdateDSL; -import org.mybatis.dynamic.sql.update.UpdateDSLCompleter; -import org.mybatis.dynamic.sql.update.UpdateModel; -import org.mybatis.dynamic.sql.update.render.UpdateStatementProvider; -import org.mybatis.dynamic.sql.util.SqlProviderAdapter; -import org.mybatis.dynamic.sql.util.mybatis3.MyBatis3Utils; - -@Mapper -public interface CrawlSingleTaskMapper { - @Generated("org.mybatis.generator.api.MyBatisGenerator") - BasicColumn[] selectList = BasicColumn.columnList(id, sourceId, sourceBookId, taskStatus, excCount, createTime); - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - @SelectProvider(type=SqlProviderAdapter.class, method="select") - long count(SelectStatementProvider selectStatement); - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - @DeleteProvider(type=SqlProviderAdapter.class, method="delete") - int delete(DeleteStatementProvider deleteStatement); - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - @InsertProvider(type=SqlProviderAdapter.class, method="insert") - int insert(InsertStatementProvider insertStatement); - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - @InsertProvider(type=SqlProviderAdapter.class, method="insertMultiple") - int insertMultiple(MultiRowInsertStatementProvider multipleInsertStatement); - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - @SelectProvider(type=SqlProviderAdapter.class, method="select") - @ResultMap("CrawlSingleTaskResult") - Optional selectOne(SelectStatementProvider selectStatement); - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - @SelectProvider(type=SqlProviderAdapter.class, method="select") - @Results(id="CrawlSingleTaskResult", value = { - @Result(column="id", property="id", jdbcType=JdbcType.BIGINT, id=true), - @Result(column="source_id", property="sourceId", jdbcType=JdbcType.INTEGER), - @Result(column="source_book_id", property="sourceBookId", jdbcType=JdbcType.VARCHAR), - @Result(column="task_status", property="taskStatus", jdbcType=JdbcType.TINYINT), - @Result(column="exc_count", property="excCount", jdbcType=JdbcType.TINYINT), - @Result(column="create_time", property="createTime", jdbcType=JdbcType.TIMESTAMP) - }) - List selectMany(SelectStatementProvider selectStatement); - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - @UpdateProvider(type=SqlProviderAdapter.class, method="update") - int update(UpdateStatementProvider updateStatement); - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default long count(CountDSLCompleter completer) { - return MyBatis3Utils.countFrom(this::count, crawlSingleTask, completer); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default int delete(DeleteDSLCompleter completer) { - return MyBatis3Utils.deleteFrom(this::delete, crawlSingleTask, completer); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default int deleteByPrimaryKey(Long id_) { - return delete(c -> - c.where(id, isEqualTo(id_)) - ); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default int insert(CrawlSingleTask record) { - return MyBatis3Utils.insert(this::insert, record, crawlSingleTask, c -> - c.map(id).toProperty("id") - .map(sourceId).toProperty("sourceId") - .map(sourceBookId).toProperty("sourceBookId") - .map(taskStatus).toProperty("taskStatus") - .map(excCount).toProperty("excCount") - .map(createTime).toProperty("createTime") - ); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default int insertMultiple(Collection records) { - return MyBatis3Utils.insertMultiple(this::insertMultiple, records, crawlSingleTask, c -> - c.map(id).toProperty("id") - .map(sourceId).toProperty("sourceId") - .map(sourceBookId).toProperty("sourceBookId") - .map(taskStatus).toProperty("taskStatus") - .map(excCount).toProperty("excCount") - .map(createTime).toProperty("createTime") - ); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default int insertSelective(CrawlSingleTask record) { - return MyBatis3Utils.insert(this::insert, record, crawlSingleTask, c -> - c.map(id).toPropertyWhenPresent("id", record::getId) - .map(sourceId).toPropertyWhenPresent("sourceId", record::getSourceId) - .map(sourceBookId).toPropertyWhenPresent("sourceBookId", record::getSourceBookId) - .map(taskStatus).toPropertyWhenPresent("taskStatus", record::getTaskStatus) - .map(excCount).toPropertyWhenPresent("excCount", record::getExcCount) - .map(createTime).toPropertyWhenPresent("createTime", record::getCreateTime) - ); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default Optional selectOne(SelectDSLCompleter completer) { - return MyBatis3Utils.selectOne(this::selectOne, selectList, crawlSingleTask, completer); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default List select(SelectDSLCompleter completer) { - return MyBatis3Utils.selectList(this::selectMany, selectList, crawlSingleTask, completer); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default List selectDistinct(SelectDSLCompleter completer) { - return MyBatis3Utils.selectDistinct(this::selectMany, selectList, crawlSingleTask, completer); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default Optional selectByPrimaryKey(Long id_) { - return selectOne(c -> - c.where(id, isEqualTo(id_)) - ); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default int update(UpdateDSLCompleter completer) { - return MyBatis3Utils.update(this::update, crawlSingleTask, completer); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - static UpdateDSL updateAllColumns(CrawlSingleTask record, UpdateDSL dsl) { - return dsl.set(id).equalTo(record::getId) - .set(sourceId).equalTo(record::getSourceId) - .set(sourceBookId).equalTo(record::getSourceBookId) - .set(taskStatus).equalTo(record::getTaskStatus) - .set(excCount).equalTo(record::getExcCount) - .set(createTime).equalTo(record::getCreateTime); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - static UpdateDSL updateSelectiveColumns(CrawlSingleTask record, UpdateDSL dsl) { - return dsl.set(id).equalToWhenPresent(record::getId) - .set(sourceId).equalToWhenPresent(record::getSourceId) - .set(sourceBookId).equalToWhenPresent(record::getSourceBookId) - .set(taskStatus).equalToWhenPresent(record::getTaskStatus) - .set(excCount).equalToWhenPresent(record::getExcCount) - .set(createTime).equalToWhenPresent(record::getCreateTime); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default int updateByPrimaryKey(CrawlSingleTask record) { - return update(c -> - c.set(sourceId).equalTo(record::getSourceId) - .set(sourceBookId).equalTo(record::getSourceBookId) - .set(taskStatus).equalTo(record::getTaskStatus) - .set(excCount).equalTo(record::getExcCount) - .set(createTime).equalTo(record::getCreateTime) - .where(id, isEqualTo(record::getId)) - ); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default int updateByPrimaryKeySelective(CrawlSingleTask record) { - return update(c -> - c.set(sourceId).equalToWhenPresent(record::getSourceId) - .set(sourceBookId).equalToWhenPresent(record::getSourceBookId) - .set(taskStatus).equalToWhenPresent(record::getTaskStatus) - .set(excCount).equalToWhenPresent(record::getExcCount) - .set(createTime).equalToWhenPresent(record::getCreateTime) - .where(id, isEqualTo(record::getId)) - ); - } -} \ No newline at end of file diff --git a/novel-common/src/main/resources/mybatis/generatorConfig.xml b/novel-common/src/main/resources/mybatis/generatorConfig.xml index c4fafbf..b559604 100644 --- a/novel-common/src/main/resources/mybatis/generatorConfig.xml +++ b/novel-common/src/main/resources/mybatis/generatorConfig.xml @@ -44,7 +44,7 @@ - +
diff --git a/novel-crawl/src/main/java/com/java2nb/novel/controller/CrawlController.java b/novel-crawl/src/main/java/com/java2nb/novel/controller/CrawlController.java index 28c3998..da39ef9 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/controller/CrawlController.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/controller/CrawlController.java @@ -3,11 +3,12 @@ package com.java2nb.novel.controller; import com.github.pagehelper.PageInfo; import com.java2nb.novel.core.bean.ResultBean; import com.java2nb.novel.core.utils.BeanUtil; +import com.java2nb.novel.entity.CrawlSingleTask; import com.java2nb.novel.entity.CrawlSource; import com.java2nb.novel.service.CrawlService; +import com.java2nb.novel.vo.CrawlSingleTaskVO; import com.java2nb.novel.vo.CrawlSourceVO; import lombok.RequiredArgsConstructor; -import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; @@ -56,6 +57,38 @@ public class CrawlController { return ResultBean.ok(); } + /** + * 新增单本采集任务 + * */ + @PostMapping("addCrawlSingleTask") + public ResultBean addCrawlSingleTask(CrawlSingleTask singleTask){ + crawlService.addCrawlSingleTask(singleTask); + + return ResultBean.ok(); + + } + + /** + * 单本采集任务分页列表查询 + * */ + @PostMapping("listCrawlSingleTaskByPage") + public ResultBean listCrawlSingleTaskByPage(@RequestParam(value = "curr", defaultValue = "1") int page, @RequestParam(value = "limit", defaultValue = "10") int pageSize){ + + return ResultBean.ok(new PageInfo<>(BeanUtil.copyList(crawlService.listCrawlSingleTaskByPage(page,pageSize), CrawlSingleTaskVO.class) + )); + } + + /** + * 删除采集任务 + * */ + @PostMapping("delCrawlSingleTask") + public ResultBean delCrawlSingleTask(Long id){ + + crawlService.delCrawlSingleTask(id); + + return ResultBean.ok(); + } + diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java b/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java index 0e26ad5..1efb692 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java @@ -3,10 +3,7 @@ package com.java2nb.novel.core.listener; import com.fasterxml.jackson.databind.ObjectMapper; import com.java2nb.novel.core.crawl.CrawlParser; import com.java2nb.novel.core.crawl.RuleBean; -import com.java2nb.novel.entity.Book; -import com.java2nb.novel.entity.BookContent; -import com.java2nb.novel.entity.BookIndex; -import com.java2nb.novel.entity.CrawlSource; +import com.java2nb.novel.entity.*; import com.java2nb.novel.service.BookService; import com.java2nb.novel.service.CrawlService; import com.java2nb.novel.utils.Constants; @@ -40,15 +37,15 @@ public class StarterListener implements ServletContextListener { @Override public void contextInitialized(ServletContextEvent sce) { - log.info("程序启动,开始执行自动更新线程。。。"); - for(int i = 0 ; i { + log.info("程序启动,开始执行自动更新线程。。。"); while (true) { try { //1.查询最新目录更新时间在一个月之内的前100条需要更新的数据 Date currentDate = new Date(); Date startDate = DateUtils.addDays(currentDate, -30); - List bookList ; + List bookList; synchronized (this) { bookList = bookService.queryNeedUpdateBook(startDate, 100); } @@ -61,7 +58,7 @@ public class StarterListener implements ServletContextListener { Book book = CrawlParser.parseBook(ruleBean, needUpdateBook.getCrawlBookId()); //这里只做老书更新 book.setId(needUpdateBook.getId()); - if(needUpdateBook.getPicUrl()!=null && needUpdateBook.getPicUrl().contains(Constants.LOCAL_PIC_PREFIX)) { + if (needUpdateBook.getPicUrl() != null && needUpdateBook.getPicUrl().contains(Constants.LOCAL_PIC_PREFIX)) { //本地图片则不更新 book.setPicUrl(null); } @@ -83,6 +80,42 @@ public class StarterListener implements ServletContextListener { } }).start(); + + } + + + new Thread(() -> { + log.info("程序启动,开始执行单本采集任务线程。。。"); + while (true) { + CrawlSingleTask task = null; + byte crawlStatus = 0; + try { + //获取采集任务 + task = crawlService.getCrawlSingleTask(); + + if (task != null) { + //查询爬虫规则 + CrawlSource source = crawlService.queryCrawlSource(task.getSourceId()); + RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class); + + if (crawlService.parseBookAndSave(task.getCatId(), ruleBean, task.getSourceId(), task.getSourceBookId())) { + //采集成功 + crawlStatus = 1; + } + + } + + Thread.sleep(1000 * 60); + + } catch (Exception e) { + log.error(e.getMessage(), e); + } + if (task != null) { + crawlService.updateCrawlSingleTask(task, crawlStatus); + } + + } + }).start(); } } diff --git a/novel-crawl/src/main/java/com/java2nb/novel/service/CrawlService.java b/novel-crawl/src/main/java/com/java2nb/novel/service/CrawlService.java index ed00ae8..7c465a7 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/service/CrawlService.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/service/CrawlService.java @@ -1,6 +1,7 @@ package com.java2nb.novel.service; import com.java2nb.novel.core.crawl.RuleBean; +import com.java2nb.novel.entity.CrawlSingleTask; import com.java2nb.novel.entity.CrawlSource; import java.util.List; @@ -39,6 +40,16 @@ public interface CrawlService { * */ void updateCrawlSourceStatus(Integer sourceId, Byte sourceStatus); + /** + * 采集并保存小说 + * @param catId 分类ID + * @param bookId 小说ID + * @param sourceId 源ID + * @param ruleBean 采集规则\ + * @return true:成功,false:失败 + * */ + boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId); + /** * 根据爬虫状态查询爬虫源集合 * @param sourceStatus 状态,0关闭,1开启 @@ -61,4 +72,37 @@ public interface CrawlService { * @return 源信息 * */ CrawlSource queryCrawlSource(Integer sourceId); + + /** + * 新增单本采集任务 + * @param singleTask 任务信息对象 + * */ + void addCrawlSingleTask(CrawlSingleTask singleTask); + + /** + * 单本采集任务分页列表查询 + * @param page 当前页码 + * @param pageSize 分页大小 + * @return 单本采集任务集合 + * */ + List listCrawlSingleTaskByPage(int page, int pageSize); + + /** + * 删除采集任务 + * @param id 任务ID + * */ + void delCrawlSingleTask(Long id); + + /** + * 获取采集任务 + * @return 采集任务 + * */ + CrawlSingleTask getCrawlSingleTask(); + + /** + * 更新单本采集任务 + * @param task 采集任务 + * @param status 采集状态 + * */ + void updateCrawlSingleTask(CrawlSingleTask task, Byte status); } diff --git a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java index dd2e54c..f3c49bd 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java @@ -6,12 +6,12 @@ import com.java2nb.novel.core.cache.CacheKey; import com.java2nb.novel.core.cache.CacheService; import com.java2nb.novel.core.crawl.CrawlParser; import com.java2nb.novel.core.crawl.RuleBean; +import com.java2nb.novel.core.enums.ResponseStatus; +import com.java2nb.novel.core.exception.BusinessException; import com.java2nb.novel.core.utils.IdWorker; import com.java2nb.novel.core.utils.SpringUtil; import com.java2nb.novel.core.utils.ThreadUtil; -import com.java2nb.novel.entity.Book; -import com.java2nb.novel.entity.BookContent; -import com.java2nb.novel.entity.BookIndex; +import com.java2nb.novel.entity.*; import com.java2nb.novel.entity.CrawlSource; import com.java2nb.novel.mapper.*; import com.java2nb.novel.service.BookService; @@ -33,8 +33,7 @@ import static com.java2nb.novel.core.utils.HttpUtil.getByHttpClient; import static com.java2nb.novel.mapper.BookDynamicSqlSupport.crawlBookId; import static com.java2nb.novel.mapper.BookDynamicSqlSupport.crawlSourceId; import static com.java2nb.novel.mapper.CrawlSourceDynamicSqlSupport.*; -import static org.mybatis.dynamic.sql.SqlBuilder.isEqualTo; -import static org.mybatis.dynamic.sql.SqlBuilder.update; +import static org.mybatis.dynamic.sql.SqlBuilder.*; import static org.mybatis.dynamic.sql.select.SelectDSL.select; /** @@ -48,6 +47,8 @@ public class CrawlServiceImpl implements CrawlService { private final CrawlSourceMapper crawlSourceMapper; + private final CrawlSingleTaskMapper crawlSingleTaskMapper; + private final BookService bookService; @@ -140,6 +141,62 @@ public class CrawlServiceImpl implements CrawlService { return crawlSourceMapper.selectMany(render).get(0); } + @Override + public void addCrawlSingleTask(CrawlSingleTask singleTask) { + + if(bookService.queryIsExistByBookNameAndAuthorName(singleTask.getBookName(),singleTask.getAuthorName())){ + throw new BusinessException(ResponseStatus.BOOK_EXISTS); + + } + singleTask.setCreateTime(new Date()); + crawlSingleTaskMapper.insertSelective(singleTask); + + + } + + @Override + public List listCrawlSingleTaskByPage(int page, int pageSize) { + PageHelper.startPage(page, pageSize); + SelectStatementProvider render = select(CrawlSingleTaskDynamicSqlSupport.crawlSingleTask.allColumns()) + .from(CrawlSingleTaskDynamicSqlSupport.crawlSingleTask) + .orderBy(CrawlSingleTaskDynamicSqlSupport.createTime.descending()) + .build() + .render(RenderingStrategies.MYBATIS3); + return crawlSingleTaskMapper.selectMany(render); + } + + @Override + public void delCrawlSingleTask(Long id) { + crawlSingleTaskMapper.deleteByPrimaryKey(id); + } + + @Override + public CrawlSingleTask getCrawlSingleTask() { + + List list = crawlSingleTaskMapper.selectMany(select(CrawlSingleTaskDynamicSqlSupport.crawlSingleTask.allColumns()) + .from(CrawlSingleTaskDynamicSqlSupport.crawlSingleTask) + .where(CrawlSingleTaskDynamicSqlSupport.taskStatus,isEqualTo((byte)2)) + .orderBy(CrawlSingleTaskDynamicSqlSupport.createTime) + .limit(1) + .build() + .render(RenderingStrategies.MYBATIS3)); + + return list.size() > 0 ? list.get(0) : null; + } + + @Override + public void updateCrawlSingleTask(CrawlSingleTask task, Byte status) { + byte excCount = task.getExcCount(); + excCount+=1; + task.setExcCount(excCount); + if(status == 1 || excCount == 5){ + //当采集成功或者采集次数等于5,则更新采集最终状态,并停止采集 + task.setTaskStatus(status); + } + crawlSingleTaskMapper.updateByPrimaryKeySelective(task); + + } + /** * 解析分类列表 */ @@ -173,35 +230,7 @@ public class CrawlServiceImpl implements CrawlService { String bookId = bookIdMatcher.group(1); - Book book = CrawlParser.parseBook(ruleBean, bookId); - //这里只做新书入库,查询是否存在这本书 - Book existBook = bookService.queryBookByBookNameAndAuthorName(book.getBookName(), book.getAuthorName()); - //如果该小说不存在,则可以解析入库,但是标记该小说正在入库,30分钟之后才允许再次入库 - if (existBook == null) { - //没有该书,可以入库 - book.setCatId(catId); - //根据分类ID查询分类 - book.setCatName(bookService.queryCatNameByCatId(catId)); - if (catId == 7) { - //女频 - book.setWorkDirection((byte) 1); - } else { - //男频 - book.setWorkDirection((byte) 0); - } - book.setCrawlBookId(bookId); - book.setCrawlSourceId(sourceId); - book.setCrawlLastTime(new Date()); - book.setId(new IdWorker().nextId()); - //解析章节目录 - Map indexAndContentList = CrawlParser.parseBookIndexAndContent(bookId, book, ruleBean, new HashMap<>(0)); - - bookService.saveBookAndIndexAndContent(book, (List) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY)); - - } else { - //只更新书籍的爬虫相关字段 - bookService.updateCrawlProperties(existBook.getId(), sourceId, bookId); - } + parseBookAndSave(catId, ruleBean, sourceId, bookId); } catch (Exception e) { log.error(e.getMessage(), e); } @@ -232,6 +261,43 @@ public class CrawlServiceImpl implements CrawlService { } + @Override + public boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId) { + Book book = CrawlParser.parseBook(ruleBean, bookId); + if(book.getBookName() == null || book.getAuthorName() == null){ + return false; + } + //这里只做新书入库,查询是否存在这本书 + Book existBook = bookService.queryBookByBookNameAndAuthorName(book.getBookName(), book.getAuthorName()); + //如果该小说不存在,则可以解析入库,但是标记该小说正在入库,30分钟之后才允许再次入库 + if (existBook == null) { + //没有该书,可以入库 + book.setCatId(catId); + //根据分类ID查询分类 + book.setCatName(bookService.queryCatNameByCatId(catId)); + if (catId == 7) { + //女频 + book.setWorkDirection((byte) 1); + } else { + //男频 + book.setWorkDirection((byte) 0); + } + book.setCrawlBookId(bookId); + book.setCrawlSourceId(sourceId); + book.setCrawlLastTime(new Date()); + book.setId(new IdWorker().nextId()); + //解析章节目录 + Map indexAndContentList = CrawlParser.parseBookIndexAndContent(bookId, book, ruleBean, new HashMap<>(0)); + + bookService.saveBookAndIndexAndContent(book, (List) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY)); + + } else { + //只更新书籍的爬虫相关字段 + bookService.updateCrawlProperties(existBook.getId(), sourceId, bookId); + } + return true; + } + @Override public void updateCrawlSourceStatus(Integer sourceId, Byte sourceStatus) { CrawlSource source = new CrawlSource(); diff --git a/novel-crawl/src/main/java/com/java2nb/novel/vo/CrawlSingleTaskVO.java b/novel-crawl/src/main/java/com/java2nb/novel/vo/CrawlSingleTaskVO.java new file mode 100644 index 0000000..4583856 --- /dev/null +++ b/novel-crawl/src/main/java/com/java2nb/novel/vo/CrawlSingleTaskVO.java @@ -0,0 +1,26 @@ +package com.java2nb.novel.vo; + +import com.fasterxml.jackson.annotation.JsonFormat; +import com.java2nb.novel.entity.CrawlSingleTask; +import com.java2nb.novel.entity.CrawlSource; +import lombok.Data; + +import java.util.Date; + +/** + * @author Administrator + */ +@Data +public class CrawlSingleTaskVO extends CrawlSingleTask { + + @JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd HH:mm") + private Date createTime; + + + + + @Override + public String toString() { + return super.toString(); + } +} diff --git a/novel-crawl/src/main/resources/static/css/user.css b/novel-crawl/src/main/resources/static/css/user.css index 8ff39ef..76624c7 100644 --- a/novel-crawl/src/main/resources/static/css/user.css +++ b/novel-crawl/src/main/resources/static/css/user.css @@ -10,7 +10,7 @@ .user_l .log_list { width:350px } .user_l .s_input { margin-bottom:25px; font-size:14px } .s_input { width:348px; height:30px; line-height:38px\9; vertical-align:middle; border:1px solid #ddd; border-radius:2px } -.icon_name, .icon_key, .icon_code { width:312px; padding-left:36px; background:url(../images/icon_user.png) no-repeat 13px 13px } +.icon_name, .icon_key, .icon_code { width:312px; padding-left:36px} .icon_key { background-position: 13px -51px } .icon_code { background-position: 13px -117px; width:200px; float:left } .code_pic { height:38px; float:right } @@ -37,7 +37,7 @@ .fast_tit .title { background:#fff; font-size:16px; padding:3px 14px; position:relative; display:inline-block; z-index:999 } /*userinfo*/ .my_l { width:198px; float:left; font-size: 13px; padding-top: 20px; } -.my_l li a { display:block; height:42px; line-height:42px; padding-left:62px; border-left:4px solid #fff; background:url(../images/icon_user.png) no-repeat; margin-bottom:5px; color: #666 } +.my_l li a { display:block; height:42px; line-height:42px; padding-left:62px; border-left:4px solid #fff; margin-bottom:5px; color: #666 } .my_l li .on { background-color:#fafafa; border-left:2px solid #f80; color:#000; border-radius: 0 2px 2px 0 } .my_l .link_1 { background-position:32px -188px } .my_l .link_2 { background-position:32px -230px } diff --git a/novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_add.html b/novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_add.html new file mode 100644 index 0000000..37e216a --- /dev/null +++ b/novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_add.html @@ -0,0 +1,188 @@ + + + + + + + 爬虫管理系统-小说精品屋 + + + + + + +
+ +
+ +
+
+ +
+
+ +
+
+ +
+
+

采集信息填写(示例均为笔趣阁:http://www.mcmssc.com)

+
    +
  • + + 采集源: +
  • + 采集分类: +
  • +
  • + 示例:73_73911 +
  • + 示例:苏厨 +
  • + 示例:二子从周 +
  • + + + + +
  • +
+
+ +
+
+
+
+
+ + + + + + + + + diff --git a/novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_list.html b/novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_list.html new file mode 100644 index 0000000..fcac96d --- /dev/null +++ b/novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_list.html @@ -0,0 +1,230 @@ + + + + + + + 爬虫管理系统-小说精品屋 + + + + + + +
+ +
+ +
+
+ +
+
+
+

单本采集任务列表

+ +
+ +
+
+ + + + + + + + + + + + + + + +
+ 序号 + + 采集小说名 + + 采集小说作者名 + + 采集次数 + + 状态 + + 创建时间 + + 操作 +
+
+
+ + + + + + + + + + + + + + + diff --git a/novel-crawl/src/main/resources/templates/crawl/crawlSource_add.html b/novel-crawl/src/main/resources/templates/crawl/crawlSource_add.html index 74b7f20..ec89413 100644 --- a/novel-crawl/src/main/resources/templates/crawl/crawlSource_add.html +++ b/novel-crawl/src/main/resources/templates/crawl/crawlSource_add.html @@ -29,6 +29,7 @@ diff --git a/novel-crawl/src/main/resources/templates/crawl/crawlSource_list.html b/novel-crawl/src/main/resources/templates/crawl/crawlSource_list.html index b7eda42..0ce7416 100644 --- a/novel-crawl/src/main/resources/templates/crawl/crawlSource_list.html +++ b/novel-crawl/src/main/resources/templates/crawl/crawlSource_list.html @@ -28,6 +28,7 @@ diff --git a/sql/20200615.sql b/sql/20200615.sql new file mode 100644 index 0000000..cac7888 --- /dev/null +++ b/sql/20200615.sql @@ -0,0 +1,40 @@ +/* +Navicat MySQL Data Transfer + +Source Server : localhost +Source Server Version : 50725 +Source Host : localhost:3306 +Source Database : novel_plus + +Target Server Type : MYSQL +Target Server Version : 50725 +File Encoding : 65001 + +Date: 2020-06-15 15:06:55 +*/ + +SET FOREIGN_KEY_CHECKS=0; + +-- ---------------------------- +-- Table structure for crawl_single_task +-- ---------------------------- +DROP TABLE IF EXISTS `crawl_single_task`; +CREATE TABLE `crawl_single_task` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '主键', + `source_id` int(11) DEFAULT NULL COMMENT '爬虫源ID', + `source_name` varchar(50) DEFAULT NULL COMMENT '爬虫源名', + `source_book_id` varchar(255) DEFAULT NULL COMMENT '源站小说ID', + `cat_id` int(11) DEFAULT NULL COMMENT '分类ID', + `book_name` varchar(50) DEFAULT NULL COMMENT '爬取的小说名', + `author_name` varchar(50) DEFAULT NULL COMMENT '爬取的小说作者名', + `task_status` tinyint(1) DEFAULT '2' COMMENT '任务状态,0:失败,1:成功,2;未执行', + `exc_count` tinyint(2) DEFAULT '0' COMMENT '已经执行次数,最多执行5次', + `create_time` datetime DEFAULT NULL COMMENT '创建时间', + PRIMARY KEY (`id`) +) ENGINE=InnoDB AUTO_INCREMENT=8 DEFAULT CHARSET=utf8mb4 COMMENT='抓取单本小说任务表'; + +-- ---------------------------- +-- Records of crawl_single_task +-- ---------------------------- +INSERT INTO `crawl_single_task` VALUES ('6', '2', '百书斋', '1', '1', '1', '1', '0', '5', '2020-06-15 14:36:07'); +INSERT INTO `crawl_single_task` VALUES ('7', '5', '笔趣阁', '108_108291', '1', '衍天志之不朽仙', '白衣少年丶', '1', '1', '2020-06-15 14:46:08'); diff --git a/sql/novel_plus.sql b/sql/novel_plus.sql index f7bf301..6601b32 100644 --- a/sql/novel_plus.sql +++ b/sql/novel_plus.sql @@ -1809,4 +1809,26 @@ CREATE TABLE `book_content9` ( PRIMARY KEY (`id`), UNIQUE KEY `key_uq_indexId` (`index_id`) USING BTREE ) ENGINE=InnoDB AUTO_INCREMENT=415 DEFAULT CHARSET=utf8mb4 COMMENT='小说内容表'; + + +DROP TABLE IF EXISTS `crawl_single_task`; +CREATE TABLE `crawl_single_task` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '主键', + `source_id` int(11) DEFAULT NULL COMMENT '爬虫源ID', + `source_name` varchar(50) DEFAULT NULL COMMENT '爬虫源名', + `source_book_id` varchar(255) DEFAULT NULL COMMENT '源站小说ID', + `cat_id` int(11) DEFAULT NULL COMMENT '分类ID', + `book_name` varchar(50) DEFAULT NULL COMMENT '爬取的小说名', + `author_name` varchar(50) DEFAULT NULL COMMENT '爬取的小说作者名', + `task_status` tinyint(1) DEFAULT '2' COMMENT '任务状态,0:失败,1:成功,2;未执行', + `exc_count` tinyint(2) DEFAULT '0' COMMENT '已经执行次数,最多执行5次', + `create_time` datetime DEFAULT NULL COMMENT '创建时间', + PRIMARY KEY (`id`) +) ENGINE=InnoDB AUTO_INCREMENT=8 DEFAULT CHARSET=utf8mb4 COMMENT='抓取单本小说任务表'; + +-- ---------------------------- +-- Records of crawl_single_task +-- ---------------------------- +INSERT INTO `crawl_single_task` VALUES ('6', '2', '百书斋', '1', '1', '1', '1', '0', '5', '2020-06-15 14:36:07'); +INSERT INTO `crawl_single_task` VALUES ('7', '5', '笔趣阁', '108_108291', '1', '衍天志之不朽仙', '白衣少年丶', '1', '1', '2020-06-15 14:46:08'); UPDATE `crawl_source` SET `source_name` = '书趣阁', `crawl_rule` = '{\n \"bookListUrl\": \"http://m.shuquge.com/sort/{catId}/0_{page}.html\",\n \"catIdRule\": {\n \"catId1\": \"1\",\n \"catId2\": \"2\",\n \"catId3\": \"3\",\n \"catId4\": \"4\",\n \"catId5\": \"7\",\n \"catId6\": \"6\",\n \"catId7\": \"8\"\n },\n \"bookIdPatten\": \"href=\\\"/s/(\\\\d+)\\\\.html\\\"\",\n \"pagePatten\": \"第(\\\\d+)/\\\\d+页\",\n \"totalPagePatten\": \"第\\\\d+/(\\\\d+)页\",\n \"bookDetailUrl\": \"http://m.shuquge.com/s/{bookId}.html\",\n \"bookNamePatten\": \"

([^/]+)

\",\n \"authorNamePatten\": \"

作者:([^/]+)

\",\n \"picUrlPatten\": \"src=\\\"(http://www.shuquge.com/files/article/image/\\\\d+/\\\\d+/\\\\d+s\\\\.jpg)\\\"\",\n \"statusPatten\": \"

状态:([^/]+)

\",\n \"bookStatusRule\": {\n \"连载中\": 0,\n \"完本\": 1\n },\n \"descStart\": \"
\",\n \"descEnd\": \"最新章节推荐地址\",\n \"bookIndexUrl\": \"http://www.shuquge.com/txt/{bookId}/index.html\",\n \"bookIndexStart\": \"
《\",\n \"indexIdPatten\": \"
[^/]+
\",\n \"indexNamePatten\": \"
([^/]+)
\",\n \"bookContentUrl\": \"http://www.shuquge.com/txt/{bookId}/{indexId}.html\",\n \"contentStart\": \"
\",\n \"contentEnd\": \"http://www.shuquge.com\"\n}', `source_status` = 1, `create_time` = '2020-05-18 12:02:34', `update_time` = '2020-05-18 12:02:34' WHERE `id` = 4; \ No newline at end of file