diff --git a/doc/sql/20250711.sql b/doc/sql/20250711.sql new file mode 100644 index 0000000..e50a849 --- /dev/null +++ b/doc/sql/20250711.sql @@ -0,0 +1,3 @@ +alter table crawl_single_task add column crawl_chapters int DEFAULT 0 COMMENT '采集章节数量' after exc_count ; + + diff --git a/doc/sql/novel_plus.sql b/doc/sql/novel_plus.sql index b73201b..53bc41e 100644 --- a/doc/sql/novel_plus.sql +++ b/doc/sql/novel_plus.sql @@ -3159,3 +3159,4 @@ where menu_id = 57; alter table book_comment add column location varchar(50) DEFAULT NULL COMMENT '地理位置' after comment_content ; +alter table crawl_single_task add column crawl_chapters int DEFAULT 0 COMMENT '采集章节数量' after exc_count ; diff --git a/novel-common/src/main/java/com/java2nb/novel/entity/CrawlSingleTask.java b/novel-common/src/main/java/com/java2nb/novel/entity/CrawlSingleTask.java index 0f72b05..274d9dc 100644 --- a/novel-common/src/main/java/com/java2nb/novel/entity/CrawlSingleTask.java +++ b/novel-common/src/main/java/com/java2nb/novel/entity/CrawlSingleTask.java @@ -31,6 +31,9 @@ public class CrawlSingleTask { @Generated("org.mybatis.generator.api.MyBatisGenerator") private Byte excCount; + @Generated("org.mybatis.generator.api.MyBatisGenerator") + private Integer crawlChapters; + @Generated("org.mybatis.generator.api.MyBatisGenerator") private Date createTime; @@ -124,6 +127,16 @@ public class CrawlSingleTask { this.excCount = excCount; } + @Generated("org.mybatis.generator.api.MyBatisGenerator") + public Integer getCrawlChapters() { + return crawlChapters; + } + + @Generated("org.mybatis.generator.api.MyBatisGenerator") + public void setCrawlChapters(Integer crawlChapters) { + this.crawlChapters = crawlChapters; + } + @Generated("org.mybatis.generator.api.MyBatisGenerator") public Date getCreateTime() { return createTime; diff --git a/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskDynamicSqlSupport.java b/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskDynamicSqlSupport.java index f26a068..fad7e2c 100644 --- a/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskDynamicSqlSupport.java +++ b/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskDynamicSqlSupport.java @@ -37,6 +37,9 @@ public final class CrawlSingleTaskDynamicSqlSupport { @Generated("org.mybatis.generator.api.MyBatisGenerator") public static final SqlColumn excCount = crawlSingleTask.excCount; + @Generated("org.mybatis.generator.api.MyBatisGenerator") + public static final SqlColumn crawlChapters = crawlSingleTask.crawlChapters; + @Generated("org.mybatis.generator.api.MyBatisGenerator") public static final SqlColumn createTime = crawlSingleTask.createTime; @@ -60,6 +63,8 @@ public final class CrawlSingleTaskDynamicSqlSupport { public final SqlColumn excCount = column("exc_count", JDBCType.TINYINT); + public final SqlColumn crawlChapters = column("crawl_chapters", JDBCType.INTEGER); + public final SqlColumn createTime = column("create_time", JDBCType.TIMESTAMP); public CrawlSingleTask() { diff --git a/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskMapper.java b/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskMapper.java index 646bc1e..5578448 100644 --- a/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskMapper.java +++ b/novel-common/src/main/java/com/java2nb/novel/mapper/CrawlSingleTaskMapper.java @@ -35,7 +35,7 @@ import org.mybatis.dynamic.sql.util.mybatis3.MyBatis3Utils; @Mapper public interface CrawlSingleTaskMapper { @Generated("org.mybatis.generator.api.MyBatisGenerator") - BasicColumn[] selectList = BasicColumn.columnList(id, sourceId, sourceName, sourceBookId, catId, bookName, authorName, taskStatus, excCount, createTime); + BasicColumn[] selectList = BasicColumn.columnList(id, sourceId, sourceName, sourceBookId, catId, bookName, authorName, taskStatus, excCount, crawlChapters, createTime); @Generated("org.mybatis.generator.api.MyBatisGenerator") @SelectProvider(type=SqlProviderAdapter.class, method="select") @@ -70,6 +70,7 @@ public interface CrawlSingleTaskMapper { @Result(column="author_name", property="authorName", jdbcType=JdbcType.VARCHAR), @Result(column="task_status", property="taskStatus", jdbcType=JdbcType.TINYINT), @Result(column="exc_count", property="excCount", jdbcType=JdbcType.TINYINT), + @Result(column="crawl_chapters", property="crawlChapters", jdbcType=JdbcType.INTEGER), @Result(column="create_time", property="createTime", jdbcType=JdbcType.TIMESTAMP) }) List selectMany(SelectStatementProvider selectStatement); @@ -90,7 +91,7 @@ public interface CrawlSingleTaskMapper { @Generated("org.mybatis.generator.api.MyBatisGenerator") default int deleteByPrimaryKey(Long id_) { - return delete(c -> + return delete(c -> c.where(id, isEqualTo(id_)) ); } @@ -99,15 +100,16 @@ public interface CrawlSingleTaskMapper { default int insert(CrawlSingleTask record) { return MyBatis3Utils.insert(this::insert, record, crawlSingleTask, c -> c.map(id).toProperty("id") - .map(sourceId).toProperty("sourceId") - .map(sourceName).toProperty("sourceName") - .map(sourceBookId).toProperty("sourceBookId") - .map(catId).toProperty("catId") - .map(bookName).toProperty("bookName") - .map(authorName).toProperty("authorName") - .map(taskStatus).toProperty("taskStatus") - .map(excCount).toProperty("excCount") - .map(createTime).toProperty("createTime") + .map(sourceId).toProperty("sourceId") + .map(sourceName).toProperty("sourceName") + .map(sourceBookId).toProperty("sourceBookId") + .map(catId).toProperty("catId") + .map(bookName).toProperty("bookName") + .map(authorName).toProperty("authorName") + .map(taskStatus).toProperty("taskStatus") + .map(excCount).toProperty("excCount") + .map(crawlChapters).toProperty("crawlChapters") + .map(createTime).toProperty("createTime") ); } @@ -115,15 +117,16 @@ public interface CrawlSingleTaskMapper { default int insertMultiple(Collection records) { return MyBatis3Utils.insertMultiple(this::insertMultiple, records, crawlSingleTask, c -> c.map(id).toProperty("id") - .map(sourceId).toProperty("sourceId") - .map(sourceName).toProperty("sourceName") - .map(sourceBookId).toProperty("sourceBookId") - .map(catId).toProperty("catId") - .map(bookName).toProperty("bookName") - .map(authorName).toProperty("authorName") - .map(taskStatus).toProperty("taskStatus") - .map(excCount).toProperty("excCount") - .map(createTime).toProperty("createTime") + .map(sourceId).toProperty("sourceId") + .map(sourceName).toProperty("sourceName") + .map(sourceBookId).toProperty("sourceBookId") + .map(catId).toProperty("catId") + .map(bookName).toProperty("bookName") + .map(authorName).toProperty("authorName") + .map(taskStatus).toProperty("taskStatus") + .map(excCount).toProperty("excCount") + .map(crawlChapters).toProperty("crawlChapters") + .map(createTime).toProperty("createTime") ); } @@ -131,15 +134,16 @@ public interface CrawlSingleTaskMapper { default int insertSelective(CrawlSingleTask record) { return MyBatis3Utils.insert(this::insert, record, crawlSingleTask, c -> c.map(id).toPropertyWhenPresent("id", record::getId) - .map(sourceId).toPropertyWhenPresent("sourceId", record::getSourceId) - .map(sourceName).toPropertyWhenPresent("sourceName", record::getSourceName) - .map(sourceBookId).toPropertyWhenPresent("sourceBookId", record::getSourceBookId) - .map(catId).toPropertyWhenPresent("catId", record::getCatId) - .map(bookName).toPropertyWhenPresent("bookName", record::getBookName) - .map(authorName).toPropertyWhenPresent("authorName", record::getAuthorName) - .map(taskStatus).toPropertyWhenPresent("taskStatus", record::getTaskStatus) - .map(excCount).toPropertyWhenPresent("excCount", record::getExcCount) - .map(createTime).toPropertyWhenPresent("createTime", record::getCreateTime) + .map(sourceId).toPropertyWhenPresent("sourceId", record::getSourceId) + .map(sourceName).toPropertyWhenPresent("sourceName", record::getSourceName) + .map(sourceBookId).toPropertyWhenPresent("sourceBookId", record::getSourceBookId) + .map(catId).toPropertyWhenPresent("catId", record::getCatId) + .map(bookName).toPropertyWhenPresent("bookName", record::getBookName) + .map(authorName).toPropertyWhenPresent("authorName", record::getAuthorName) + .map(taskStatus).toPropertyWhenPresent("taskStatus", record::getTaskStatus) + .map(excCount).toPropertyWhenPresent("excCount", record::getExcCount) + .map(crawlChapters).toPropertyWhenPresent("crawlChapters", record::getCrawlChapters) + .map(createTime).toPropertyWhenPresent("createTime", record::getCreateTime) ); } @@ -173,35 +177,7 @@ public interface CrawlSingleTaskMapper { @Generated("org.mybatis.generator.api.MyBatisGenerator") static UpdateDSL updateAllColumns(CrawlSingleTask record, UpdateDSL dsl) { return dsl.set(id).equalTo(record::getId) - .set(sourceId).equalTo(record::getSourceId) - .set(sourceName).equalTo(record::getSourceName) - .set(sourceBookId).equalTo(record::getSourceBookId) - .set(catId).equalTo(record::getCatId) - .set(bookName).equalTo(record::getBookName) - .set(authorName).equalTo(record::getAuthorName) - .set(taskStatus).equalTo(record::getTaskStatus) - .set(excCount).equalTo(record::getExcCount) - .set(createTime).equalTo(record::getCreateTime); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - static UpdateDSL updateSelectiveColumns(CrawlSingleTask record, UpdateDSL dsl) { - return dsl.set(id).equalToWhenPresent(record::getId) - .set(sourceId).equalToWhenPresent(record::getSourceId) - .set(sourceName).equalToWhenPresent(record::getSourceName) - .set(sourceBookId).equalToWhenPresent(record::getSourceBookId) - .set(catId).equalToWhenPresent(record::getCatId) - .set(bookName).equalToWhenPresent(record::getBookName) - .set(authorName).equalToWhenPresent(record::getAuthorName) - .set(taskStatus).equalToWhenPresent(record::getTaskStatus) - .set(excCount).equalToWhenPresent(record::getExcCount) - .set(createTime).equalToWhenPresent(record::getCreateTime); - } - - @Generated("org.mybatis.generator.api.MyBatisGenerator") - default int updateByPrimaryKey(CrawlSingleTask record) { - return update(c -> - c.set(sourceId).equalTo(record::getSourceId) + .set(sourceId).equalTo(record::getSourceId) .set(sourceName).equalTo(record::getSourceName) .set(sourceBookId).equalTo(record::getSourceBookId) .set(catId).equalTo(record::getCatId) @@ -209,15 +185,14 @@ public interface CrawlSingleTaskMapper { .set(authorName).equalTo(record::getAuthorName) .set(taskStatus).equalTo(record::getTaskStatus) .set(excCount).equalTo(record::getExcCount) - .set(createTime).equalTo(record::getCreateTime) - .where(id, isEqualTo(record::getId)) - ); + .set(crawlChapters).equalTo(record::getCrawlChapters) + .set(createTime).equalTo(record::getCreateTime); } @Generated("org.mybatis.generator.api.MyBatisGenerator") - default int updateByPrimaryKeySelective(CrawlSingleTask record) { - return update(c -> - c.set(sourceId).equalToWhenPresent(record::getSourceId) + static UpdateDSL updateSelectiveColumns(CrawlSingleTask record, UpdateDSL dsl) { + return dsl.set(id).equalToWhenPresent(record::getId) + .set(sourceId).equalToWhenPresent(record::getSourceId) .set(sourceName).equalToWhenPresent(record::getSourceName) .set(sourceBookId).equalToWhenPresent(record::getSourceBookId) .set(catId).equalToWhenPresent(record::getCatId) @@ -225,8 +200,41 @@ public interface CrawlSingleTaskMapper { .set(authorName).equalToWhenPresent(record::getAuthorName) .set(taskStatus).equalToWhenPresent(record::getTaskStatus) .set(excCount).equalToWhenPresent(record::getExcCount) - .set(createTime).equalToWhenPresent(record::getCreateTime) - .where(id, isEqualTo(record::getId)) + .set(crawlChapters).equalToWhenPresent(record::getCrawlChapters) + .set(createTime).equalToWhenPresent(record::getCreateTime); + } + + @Generated("org.mybatis.generator.api.MyBatisGenerator") + default int updateByPrimaryKey(CrawlSingleTask record) { + return update(c -> + c.set(sourceId).equalTo(record::getSourceId) + .set(sourceName).equalTo(record::getSourceName) + .set(sourceBookId).equalTo(record::getSourceBookId) + .set(catId).equalTo(record::getCatId) + .set(bookName).equalTo(record::getBookName) + .set(authorName).equalTo(record::getAuthorName) + .set(taskStatus).equalTo(record::getTaskStatus) + .set(excCount).equalTo(record::getExcCount) + .set(crawlChapters).equalTo(record::getCrawlChapters) + .set(createTime).equalTo(record::getCreateTime) + .where(id, isEqualTo(record::getId)) + ); + } + + @Generated("org.mybatis.generator.api.MyBatisGenerator") + default int updateByPrimaryKeySelective(CrawlSingleTask record) { + return update(c -> + c.set(sourceId).equalToWhenPresent(record::getSourceId) + .set(sourceName).equalToWhenPresent(record::getSourceName) + .set(sourceBookId).equalToWhenPresent(record::getSourceBookId) + .set(catId).equalToWhenPresent(record::getCatId) + .set(bookName).equalToWhenPresent(record::getBookName) + .set(authorName).equalToWhenPresent(record::getAuthorName) + .set(taskStatus).equalToWhenPresent(record::getTaskStatus) + .set(excCount).equalToWhenPresent(record::getExcCount) + .set(crawlChapters).equalToWhenPresent(record::getCrawlChapters) + .set(createTime).equalToWhenPresent(record::getCreateTime) + .where(id, isEqualTo(record::getId)) ); } } \ No newline at end of file diff --git a/novel-crawl/src/main/java/com/java2nb/novel/controller/CrawlController.java b/novel-crawl/src/main/java/com/java2nb/novel/controller/CrawlController.java index 6b312fc..84f8de0 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/controller/CrawlController.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/controller/CrawlController.java @@ -153,6 +153,14 @@ public class CrawlController { return RestResult.ok(); } + /** + * 采集任务进度查询 + * */ + @GetMapping("getTaskProgress/{id}") + public RestResult getTaskProgress(@PathVariable("id") Long id){ + return RestResult.ok(crawlService.getTaskProgress(id)); + } + diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java index f9d6d32..8c20655 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java @@ -1,25 +1,24 @@ package com.java2nb.novel.core.crawl; +import com.java2nb.novel.core.cache.CacheKey; +import com.java2nb.novel.core.cache.CacheService; import com.java2nb.novel.core.utils.RandomBookInfoUtil; import com.java2nb.novel.core.utils.StringUtil; import com.java2nb.novel.entity.Book; import com.java2nb.novel.entity.BookContent; import com.java2nb.novel.entity.BookIndex; +import com.java2nb.novel.entity.CrawlSingleTask; import com.java2nb.novel.utils.Constants; import com.java2nb.novel.utils.CrawlHttpClient; import io.github.xxyopen.util.IdWorker; import lombok.RequiredArgsConstructor; -import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; import org.springframework.stereotype.Component; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -37,6 +36,18 @@ public class CrawlParser { private final CrawlHttpClient crawlHttpClient; + /** + * 爬虫任务进度 + */ + private final Map crawlTaskProgress = new HashMap<>(); + + /** + * 获取爬虫任务进度 + */ + public Integer getCrawlTaskProgress(Long taskId) { + return crawlTaskProgress.get(taskId); + } + public void parseBook(RuleBean ruleBean, String bookId, CrawlBookHandler handler) throws InterruptedException { Book book = new Book(); @@ -156,7 +167,7 @@ public class CrawlParser { } else if (book.getVisitCount() != null && book.getScore() == null) { //随机根据访问次数生成评分 book.setScore(RandomBookInfoUtil.getScoreByVisitCount(book.getVisitCount())); - } else if (book.getVisitCount() == null && book.getScore() == null) { + } else if (book.getVisitCount() == null) { //都没有,设置成固定值 book.setVisitCount(Constants.VISIT_COUNT_DEFAULT); book.setScore(6.5f); @@ -167,7 +178,13 @@ public class CrawlParser { } public boolean parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean, - Map existBookIndexMap, CrawlBookChapterHandler handler) throws InterruptedException { + Map existBookIndexMap, CrawlBookChapterHandler handler, CrawlSingleTask task) + throws InterruptedException { + + if (task != null) { + // 开始采集 + crawlTaskProgress.put(task.getId(), 0); + } Date currentDate = new Date(); @@ -225,7 +242,7 @@ public class CrawlParser { calResult = sourceIndexId.substring(0, sourceBookId.length() - y); } - if (calResult.length() == 0) { + if (calResult.isEmpty()) { calResult = "0"; } @@ -291,6 +308,11 @@ public class CrawlParser { } bookIndex.setUpdateTime(currentDate); + if (task != null) { + // 更新采集进度 + crawlTaskProgress.put(task.getId(), indexNum + 1); + } + } @@ -300,10 +322,10 @@ public class CrawlParser { isFindIndex = indexIdMatch.find() & indexNameMatch.find(); } - if (indexList.size() > 0) { + if (!indexList.isEmpty()) { //如果有爬到最新章节,则设置小说主表的最新章节信息 //获取爬取到的最新章节 - BookIndex lastIndex = indexList.get(indexList.size() - 1); + BookIndex lastIndex = indexList.getLast(); book.setLastIndexId(lastIndex.getId()); book.setLastIndexName(lastIndex.getIndexName()); book.setLastIndexUpdateTime(currentDate); @@ -312,7 +334,7 @@ public class CrawlParser { book.setWordCount(totalWordCount); book.setUpdateTime(currentDate); - if (indexList.size() == contentList.size() && indexList.size() > 0) { + if (indexList.size() == contentList.size() && !indexList.isEmpty()) { handler.handle(new ChapterBean() {{ setBookIndexList(indexList); diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java b/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java index a062ffc..4ca21cd 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/core/listener/StarterListener.java @@ -74,8 +74,10 @@ public class StarterListener implements ServletContextInitializer { needUpdateBook.getId()); //解析章节目录 crawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(), book, - ruleBean, existBookIndexMap, chapter -> bookService.updateBookAndIndexAndContent(book, chapter.getBookIndexList(), - chapter.getBookContentList(), existBookIndexMap)); + ruleBean, existBookIndexMap, + chapter -> bookService.updateBookAndIndexAndContent(book, + chapter.getBookIndexList(), + chapter.getBookContentList(), existBookIndexMap), null); }); } catch (Exception e) { log.error(e.getMessage(), e); @@ -107,9 +109,8 @@ public class StarterListener implements ServletContextInitializer { //查询爬虫规则 CrawlSource source = crawlService.queryCrawlSource(task.getSourceId()); RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class); - if (crawlService.parseBookAndSave(task.getCatId(), ruleBean, task.getSourceId(), - task.getSourceBookId())) { + task.getSourceBookId(), task)) { //采集成功 crawlStatus = 1; } @@ -122,6 +123,7 @@ public class StarterListener implements ServletContextInitializer { } catch (Exception e) { log.error(e.getMessage(), e); } + if (task != null) { crawlService.updateCrawlSingleTask(task, crawlStatus); } diff --git a/novel-crawl/src/main/java/com/java2nb/novel/service/CrawlService.java b/novel-crawl/src/main/java/com/java2nb/novel/service/CrawlService.java index f6dbb97..b8a0620 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/service/CrawlService.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/service/CrawlService.java @@ -47,13 +47,15 @@ public interface CrawlService { /** * 采集并保存小说 - * @param catId 分类ID - * @param bookId 小说ID - * @param sourceId 源ID + * + * @param catId 分类ID * @param ruleBean 采集规则\ + * @param sourceId 源ID + * @param bookId 小说ID + * @param task * @return true:成功,false:失败 - * */ - boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId) throws InterruptedException; + */ + boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId, CrawlSingleTask task) throws InterruptedException; /** * 根据爬虫状态查询爬虫源集合 @@ -117,4 +119,9 @@ public interface CrawlService { * @return */ CrawlSource getCrawlSource(Integer id); + + /** + * 采集任务进度查询 + * */ + Integer getTaskProgress(Long taskId); } diff --git a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java index 5bb92ea..94a9a0d 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java @@ -2,12 +2,10 @@ package com.java2nb.novel.service.impl; import com.fasterxml.jackson.databind.ObjectMapper; import com.github.pagehelper.PageHelper; -import com.java2nb.novel.core.cache.CacheKey; import com.java2nb.novel.core.cache.CacheService; import com.java2nb.novel.core.crawl.CrawlParser; import com.java2nb.novel.core.crawl.RuleBean; import com.java2nb.novel.core.enums.ResponseStatus; -import com.java2nb.novel.core.utils.SpringUtil; import com.java2nb.novel.entity.Book; import com.java2nb.novel.entity.CrawlSingleTask; import com.java2nb.novel.entity.CrawlSource; @@ -60,8 +58,6 @@ public class CrawlServiceImpl implements CrawlService { private final BookService bookService; - private final CacheService cacheService; - private final IdWorker idWorker = IdWorker.INSTANCE; private final CrawlHttpClient crawlHttpClient; @@ -198,6 +194,16 @@ public class CrawlServiceImpl implements CrawlService { List crawlSingleTasks = crawlSingleTaskMapper.selectMany(render); PageBean pageBean = PageBuilder.build(crawlSingleTasks); pageBean.setList(BeanUtil.copyList(crawlSingleTasks, CrawlSingleTaskVO.class)); + for (CrawlSingleTask crawlSingleTask : pageBean.getList()) { + if (crawlSingleTask.getTaskStatus() == 2 + && crawlParser.getCrawlTaskProgress(crawlSingleTask.getId()) != null) { + // 如果排队中的任务有任务进度 + // 1.设置任务进度 + crawlSingleTask.setCrawlChapters(crawlParser.getCrawlTaskProgress(crawlSingleTask.getId())); + // 2.将排队中的任务状态修改成采集中 + crawlSingleTask.setTaskStatus((byte) 3); + } + } return pageBean; } @@ -227,9 +233,13 @@ public class CrawlServiceImpl implements CrawlService { excCount += 1; task.setExcCount(excCount); if (status == 1 || excCount == 5) { - //当采集成功或者采集次数等于5,则更新采集最终状态,并停止采集 + // 当采集成功或者采集次数等于5,则更新采集最终状态,并停止采集 task.setTaskStatus(status); } + if (status == 1) { + // 当采集成功,保存采集的章节数量 + task.setCrawlChapters(crawlParser.getCrawlTaskProgress(task.getId())); + } crawlSingleTaskMapper.updateByPrimaryKeySelective(task); } @@ -244,6 +254,11 @@ public class CrawlServiceImpl implements CrawlService { return null; } + @Override + public Integer getTaskProgress(Long taskId) { + return Optional.ofNullable(crawlParser.getCrawlTaskProgress(taskId)).orElse(0); + } + /** * 解析分类列表 */ @@ -291,7 +306,7 @@ public class CrawlServiceImpl implements CrawlService { } String bookId = bookIdMatcher.group(1); - parseBookAndSave(catId, ruleBean, sourceId, bookId); + parseBookAndSave(catId, ruleBean, sourceId, bookId, null); } catch (InterruptedException e) { log.error(e.getMessage(), e); //1.阻塞过程(使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时) @@ -345,7 +360,7 @@ public class CrawlServiceImpl implements CrawlService { } @Override - public boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId) + public boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId, CrawlSingleTask task) throws InterruptedException { final AtomicBoolean parseResult = new AtomicBoolean(false); @@ -378,7 +393,7 @@ public class CrawlServiceImpl implements CrawlService { new HashMap<>(0), chapter -> { bookService.saveBookAndIndexAndContent(book, chapter.getBookIndexList(), chapter.getBookContentList()); - }); + }, task); parseResult.set(parseIndexContentResult); } else { diff --git a/novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_list.html b/novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_list.html index d7fe935..f0bca00 100644 --- a/novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_list.html +++ b/novel-crawl/src/main/resources/templates/crawl/crawlSingleTask_list.html @@ -48,6 +48,9 @@ 采集小说作者名 + + 采集进度 + 采集次数 @@ -113,9 +116,15 @@