mirror of
https://github.com/201206030/novel-plus.git
synced 2025-07-14 21:26:41 +00:00
feat(novel-crawl): 增加单本采集任务进度显示功能
This commit is contained in:
3
doc/sql/20250711.sql
Normal file
3
doc/sql/20250711.sql
Normal file
@ -0,0 +1,3 @@
|
||||
-- Adds the crawl_chapters column (INT, default 0) to crawl_single_task, positioned after exc_count.
alter table crawl_single_task add column crawl_chapters int DEFAULT 0 COMMENT '采集章节数量' after exc_count ;
|
||||
|
||||
|
@ -3159,3 +3159,4 @@ where menu_id = 57;
|
||||
alter table book_comment add column location varchar(50) DEFAULT NULL COMMENT '地理位置' after comment_content ;
|
||||
|
||||
|
||||
-- Adds the crawl_chapters column (INT, default 0) to crawl_single_task, positioned after exc_count.
alter table crawl_single_task add column crawl_chapters int DEFAULT 0 COMMENT '采集章节数量' after exc_count ;
|
||||
|
@ -31,6 +31,9 @@ public class CrawlSingleTask {
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
private Byte excCount;
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
private Integer crawlChapters;
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
private Date createTime;
|
||||
|
||||
@ -124,6 +127,16 @@ public class CrawlSingleTask {
|
||||
this.excCount = excCount;
|
||||
}
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
public Integer getCrawlChapters() {
|
||||
return crawlChapters;
|
||||
}
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
public void setCrawlChapters(Integer crawlChapters) {
|
||||
this.crawlChapters = crawlChapters;
|
||||
}
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
public Date getCreateTime() {
|
||||
return createTime;
|
||||
|
@ -37,6 +37,9 @@ public final class CrawlSingleTaskDynamicSqlSupport {
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
public static final SqlColumn<Byte> excCount = crawlSingleTask.excCount;
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
public static final SqlColumn<Integer> crawlChapters = crawlSingleTask.crawlChapters;
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
public static final SqlColumn<Date> createTime = crawlSingleTask.createTime;
|
||||
|
||||
@ -60,6 +63,8 @@ public final class CrawlSingleTaskDynamicSqlSupport {
|
||||
|
||||
public final SqlColumn<Byte> excCount = column("exc_count", JDBCType.TINYINT);
|
||||
|
||||
public final SqlColumn<Integer> crawlChapters = column("crawl_chapters", JDBCType.INTEGER);
|
||||
|
||||
public final SqlColumn<Date> createTime = column("create_time", JDBCType.TIMESTAMP);
|
||||
|
||||
public CrawlSingleTask() {
|
||||
|
@ -35,7 +35,7 @@ import org.mybatis.dynamic.sql.util.mybatis3.MyBatis3Utils;
|
||||
@Mapper
|
||||
public interface CrawlSingleTaskMapper {
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
BasicColumn[] selectList = BasicColumn.columnList(id, sourceId, sourceName, sourceBookId, catId, bookName, authorName, taskStatus, excCount, createTime);
|
||||
BasicColumn[] selectList = BasicColumn.columnList(id, sourceId, sourceName, sourceBookId, catId, bookName, authorName, taskStatus, excCount, crawlChapters, createTime);
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
@SelectProvider(type=SqlProviderAdapter.class, method="select")
|
||||
@ -70,6 +70,7 @@ public interface CrawlSingleTaskMapper {
|
||||
@Result(column="author_name", property="authorName", jdbcType=JdbcType.VARCHAR),
|
||||
@Result(column="task_status", property="taskStatus", jdbcType=JdbcType.TINYINT),
|
||||
@Result(column="exc_count", property="excCount", jdbcType=JdbcType.TINYINT),
|
||||
@Result(column="crawl_chapters", property="crawlChapters", jdbcType=JdbcType.INTEGER),
|
||||
@Result(column="create_time", property="createTime", jdbcType=JdbcType.TIMESTAMP)
|
||||
})
|
||||
List<CrawlSingleTask> selectMany(SelectStatementProvider selectStatement);
|
||||
@ -90,7 +91,7 @@ public interface CrawlSingleTaskMapper {
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
default int deleteByPrimaryKey(Long id_) {
|
||||
return delete(c ->
|
||||
return delete(c ->
|
||||
c.where(id, isEqualTo(id_))
|
||||
);
|
||||
}
|
||||
@ -99,15 +100,16 @@ public interface CrawlSingleTaskMapper {
|
||||
default int insert(CrawlSingleTask record) {
|
||||
return MyBatis3Utils.insert(this::insert, record, crawlSingleTask, c ->
|
||||
c.map(id).toProperty("id")
|
||||
.map(sourceId).toProperty("sourceId")
|
||||
.map(sourceName).toProperty("sourceName")
|
||||
.map(sourceBookId).toProperty("sourceBookId")
|
||||
.map(catId).toProperty("catId")
|
||||
.map(bookName).toProperty("bookName")
|
||||
.map(authorName).toProperty("authorName")
|
||||
.map(taskStatus).toProperty("taskStatus")
|
||||
.map(excCount).toProperty("excCount")
|
||||
.map(createTime).toProperty("createTime")
|
||||
.map(sourceId).toProperty("sourceId")
|
||||
.map(sourceName).toProperty("sourceName")
|
||||
.map(sourceBookId).toProperty("sourceBookId")
|
||||
.map(catId).toProperty("catId")
|
||||
.map(bookName).toProperty("bookName")
|
||||
.map(authorName).toProperty("authorName")
|
||||
.map(taskStatus).toProperty("taskStatus")
|
||||
.map(excCount).toProperty("excCount")
|
||||
.map(crawlChapters).toProperty("crawlChapters")
|
||||
.map(createTime).toProperty("createTime")
|
||||
);
|
||||
}
|
||||
|
||||
@ -115,15 +117,16 @@ public interface CrawlSingleTaskMapper {
|
||||
default int insertMultiple(Collection<CrawlSingleTask> records) {
|
||||
return MyBatis3Utils.insertMultiple(this::insertMultiple, records, crawlSingleTask, c ->
|
||||
c.map(id).toProperty("id")
|
||||
.map(sourceId).toProperty("sourceId")
|
||||
.map(sourceName).toProperty("sourceName")
|
||||
.map(sourceBookId).toProperty("sourceBookId")
|
||||
.map(catId).toProperty("catId")
|
||||
.map(bookName).toProperty("bookName")
|
||||
.map(authorName).toProperty("authorName")
|
||||
.map(taskStatus).toProperty("taskStatus")
|
||||
.map(excCount).toProperty("excCount")
|
||||
.map(createTime).toProperty("createTime")
|
||||
.map(sourceId).toProperty("sourceId")
|
||||
.map(sourceName).toProperty("sourceName")
|
||||
.map(sourceBookId).toProperty("sourceBookId")
|
||||
.map(catId).toProperty("catId")
|
||||
.map(bookName).toProperty("bookName")
|
||||
.map(authorName).toProperty("authorName")
|
||||
.map(taskStatus).toProperty("taskStatus")
|
||||
.map(excCount).toProperty("excCount")
|
||||
.map(crawlChapters).toProperty("crawlChapters")
|
||||
.map(createTime).toProperty("createTime")
|
||||
);
|
||||
}
|
||||
|
||||
@ -131,15 +134,16 @@ public interface CrawlSingleTaskMapper {
|
||||
default int insertSelective(CrawlSingleTask record) {
|
||||
return MyBatis3Utils.insert(this::insert, record, crawlSingleTask, c ->
|
||||
c.map(id).toPropertyWhenPresent("id", record::getId)
|
||||
.map(sourceId).toPropertyWhenPresent("sourceId", record::getSourceId)
|
||||
.map(sourceName).toPropertyWhenPresent("sourceName", record::getSourceName)
|
||||
.map(sourceBookId).toPropertyWhenPresent("sourceBookId", record::getSourceBookId)
|
||||
.map(catId).toPropertyWhenPresent("catId", record::getCatId)
|
||||
.map(bookName).toPropertyWhenPresent("bookName", record::getBookName)
|
||||
.map(authorName).toPropertyWhenPresent("authorName", record::getAuthorName)
|
||||
.map(taskStatus).toPropertyWhenPresent("taskStatus", record::getTaskStatus)
|
||||
.map(excCount).toPropertyWhenPresent("excCount", record::getExcCount)
|
||||
.map(createTime).toPropertyWhenPresent("createTime", record::getCreateTime)
|
||||
.map(sourceId).toPropertyWhenPresent("sourceId", record::getSourceId)
|
||||
.map(sourceName).toPropertyWhenPresent("sourceName", record::getSourceName)
|
||||
.map(sourceBookId).toPropertyWhenPresent("sourceBookId", record::getSourceBookId)
|
||||
.map(catId).toPropertyWhenPresent("catId", record::getCatId)
|
||||
.map(bookName).toPropertyWhenPresent("bookName", record::getBookName)
|
||||
.map(authorName).toPropertyWhenPresent("authorName", record::getAuthorName)
|
||||
.map(taskStatus).toPropertyWhenPresent("taskStatus", record::getTaskStatus)
|
||||
.map(excCount).toPropertyWhenPresent("excCount", record::getExcCount)
|
||||
.map(crawlChapters).toPropertyWhenPresent("crawlChapters", record::getCrawlChapters)
|
||||
.map(createTime).toPropertyWhenPresent("createTime", record::getCreateTime)
|
||||
);
|
||||
}
|
||||
|
||||
@ -173,35 +177,7 @@ public interface CrawlSingleTaskMapper {
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
static UpdateDSL<UpdateModel> updateAllColumns(CrawlSingleTask record, UpdateDSL<UpdateModel> dsl) {
|
||||
return dsl.set(id).equalTo(record::getId)
|
||||
.set(sourceId).equalTo(record::getSourceId)
|
||||
.set(sourceName).equalTo(record::getSourceName)
|
||||
.set(sourceBookId).equalTo(record::getSourceBookId)
|
||||
.set(catId).equalTo(record::getCatId)
|
||||
.set(bookName).equalTo(record::getBookName)
|
||||
.set(authorName).equalTo(record::getAuthorName)
|
||||
.set(taskStatus).equalTo(record::getTaskStatus)
|
||||
.set(excCount).equalTo(record::getExcCount)
|
||||
.set(createTime).equalTo(record::getCreateTime);
|
||||
}
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
static UpdateDSL<UpdateModel> updateSelectiveColumns(CrawlSingleTask record, UpdateDSL<UpdateModel> dsl) {
|
||||
return dsl.set(id).equalToWhenPresent(record::getId)
|
||||
.set(sourceId).equalToWhenPresent(record::getSourceId)
|
||||
.set(sourceName).equalToWhenPresent(record::getSourceName)
|
||||
.set(sourceBookId).equalToWhenPresent(record::getSourceBookId)
|
||||
.set(catId).equalToWhenPresent(record::getCatId)
|
||||
.set(bookName).equalToWhenPresent(record::getBookName)
|
||||
.set(authorName).equalToWhenPresent(record::getAuthorName)
|
||||
.set(taskStatus).equalToWhenPresent(record::getTaskStatus)
|
||||
.set(excCount).equalToWhenPresent(record::getExcCount)
|
||||
.set(createTime).equalToWhenPresent(record::getCreateTime);
|
||||
}
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
default int updateByPrimaryKey(CrawlSingleTask record) {
|
||||
return update(c ->
|
||||
c.set(sourceId).equalTo(record::getSourceId)
|
||||
.set(sourceId).equalTo(record::getSourceId)
|
||||
.set(sourceName).equalTo(record::getSourceName)
|
||||
.set(sourceBookId).equalTo(record::getSourceBookId)
|
||||
.set(catId).equalTo(record::getCatId)
|
||||
@ -209,15 +185,14 @@ public interface CrawlSingleTaskMapper {
|
||||
.set(authorName).equalTo(record::getAuthorName)
|
||||
.set(taskStatus).equalTo(record::getTaskStatus)
|
||||
.set(excCount).equalTo(record::getExcCount)
|
||||
.set(createTime).equalTo(record::getCreateTime)
|
||||
.where(id, isEqualTo(record::getId))
|
||||
);
|
||||
.set(crawlChapters).equalTo(record::getCrawlChapters)
|
||||
.set(createTime).equalTo(record::getCreateTime);
|
||||
}
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
default int updateByPrimaryKeySelective(CrawlSingleTask record) {
|
||||
return update(c ->
|
||||
c.set(sourceId).equalToWhenPresent(record::getSourceId)
|
||||
static UpdateDSL<UpdateModel> updateSelectiveColumns(CrawlSingleTask record, UpdateDSL<UpdateModel> dsl) {
|
||||
return dsl.set(id).equalToWhenPresent(record::getId)
|
||||
.set(sourceId).equalToWhenPresent(record::getSourceId)
|
||||
.set(sourceName).equalToWhenPresent(record::getSourceName)
|
||||
.set(sourceBookId).equalToWhenPresent(record::getSourceBookId)
|
||||
.set(catId).equalToWhenPresent(record::getCatId)
|
||||
@ -225,8 +200,41 @@ public interface CrawlSingleTaskMapper {
|
||||
.set(authorName).equalToWhenPresent(record::getAuthorName)
|
||||
.set(taskStatus).equalToWhenPresent(record::getTaskStatus)
|
||||
.set(excCount).equalToWhenPresent(record::getExcCount)
|
||||
.set(createTime).equalToWhenPresent(record::getCreateTime)
|
||||
.where(id, isEqualTo(record::getId))
|
||||
.set(crawlChapters).equalToWhenPresent(record::getCrawlChapters)
|
||||
.set(createTime).equalToWhenPresent(record::getCreateTime);
|
||||
}
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
default int updateByPrimaryKey(CrawlSingleTask record) {
|
||||
return update(c ->
|
||||
c.set(sourceId).equalTo(record::getSourceId)
|
||||
.set(sourceName).equalTo(record::getSourceName)
|
||||
.set(sourceBookId).equalTo(record::getSourceBookId)
|
||||
.set(catId).equalTo(record::getCatId)
|
||||
.set(bookName).equalTo(record::getBookName)
|
||||
.set(authorName).equalTo(record::getAuthorName)
|
||||
.set(taskStatus).equalTo(record::getTaskStatus)
|
||||
.set(excCount).equalTo(record::getExcCount)
|
||||
.set(crawlChapters).equalTo(record::getCrawlChapters)
|
||||
.set(createTime).equalTo(record::getCreateTime)
|
||||
.where(id, isEqualTo(record::getId))
|
||||
);
|
||||
}
|
||||
|
||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||
default int updateByPrimaryKeySelective(CrawlSingleTask record) {
|
||||
return update(c ->
|
||||
c.set(sourceId).equalToWhenPresent(record::getSourceId)
|
||||
.set(sourceName).equalToWhenPresent(record::getSourceName)
|
||||
.set(sourceBookId).equalToWhenPresent(record::getSourceBookId)
|
||||
.set(catId).equalToWhenPresent(record::getCatId)
|
||||
.set(bookName).equalToWhenPresent(record::getBookName)
|
||||
.set(authorName).equalToWhenPresent(record::getAuthorName)
|
||||
.set(taskStatus).equalToWhenPresent(record::getTaskStatus)
|
||||
.set(excCount).equalToWhenPresent(record::getExcCount)
|
||||
.set(crawlChapters).equalToWhenPresent(record::getCrawlChapters)
|
||||
.set(createTime).equalToWhenPresent(record::getCreateTime)
|
||||
.where(id, isEqualTo(record::getId))
|
||||
);
|
||||
}
|
||||
}
|
@ -153,6 +153,14 @@ public class CrawlController {
|
||||
return RestResult.ok();
|
||||
}
|
||||
|
||||
/**
|
||||
* 采集任务进度查询
|
||||
* */
|
||||
@GetMapping("getTaskProgress/{id}")
|
||||
public RestResult<Integer> getTaskProgress(@PathVariable("id") Long id){
|
||||
return RestResult.ok(crawlService.getTaskProgress(id));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -1,25 +1,24 @@
|
||||
package com.java2nb.novel.core.crawl;
|
||||
|
||||
import com.java2nb.novel.core.cache.CacheKey;
|
||||
import com.java2nb.novel.core.cache.CacheService;
|
||||
import com.java2nb.novel.core.utils.RandomBookInfoUtil;
|
||||
import com.java2nb.novel.core.utils.StringUtil;
|
||||
import com.java2nb.novel.entity.Book;
|
||||
import com.java2nb.novel.entity.BookContent;
|
||||
import com.java2nb.novel.entity.BookIndex;
|
||||
import com.java2nb.novel.entity.CrawlSingleTask;
|
||||
import com.java2nb.novel.utils.Constants;
|
||||
import com.java2nb.novel.utils.CrawlHttpClient;
|
||||
import io.github.xxyopen.util.IdWorker;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
@ -37,6 +36,18 @@ public class CrawlParser {
|
||||
|
||||
private final CrawlHttpClient crawlHttpClient;
|
||||
|
||||
/**
|
||||
* 爬虫任务进度
|
||||
*/
|
||||
private final Map<Long, Integer> crawlTaskProgress = new HashMap<>();
|
||||
|
||||
/**
|
||||
* 获取爬虫任务进度
|
||||
*/
|
||||
public Integer getCrawlTaskProgress(Long taskId) {
|
||||
return crawlTaskProgress.get(taskId);
|
||||
}
|
||||
|
||||
public void parseBook(RuleBean ruleBean, String bookId, CrawlBookHandler handler)
|
||||
throws InterruptedException {
|
||||
Book book = new Book();
|
||||
@ -156,7 +167,7 @@ public class CrawlParser {
|
||||
} else if (book.getVisitCount() != null && book.getScore() == null) {
|
||||
//随机根据访问次数生成评分
|
||||
book.setScore(RandomBookInfoUtil.getScoreByVisitCount(book.getVisitCount()));
|
||||
} else if (book.getVisitCount() == null && book.getScore() == null) {
|
||||
} else if (book.getVisitCount() == null) {
|
||||
//都没有,设置成固定值
|
||||
book.setVisitCount(Constants.VISIT_COUNT_DEFAULT);
|
||||
book.setScore(6.5f);
|
||||
@ -167,7 +178,13 @@ public class CrawlParser {
|
||||
}
|
||||
|
||||
public boolean parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean,
|
||||
Map<Integer, BookIndex> existBookIndexMap, CrawlBookChapterHandler handler) throws InterruptedException {
|
||||
Map<Integer, BookIndex> existBookIndexMap, CrawlBookChapterHandler handler, CrawlSingleTask task)
|
||||
throws InterruptedException {
|
||||
|
||||
if (task != null) {
|
||||
// 开始采集
|
||||
crawlTaskProgress.put(task.getId(), 0);
|
||||
}
|
||||
|
||||
Date currentDate = new Date();
|
||||
|
||||
@ -225,7 +242,7 @@ public class CrawlParser {
|
||||
calResult = sourceIndexId.substring(0, sourceBookId.length() - y);
|
||||
}
|
||||
|
||||
if (calResult.length() == 0) {
|
||||
if (calResult.isEmpty()) {
|
||||
calResult = "0";
|
||||
|
||||
}
|
||||
@ -291,6 +308,11 @@ public class CrawlParser {
|
||||
}
|
||||
bookIndex.setUpdateTime(currentDate);
|
||||
|
||||
if (task != null) {
|
||||
// 更新采集进度
|
||||
crawlTaskProgress.put(task.getId(), indexNum + 1);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -300,10 +322,10 @@ public class CrawlParser {
|
||||
isFindIndex = indexIdMatch.find() & indexNameMatch.find();
|
||||
}
|
||||
|
||||
if (indexList.size() > 0) {
|
||||
if (!indexList.isEmpty()) {
|
||||
//如果有爬到最新章节,则设置小说主表的最新章节信息
|
||||
//获取爬取到的最新章节
|
||||
BookIndex lastIndex = indexList.get(indexList.size() - 1);
|
||||
BookIndex lastIndex = indexList.getLast();
|
||||
book.setLastIndexId(lastIndex.getId());
|
||||
book.setLastIndexName(lastIndex.getIndexName());
|
||||
book.setLastIndexUpdateTime(currentDate);
|
||||
@ -312,7 +334,7 @@ public class CrawlParser {
|
||||
book.setWordCount(totalWordCount);
|
||||
book.setUpdateTime(currentDate);
|
||||
|
||||
if (indexList.size() == contentList.size() && indexList.size() > 0) {
|
||||
if (indexList.size() == contentList.size() && !indexList.isEmpty()) {
|
||||
|
||||
handler.handle(new ChapterBean() {{
|
||||
setBookIndexList(indexList);
|
||||
|
@ -74,8 +74,10 @@ public class StarterListener implements ServletContextInitializer {
|
||||
needUpdateBook.getId());
|
||||
//解析章节目录
|
||||
crawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(), book,
|
||||
ruleBean, existBookIndexMap, chapter -> bookService.updateBookAndIndexAndContent(book, chapter.getBookIndexList(),
|
||||
chapter.getBookContentList(), existBookIndexMap));
|
||||
ruleBean, existBookIndexMap,
|
||||
chapter -> bookService.updateBookAndIndexAndContent(book,
|
||||
chapter.getBookIndexList(),
|
||||
chapter.getBookContentList(), existBookIndexMap), null);
|
||||
});
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
@ -107,9 +109,8 @@ public class StarterListener implements ServletContextInitializer {
|
||||
//查询爬虫规则
|
||||
CrawlSource source = crawlService.queryCrawlSource(task.getSourceId());
|
||||
RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class);
|
||||
|
||||
if (crawlService.parseBookAndSave(task.getCatId(), ruleBean, task.getSourceId(),
|
||||
task.getSourceBookId())) {
|
||||
task.getSourceBookId(), task)) {
|
||||
//采集成功
|
||||
crawlStatus = 1;
|
||||
}
|
||||
@ -122,6 +123,7 @@ public class StarterListener implements ServletContextInitializer {
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
}
|
||||
|
||||
if (task != null) {
|
||||
crawlService.updateCrawlSingleTask(task, crawlStatus);
|
||||
}
|
||||
|
@ -47,13 +47,15 @@ public interface CrawlService {
|
||||
|
||||
/**
|
||||
* 采集并保存小说
|
||||
* @param catId 分类ID
|
||||
* @param bookId 小说ID
|
||||
* @param sourceId 源ID
|
||||
*
|
||||
* @param catId 分类ID
|
||||
* @param ruleBean 采集规则\
|
||||
* @param sourceId 源ID
|
||||
* @param bookId 小说ID
|
||||
* @param task
|
||||
* @return true:成功,false:失败
|
||||
* */
|
||||
boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId) throws InterruptedException;
|
||||
*/
|
||||
boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId, CrawlSingleTask task) throws InterruptedException;
|
||||
|
||||
/**
|
||||
* 根据爬虫状态查询爬虫源集合
|
||||
@ -117,4 +119,9 @@ public interface CrawlService {
|
||||
* @return
|
||||
*/
|
||||
CrawlSource getCrawlSource(Integer id);
|
||||
|
||||
/**
|
||||
* 采集任务进度查询
|
||||
* */
|
||||
Integer getTaskProgress(Long taskId);
|
||||
}
|
||||
|
@ -2,12 +2,10 @@ package com.java2nb.novel.service.impl;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.github.pagehelper.PageHelper;
|
||||
import com.java2nb.novel.core.cache.CacheKey;
|
||||
import com.java2nb.novel.core.cache.CacheService;
|
||||
import com.java2nb.novel.core.crawl.CrawlParser;
|
||||
import com.java2nb.novel.core.crawl.RuleBean;
|
||||
import com.java2nb.novel.core.enums.ResponseStatus;
|
||||
import com.java2nb.novel.core.utils.SpringUtil;
|
||||
import com.java2nb.novel.entity.Book;
|
||||
import com.java2nb.novel.entity.CrawlSingleTask;
|
||||
import com.java2nb.novel.entity.CrawlSource;
|
||||
@ -60,8 +58,6 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
|
||||
private final BookService bookService;
|
||||
|
||||
private final CacheService cacheService;
|
||||
|
||||
private final IdWorker idWorker = IdWorker.INSTANCE;
|
||||
|
||||
private final CrawlHttpClient crawlHttpClient;
|
||||
@ -198,6 +194,16 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
List<CrawlSingleTask> crawlSingleTasks = crawlSingleTaskMapper.selectMany(render);
|
||||
PageBean<CrawlSingleTask> pageBean = PageBuilder.build(crawlSingleTasks);
|
||||
pageBean.setList(BeanUtil.copyList(crawlSingleTasks, CrawlSingleTaskVO.class));
|
||||
for (CrawlSingleTask crawlSingleTask : pageBean.getList()) {
|
||||
if (crawlSingleTask.getTaskStatus() == 2
|
||||
&& crawlParser.getCrawlTaskProgress(crawlSingleTask.getId()) != null) {
|
||||
// 如果排队中的任务有任务进度
|
||||
// 1.设置任务进度
|
||||
crawlSingleTask.setCrawlChapters(crawlParser.getCrawlTaskProgress(crawlSingleTask.getId()));
|
||||
// 2.将排队中的任务状态修改成采集中
|
||||
crawlSingleTask.setTaskStatus((byte) 3);
|
||||
}
|
||||
}
|
||||
return pageBean;
|
||||
}
|
||||
|
||||
@ -227,9 +233,13 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
excCount += 1;
|
||||
task.setExcCount(excCount);
|
||||
if (status == 1 || excCount == 5) {
|
||||
//当采集成功或者采集次数等于5,则更新采集最终状态,并停止采集
|
||||
// 当采集成功或者采集次数等于5,则更新采集最终状态,并停止采集
|
||||
task.setTaskStatus(status);
|
||||
}
|
||||
if (status == 1) {
|
||||
// 当采集成功,保存采集的章节数量
|
||||
task.setCrawlChapters(crawlParser.getCrawlTaskProgress(task.getId()));
|
||||
}
|
||||
crawlSingleTaskMapper.updateByPrimaryKeySelective(task);
|
||||
|
||||
}
|
||||
@ -244,6 +254,11 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer getTaskProgress(Long taskId) {
|
||||
return Optional.ofNullable(crawlParser.getCrawlTaskProgress(taskId)).orElse(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析分类列表
|
||||
*/
|
||||
@ -291,7 +306,7 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
}
|
||||
|
||||
String bookId = bookIdMatcher.group(1);
|
||||
parseBookAndSave(catId, ruleBean, sourceId, bookId);
|
||||
parseBookAndSave(catId, ruleBean, sourceId, bookId, null);
|
||||
} catch (InterruptedException e) {
|
||||
log.error(e.getMessage(), e);
|
||||
//1.阻塞过程(使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时)
|
||||
@ -345,7 +360,7 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId)
|
||||
public boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId, CrawlSingleTask task)
|
||||
throws InterruptedException {
|
||||
|
||||
final AtomicBoolean parseResult = new AtomicBoolean(false);
|
||||
@ -378,7 +393,7 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
new HashMap<>(0), chapter -> {
|
||||
bookService.saveBookAndIndexAndContent(book, chapter.getBookIndexList(),
|
||||
chapter.getBookContentList());
|
||||
});
|
||||
}, task);
|
||||
parseResult.set(parseIndexContentResult);
|
||||
|
||||
} else {
|
||||
|
@ -48,6 +48,9 @@
|
||||
<th class="name">
|
||||
采集小说作者名
|
||||
</th>
|
||||
<th class="goread">
|
||||
采集进度
|
||||
</th>
|
||||
<th class="goread">
|
||||
采集次数
|
||||
</th>
|
||||
@ -113,9 +116,15 @@
|
||||
<script src="/javascript/header.js" type="text/javascript"></script>
|
||||
<script src="/javascript/user.js" type="text/javascript"></script>
|
||||
<script language="javascript" type="text/javascript">
|
||||
search(1, 10);
|
||||
let curr = 1;
|
||||
let limit = 10;
|
||||
|
||||
function search(curr, limit) {
|
||||
search();
|
||||
setInterval(function(){
|
||||
search();
|
||||
}, 10000);
|
||||
|
||||
function search() {
|
||||
|
||||
$.ajax({
|
||||
type: "get",
|
||||
@ -140,10 +149,13 @@
|
||||
" " + crawlSource.authorName + "\n" +
|
||||
" </td>\n" +
|
||||
" <td class=\"goread\">\n" +
|
||||
" " + crawlSource.crawlChapters + "\n" + "章" +
|
||||
" </td>\n" +
|
||||
" <td class=\"goread\">\n" +
|
||||
" " + crawlSource.excCount + "\n" +
|
||||
" </td>\n" +
|
||||
" <td class=\"goread\">\n" +
|
||||
" " + (crawlSource.taskStatus == 0 ? '采集失败' : (crawlSource.taskStatus == 1 ? '采集成功' : (crawlSource.excCount > 0 ? '采集中' : '排队中'))) + "\n" +
|
||||
" " + (crawlSource.taskStatus == 0 ? '采集失败' : (crawlSource.taskStatus == 1 ? '采集成功' : (crawlSource.taskStatus == 3 || crawlSource.excCount > 0 ? '采集中' : '排队中'))) + "\n" +
|
||||
" </td>\n" +
|
||||
" <td class=\"name\" valsc=\"291|2037554|1\">"
|
||||
+ crawlSource.createTime + "</td>\n" +
|
||||
@ -171,7 +183,9 @@
|
||||
|
||||
//首次不执行
|
||||
if (!first) {
|
||||
search(obj.curr, obj.limit);
|
||||
curr = obj.curr;
|
||||
limit = obj.limit;
|
||||
search();
|
||||
} else {
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user