mirror of
https://github.com/201206030/novel-plus.git
synced 2025-07-15 05:36:40 +00:00
feat(novel-crawl): 增加单本采集任务进度显示功能
This commit is contained in:
3
doc/sql/20250711.sql
Normal file
3
doc/sql/20250711.sql
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
alter table crawl_single_task add column crawl_chapters int DEFAULT 0 COMMENT '采集章节数量' after exc_count ;
|
||||||
|
|
||||||
|
|
@ -3159,3 +3159,4 @@ where menu_id = 57;
|
|||||||
alter table book_comment add column location varchar(50) DEFAULT NULL COMMENT '地理位置' after comment_content ;
|
alter table book_comment add column location varchar(50) DEFAULT NULL COMMENT '地理位置' after comment_content ;
|
||||||
|
|
||||||
|
|
||||||
|
alter table crawl_single_task add column crawl_chapters int DEFAULT 0 COMMENT '采集章节数量' after exc_count ;
|
||||||
|
@ -31,6 +31,9 @@ public class CrawlSingleTask {
|
|||||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||||
private Byte excCount;
|
private Byte excCount;
|
||||||
|
|
||||||
|
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||||
|
private Integer crawlChapters;
|
||||||
|
|
||||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||||
private Date createTime;
|
private Date createTime;
|
||||||
|
|
||||||
@ -124,6 +127,16 @@ public class CrawlSingleTask {
|
|||||||
this.excCount = excCount;
|
this.excCount = excCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||||
|
public Integer getCrawlChapters() {
|
||||||
|
return crawlChapters;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||||
|
public void setCrawlChapters(Integer crawlChapters) {
|
||||||
|
this.crawlChapters = crawlChapters;
|
||||||
|
}
|
||||||
|
|
||||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||||
public Date getCreateTime() {
|
public Date getCreateTime() {
|
||||||
return createTime;
|
return createTime;
|
||||||
|
@ -37,6 +37,9 @@ public final class CrawlSingleTaskDynamicSqlSupport {
|
|||||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||||
public static final SqlColumn<Byte> excCount = crawlSingleTask.excCount;
|
public static final SqlColumn<Byte> excCount = crawlSingleTask.excCount;
|
||||||
|
|
||||||
|
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||||
|
public static final SqlColumn<Integer> crawlChapters = crawlSingleTask.crawlChapters;
|
||||||
|
|
||||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||||
public static final SqlColumn<Date> createTime = crawlSingleTask.createTime;
|
public static final SqlColumn<Date> createTime = crawlSingleTask.createTime;
|
||||||
|
|
||||||
@ -60,6 +63,8 @@ public final class CrawlSingleTaskDynamicSqlSupport {
|
|||||||
|
|
||||||
public final SqlColumn<Byte> excCount = column("exc_count", JDBCType.TINYINT);
|
public final SqlColumn<Byte> excCount = column("exc_count", JDBCType.TINYINT);
|
||||||
|
|
||||||
|
public final SqlColumn<Integer> crawlChapters = column("crawl_chapters", JDBCType.INTEGER);
|
||||||
|
|
||||||
public final SqlColumn<Date> createTime = column("create_time", JDBCType.TIMESTAMP);
|
public final SqlColumn<Date> createTime = column("create_time", JDBCType.TIMESTAMP);
|
||||||
|
|
||||||
public CrawlSingleTask() {
|
public CrawlSingleTask() {
|
||||||
|
@ -35,7 +35,7 @@ import org.mybatis.dynamic.sql.util.mybatis3.MyBatis3Utils;
|
|||||||
@Mapper
|
@Mapper
|
||||||
public interface CrawlSingleTaskMapper {
|
public interface CrawlSingleTaskMapper {
|
||||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||||
BasicColumn[] selectList = BasicColumn.columnList(id, sourceId, sourceName, sourceBookId, catId, bookName, authorName, taskStatus, excCount, createTime);
|
BasicColumn[] selectList = BasicColumn.columnList(id, sourceId, sourceName, sourceBookId, catId, bookName, authorName, taskStatus, excCount, crawlChapters, createTime);
|
||||||
|
|
||||||
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
@Generated("org.mybatis.generator.api.MyBatisGenerator")
|
||||||
@SelectProvider(type=SqlProviderAdapter.class, method="select")
|
@SelectProvider(type=SqlProviderAdapter.class, method="select")
|
||||||
@ -70,6 +70,7 @@ public interface CrawlSingleTaskMapper {
|
|||||||
@Result(column="author_name", property="authorName", jdbcType=JdbcType.VARCHAR),
|
@Result(column="author_name", property="authorName", jdbcType=JdbcType.VARCHAR),
|
||||||
@Result(column="task_status", property="taskStatus", jdbcType=JdbcType.TINYINT),
|
@Result(column="task_status", property="taskStatus", jdbcType=JdbcType.TINYINT),
|
||||||
@Result(column="exc_count", property="excCount", jdbcType=JdbcType.TINYINT),
|
@Result(column="exc_count", property="excCount", jdbcType=JdbcType.TINYINT),
|
||||||
|
@Result(column="crawl_chapters", property="crawlChapters", jdbcType=JdbcType.INTEGER),
|
||||||
@Result(column="create_time", property="createTime", jdbcType=JdbcType.TIMESTAMP)
|
@Result(column="create_time", property="createTime", jdbcType=JdbcType.TIMESTAMP)
|
||||||
})
|
})
|
||||||
List<CrawlSingleTask> selectMany(SelectStatementProvider selectStatement);
|
List<CrawlSingleTask> selectMany(SelectStatementProvider selectStatement);
|
||||||
@ -107,6 +108,7 @@ public interface CrawlSingleTaskMapper {
|
|||||||
.map(authorName).toProperty("authorName")
|
.map(authorName).toProperty("authorName")
|
||||||
.map(taskStatus).toProperty("taskStatus")
|
.map(taskStatus).toProperty("taskStatus")
|
||||||
.map(excCount).toProperty("excCount")
|
.map(excCount).toProperty("excCount")
|
||||||
|
.map(crawlChapters).toProperty("crawlChapters")
|
||||||
.map(createTime).toProperty("createTime")
|
.map(createTime).toProperty("createTime")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -123,6 +125,7 @@ public interface CrawlSingleTaskMapper {
|
|||||||
.map(authorName).toProperty("authorName")
|
.map(authorName).toProperty("authorName")
|
||||||
.map(taskStatus).toProperty("taskStatus")
|
.map(taskStatus).toProperty("taskStatus")
|
||||||
.map(excCount).toProperty("excCount")
|
.map(excCount).toProperty("excCount")
|
||||||
|
.map(crawlChapters).toProperty("crawlChapters")
|
||||||
.map(createTime).toProperty("createTime")
|
.map(createTime).toProperty("createTime")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -139,6 +142,7 @@ public interface CrawlSingleTaskMapper {
|
|||||||
.map(authorName).toPropertyWhenPresent("authorName", record::getAuthorName)
|
.map(authorName).toPropertyWhenPresent("authorName", record::getAuthorName)
|
||||||
.map(taskStatus).toPropertyWhenPresent("taskStatus", record::getTaskStatus)
|
.map(taskStatus).toPropertyWhenPresent("taskStatus", record::getTaskStatus)
|
||||||
.map(excCount).toPropertyWhenPresent("excCount", record::getExcCount)
|
.map(excCount).toPropertyWhenPresent("excCount", record::getExcCount)
|
||||||
|
.map(crawlChapters).toPropertyWhenPresent("crawlChapters", record::getCrawlChapters)
|
||||||
.map(createTime).toPropertyWhenPresent("createTime", record::getCreateTime)
|
.map(createTime).toPropertyWhenPresent("createTime", record::getCreateTime)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -181,6 +185,7 @@ public interface CrawlSingleTaskMapper {
|
|||||||
.set(authorName).equalTo(record::getAuthorName)
|
.set(authorName).equalTo(record::getAuthorName)
|
||||||
.set(taskStatus).equalTo(record::getTaskStatus)
|
.set(taskStatus).equalTo(record::getTaskStatus)
|
||||||
.set(excCount).equalTo(record::getExcCount)
|
.set(excCount).equalTo(record::getExcCount)
|
||||||
|
.set(crawlChapters).equalTo(record::getCrawlChapters)
|
||||||
.set(createTime).equalTo(record::getCreateTime);
|
.set(createTime).equalTo(record::getCreateTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -195,6 +200,7 @@ public interface CrawlSingleTaskMapper {
|
|||||||
.set(authorName).equalToWhenPresent(record::getAuthorName)
|
.set(authorName).equalToWhenPresent(record::getAuthorName)
|
||||||
.set(taskStatus).equalToWhenPresent(record::getTaskStatus)
|
.set(taskStatus).equalToWhenPresent(record::getTaskStatus)
|
||||||
.set(excCount).equalToWhenPresent(record::getExcCount)
|
.set(excCount).equalToWhenPresent(record::getExcCount)
|
||||||
|
.set(crawlChapters).equalToWhenPresent(record::getCrawlChapters)
|
||||||
.set(createTime).equalToWhenPresent(record::getCreateTime);
|
.set(createTime).equalToWhenPresent(record::getCreateTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -209,6 +215,7 @@ public interface CrawlSingleTaskMapper {
|
|||||||
.set(authorName).equalTo(record::getAuthorName)
|
.set(authorName).equalTo(record::getAuthorName)
|
||||||
.set(taskStatus).equalTo(record::getTaskStatus)
|
.set(taskStatus).equalTo(record::getTaskStatus)
|
||||||
.set(excCount).equalTo(record::getExcCount)
|
.set(excCount).equalTo(record::getExcCount)
|
||||||
|
.set(crawlChapters).equalTo(record::getCrawlChapters)
|
||||||
.set(createTime).equalTo(record::getCreateTime)
|
.set(createTime).equalTo(record::getCreateTime)
|
||||||
.where(id, isEqualTo(record::getId))
|
.where(id, isEqualTo(record::getId))
|
||||||
);
|
);
|
||||||
@ -225,6 +232,7 @@ public interface CrawlSingleTaskMapper {
|
|||||||
.set(authorName).equalToWhenPresent(record::getAuthorName)
|
.set(authorName).equalToWhenPresent(record::getAuthorName)
|
||||||
.set(taskStatus).equalToWhenPresent(record::getTaskStatus)
|
.set(taskStatus).equalToWhenPresent(record::getTaskStatus)
|
||||||
.set(excCount).equalToWhenPresent(record::getExcCount)
|
.set(excCount).equalToWhenPresent(record::getExcCount)
|
||||||
|
.set(crawlChapters).equalToWhenPresent(record::getCrawlChapters)
|
||||||
.set(createTime).equalToWhenPresent(record::getCreateTime)
|
.set(createTime).equalToWhenPresent(record::getCreateTime)
|
||||||
.where(id, isEqualTo(record::getId))
|
.where(id, isEqualTo(record::getId))
|
||||||
);
|
);
|
||||||
|
@ -153,6 +153,14 @@ public class CrawlController {
|
|||||||
return RestResult.ok();
|
return RestResult.ok();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 采集任务进度查询
|
||||||
|
* */
|
||||||
|
@GetMapping("getTaskProgress/{id}")
|
||||||
|
public RestResult<Integer> getTaskProgress(@PathVariable("id") Long id){
|
||||||
|
return RestResult.ok(crawlService.getTaskProgress(id));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,25 +1,24 @@
|
|||||||
package com.java2nb.novel.core.crawl;
|
package com.java2nb.novel.core.crawl;
|
||||||
|
|
||||||
|
import com.java2nb.novel.core.cache.CacheKey;
|
||||||
|
import com.java2nb.novel.core.cache.CacheService;
|
||||||
import com.java2nb.novel.core.utils.RandomBookInfoUtil;
|
import com.java2nb.novel.core.utils.RandomBookInfoUtil;
|
||||||
import com.java2nb.novel.core.utils.StringUtil;
|
import com.java2nb.novel.core.utils.StringUtil;
|
||||||
import com.java2nb.novel.entity.Book;
|
import com.java2nb.novel.entity.Book;
|
||||||
import com.java2nb.novel.entity.BookContent;
|
import com.java2nb.novel.entity.BookContent;
|
||||||
import com.java2nb.novel.entity.BookIndex;
|
import com.java2nb.novel.entity.BookIndex;
|
||||||
|
import com.java2nb.novel.entity.CrawlSingleTask;
|
||||||
import com.java2nb.novel.utils.Constants;
|
import com.java2nb.novel.utils.Constants;
|
||||||
import com.java2nb.novel.utils.CrawlHttpClient;
|
import com.java2nb.novel.utils.CrawlHttpClient;
|
||||||
import io.github.xxyopen.util.IdWorker;
|
import io.github.xxyopen.util.IdWorker;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.SneakyThrows;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Date;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
@ -37,6 +36,18 @@ public class CrawlParser {
|
|||||||
|
|
||||||
private final CrawlHttpClient crawlHttpClient;
|
private final CrawlHttpClient crawlHttpClient;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 爬虫任务进度
|
||||||
|
*/
|
||||||
|
private final Map<Long, Integer> crawlTaskProgress = new HashMap<>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 获取爬虫任务进度
|
||||||
|
*/
|
||||||
|
public Integer getCrawlTaskProgress(Long taskId) {
|
||||||
|
return crawlTaskProgress.get(taskId);
|
||||||
|
}
|
||||||
|
|
||||||
public void parseBook(RuleBean ruleBean, String bookId, CrawlBookHandler handler)
|
public void parseBook(RuleBean ruleBean, String bookId, CrawlBookHandler handler)
|
||||||
throws InterruptedException {
|
throws InterruptedException {
|
||||||
Book book = new Book();
|
Book book = new Book();
|
||||||
@ -156,7 +167,7 @@ public class CrawlParser {
|
|||||||
} else if (book.getVisitCount() != null && book.getScore() == null) {
|
} else if (book.getVisitCount() != null && book.getScore() == null) {
|
||||||
//随机根据访问次数生成评分
|
//随机根据访问次数生成评分
|
||||||
book.setScore(RandomBookInfoUtil.getScoreByVisitCount(book.getVisitCount()));
|
book.setScore(RandomBookInfoUtil.getScoreByVisitCount(book.getVisitCount()));
|
||||||
} else if (book.getVisitCount() == null && book.getScore() == null) {
|
} else if (book.getVisitCount() == null) {
|
||||||
//都没有,设置成固定值
|
//都没有,设置成固定值
|
||||||
book.setVisitCount(Constants.VISIT_COUNT_DEFAULT);
|
book.setVisitCount(Constants.VISIT_COUNT_DEFAULT);
|
||||||
book.setScore(6.5f);
|
book.setScore(6.5f);
|
||||||
@ -167,7 +178,13 @@ public class CrawlParser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public boolean parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean,
|
public boolean parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean,
|
||||||
Map<Integer, BookIndex> existBookIndexMap, CrawlBookChapterHandler handler) throws InterruptedException {
|
Map<Integer, BookIndex> existBookIndexMap, CrawlBookChapterHandler handler, CrawlSingleTask task)
|
||||||
|
throws InterruptedException {
|
||||||
|
|
||||||
|
if (task != null) {
|
||||||
|
// 开始采集
|
||||||
|
crawlTaskProgress.put(task.getId(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
Date currentDate = new Date();
|
Date currentDate = new Date();
|
||||||
|
|
||||||
@ -225,7 +242,7 @@ public class CrawlParser {
|
|||||||
calResult = sourceIndexId.substring(0, sourceBookId.length() - y);
|
calResult = sourceIndexId.substring(0, sourceBookId.length() - y);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (calResult.length() == 0) {
|
if (calResult.isEmpty()) {
|
||||||
calResult = "0";
|
calResult = "0";
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -291,6 +308,11 @@ public class CrawlParser {
|
|||||||
}
|
}
|
||||||
bookIndex.setUpdateTime(currentDate);
|
bookIndex.setUpdateTime(currentDate);
|
||||||
|
|
||||||
|
if (task != null) {
|
||||||
|
// 更新采集进度
|
||||||
|
crawlTaskProgress.put(task.getId(), indexNum + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -300,10 +322,10 @@ public class CrawlParser {
|
|||||||
isFindIndex = indexIdMatch.find() & indexNameMatch.find();
|
isFindIndex = indexIdMatch.find() & indexNameMatch.find();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (indexList.size() > 0) {
|
if (!indexList.isEmpty()) {
|
||||||
//如果有爬到最新章节,则设置小说主表的最新章节信息
|
//如果有爬到最新章节,则设置小说主表的最新章节信息
|
||||||
//获取爬取到的最新章节
|
//获取爬取到的最新章节
|
||||||
BookIndex lastIndex = indexList.get(indexList.size() - 1);
|
BookIndex lastIndex = indexList.getLast();
|
||||||
book.setLastIndexId(lastIndex.getId());
|
book.setLastIndexId(lastIndex.getId());
|
||||||
book.setLastIndexName(lastIndex.getIndexName());
|
book.setLastIndexName(lastIndex.getIndexName());
|
||||||
book.setLastIndexUpdateTime(currentDate);
|
book.setLastIndexUpdateTime(currentDate);
|
||||||
@ -312,7 +334,7 @@ public class CrawlParser {
|
|||||||
book.setWordCount(totalWordCount);
|
book.setWordCount(totalWordCount);
|
||||||
book.setUpdateTime(currentDate);
|
book.setUpdateTime(currentDate);
|
||||||
|
|
||||||
if (indexList.size() == contentList.size() && indexList.size() > 0) {
|
if (indexList.size() == contentList.size() && !indexList.isEmpty()) {
|
||||||
|
|
||||||
handler.handle(new ChapterBean() {{
|
handler.handle(new ChapterBean() {{
|
||||||
setBookIndexList(indexList);
|
setBookIndexList(indexList);
|
||||||
|
@ -74,8 +74,10 @@ public class StarterListener implements ServletContextInitializer {
|
|||||||
needUpdateBook.getId());
|
needUpdateBook.getId());
|
||||||
//解析章节目录
|
//解析章节目录
|
||||||
crawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(), book,
|
crawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(), book,
|
||||||
ruleBean, existBookIndexMap, chapter -> bookService.updateBookAndIndexAndContent(book, chapter.getBookIndexList(),
|
ruleBean, existBookIndexMap,
|
||||||
chapter.getBookContentList(), existBookIndexMap));
|
chapter -> bookService.updateBookAndIndexAndContent(book,
|
||||||
|
chapter.getBookIndexList(),
|
||||||
|
chapter.getBookContentList(), existBookIndexMap), null);
|
||||||
});
|
});
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error(e.getMessage(), e);
|
log.error(e.getMessage(), e);
|
||||||
@ -107,9 +109,8 @@ public class StarterListener implements ServletContextInitializer {
|
|||||||
//查询爬虫规则
|
//查询爬虫规则
|
||||||
CrawlSource source = crawlService.queryCrawlSource(task.getSourceId());
|
CrawlSource source = crawlService.queryCrawlSource(task.getSourceId());
|
||||||
RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class);
|
RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class);
|
||||||
|
|
||||||
if (crawlService.parseBookAndSave(task.getCatId(), ruleBean, task.getSourceId(),
|
if (crawlService.parseBookAndSave(task.getCatId(), ruleBean, task.getSourceId(),
|
||||||
task.getSourceBookId())) {
|
task.getSourceBookId(), task)) {
|
||||||
//采集成功
|
//采集成功
|
||||||
crawlStatus = 1;
|
crawlStatus = 1;
|
||||||
}
|
}
|
||||||
@ -122,6 +123,7 @@ public class StarterListener implements ServletContextInitializer {
|
|||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error(e.getMessage(), e);
|
log.error(e.getMessage(), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (task != null) {
|
if (task != null) {
|
||||||
crawlService.updateCrawlSingleTask(task, crawlStatus);
|
crawlService.updateCrawlSingleTask(task, crawlStatus);
|
||||||
}
|
}
|
||||||
|
@ -47,13 +47,15 @@ public interface CrawlService {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 采集并保存小说
|
* 采集并保存小说
|
||||||
|
*
|
||||||
* @param catId 分类ID
|
* @param catId 分类ID
|
||||||
* @param bookId 小说ID
|
|
||||||
* @param sourceId 源ID
|
|
||||||
* @param ruleBean 采集规则\
|
* @param ruleBean 采集规则\
|
||||||
|
* @param sourceId 源ID
|
||||||
|
* @param bookId 小说ID
|
||||||
|
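* @param task single-book crawl task used to record crawl progress; {@code null} when the crawl is not driven by a single-book task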
|
||||||
* @return true:成功,false:失败
|
* @return true:成功,false:失败
|
||||||
* */
|
*/
|
||||||
boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId) throws InterruptedException;
|
boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId, CrawlSingleTask task) throws InterruptedException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 根据爬虫状态查询爬虫源集合
|
* 根据爬虫状态查询爬虫源集合
|
||||||
@ -117,4 +119,9 @@ public interface CrawlService {
|
|||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
CrawlSource getCrawlSource(Integer id);
|
CrawlSource getCrawlSource(Integer id);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 采集任务进度查询
|
||||||
|
* */
|
||||||
|
Integer getTaskProgress(Long taskId);
|
||||||
}
|
}
|
||||||
|
@ -2,12 +2,10 @@ package com.java2nb.novel.service.impl;
|
|||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import com.github.pagehelper.PageHelper;
|
import com.github.pagehelper.PageHelper;
|
||||||
import com.java2nb.novel.core.cache.CacheKey;
|
|
||||||
import com.java2nb.novel.core.cache.CacheService;
|
import com.java2nb.novel.core.cache.CacheService;
|
||||||
import com.java2nb.novel.core.crawl.CrawlParser;
|
import com.java2nb.novel.core.crawl.CrawlParser;
|
||||||
import com.java2nb.novel.core.crawl.RuleBean;
|
import com.java2nb.novel.core.crawl.RuleBean;
|
||||||
import com.java2nb.novel.core.enums.ResponseStatus;
|
import com.java2nb.novel.core.enums.ResponseStatus;
|
||||||
import com.java2nb.novel.core.utils.SpringUtil;
|
|
||||||
import com.java2nb.novel.entity.Book;
|
import com.java2nb.novel.entity.Book;
|
||||||
import com.java2nb.novel.entity.CrawlSingleTask;
|
import com.java2nb.novel.entity.CrawlSingleTask;
|
||||||
import com.java2nb.novel.entity.CrawlSource;
|
import com.java2nb.novel.entity.CrawlSource;
|
||||||
@ -60,8 +58,6 @@ public class CrawlServiceImpl implements CrawlService {
|
|||||||
|
|
||||||
private final BookService bookService;
|
private final BookService bookService;
|
||||||
|
|
||||||
private final CacheService cacheService;
|
|
||||||
|
|
||||||
private final IdWorker idWorker = IdWorker.INSTANCE;
|
private final IdWorker idWorker = IdWorker.INSTANCE;
|
||||||
|
|
||||||
private final CrawlHttpClient crawlHttpClient;
|
private final CrawlHttpClient crawlHttpClient;
|
||||||
@ -198,6 +194,16 @@ public class CrawlServiceImpl implements CrawlService {
|
|||||||
List<CrawlSingleTask> crawlSingleTasks = crawlSingleTaskMapper.selectMany(render);
|
List<CrawlSingleTask> crawlSingleTasks = crawlSingleTaskMapper.selectMany(render);
|
||||||
PageBean<CrawlSingleTask> pageBean = PageBuilder.build(crawlSingleTasks);
|
PageBean<CrawlSingleTask> pageBean = PageBuilder.build(crawlSingleTasks);
|
||||||
pageBean.setList(BeanUtil.copyList(crawlSingleTasks, CrawlSingleTaskVO.class));
|
pageBean.setList(BeanUtil.copyList(crawlSingleTasks, CrawlSingleTaskVO.class));
|
||||||
|
for (CrawlSingleTask crawlSingleTask : pageBean.getList()) {
|
||||||
|
if (crawlSingleTask.getTaskStatus() == 2
|
||||||
|
&& crawlParser.getCrawlTaskProgress(crawlSingleTask.getId()) != null) {
|
||||||
|
// 如果排队中的任务有任务进度
|
||||||
|
// 1.设置任务进度
|
||||||
|
crawlSingleTask.setCrawlChapters(crawlParser.getCrawlTaskProgress(crawlSingleTask.getId()));
|
||||||
|
// 2.将排队中的任务状态修改成采集中
|
||||||
|
crawlSingleTask.setTaskStatus((byte) 3);
|
||||||
|
}
|
||||||
|
}
|
||||||
return pageBean;
|
return pageBean;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -230,6 +236,10 @@ public class CrawlServiceImpl implements CrawlService {
|
|||||||
// 当采集成功或者采集次数等于5,则更新采集最终状态,并停止采集
|
// 当采集成功或者采集次数等于5,则更新采集最终状态,并停止采集
|
||||||
task.setTaskStatus(status);
|
task.setTaskStatus(status);
|
||||||
}
|
}
|
||||||
|
if (status == 1) {
|
||||||
|
// 当采集成功,保存采集的章节数量
|
||||||
|
task.setCrawlChapters(crawlParser.getCrawlTaskProgress(task.getId()));
|
||||||
|
}
|
||||||
crawlSingleTaskMapper.updateByPrimaryKeySelective(task);
|
crawlSingleTaskMapper.updateByPrimaryKeySelective(task);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -244,6 +254,11 @@ public class CrawlServiceImpl implements CrawlService {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer getTaskProgress(Long taskId) {
|
||||||
|
return Optional.ofNullable(crawlParser.getCrawlTaskProgress(taskId)).orElse(0);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 解析分类列表
|
* 解析分类列表
|
||||||
*/
|
*/
|
||||||
@ -291,7 +306,7 @@ public class CrawlServiceImpl implements CrawlService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
String bookId = bookIdMatcher.group(1);
|
String bookId = bookIdMatcher.group(1);
|
||||||
parseBookAndSave(catId, ruleBean, sourceId, bookId);
|
parseBookAndSave(catId, ruleBean, sourceId, bookId, null);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
log.error(e.getMessage(), e);
|
log.error(e.getMessage(), e);
|
||||||
//1.阻塞过程(使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时)
|
//1.阻塞过程(使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时)
|
||||||
@ -345,7 +360,7 @@ public class CrawlServiceImpl implements CrawlService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId)
|
public boolean parseBookAndSave(int catId, RuleBean ruleBean, Integer sourceId, String bookId, CrawlSingleTask task)
|
||||||
throws InterruptedException {
|
throws InterruptedException {
|
||||||
|
|
||||||
final AtomicBoolean parseResult = new AtomicBoolean(false);
|
final AtomicBoolean parseResult = new AtomicBoolean(false);
|
||||||
@ -378,7 +393,7 @@ public class CrawlServiceImpl implements CrawlService {
|
|||||||
new HashMap<>(0), chapter -> {
|
new HashMap<>(0), chapter -> {
|
||||||
bookService.saveBookAndIndexAndContent(book, chapter.getBookIndexList(),
|
bookService.saveBookAndIndexAndContent(book, chapter.getBookIndexList(),
|
||||||
chapter.getBookContentList());
|
chapter.getBookContentList());
|
||||||
});
|
}, task);
|
||||||
parseResult.set(parseIndexContentResult);
|
parseResult.set(parseIndexContentResult);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -48,6 +48,9 @@
|
|||||||
<th class="name">
|
<th class="name">
|
||||||
采集小说作者名
|
采集小说作者名
|
||||||
</th>
|
</th>
|
||||||
|
<th class="goread">
|
||||||
|
采集进度
|
||||||
|
</th>
|
||||||
<th class="goread">
|
<th class="goread">
|
||||||
采集次数
|
采集次数
|
||||||
</th>
|
</th>
|
||||||
@ -113,9 +116,15 @@
|
|||||||
<script src="/javascript/header.js" type="text/javascript"></script>
|
<script src="/javascript/header.js" type="text/javascript"></script>
|
||||||
<script src="/javascript/user.js" type="text/javascript"></script>
|
<script src="/javascript/user.js" type="text/javascript"></script>
|
||||||
<script language="javascript" type="text/javascript">
|
<script language="javascript" type="text/javascript">
|
||||||
search(1, 10);
|
let curr = 1;
|
||||||
|
let limit = 10;
|
||||||
|
|
||||||
function search(curr, limit) {
|
search();
|
||||||
|
setInterval(function(){
|
||||||
|
search();
|
||||||
|
}, 10000);
|
||||||
|
|
||||||
|
function search() {
|
||||||
|
|
||||||
$.ajax({
|
$.ajax({
|
||||||
type: "get",
|
type: "get",
|
||||||
@ -140,10 +149,13 @@
|
|||||||
" " + crawlSource.authorName + "\n" +
|
" " + crawlSource.authorName + "\n" +
|
||||||
" </td>\n" +
|
" </td>\n" +
|
||||||
" <td class=\"goread\">\n" +
|
" <td class=\"goread\">\n" +
|
||||||
|
" " + crawlSource.crawlChapters + "\n" + "章" +
|
||||||
|
" </td>\n" +
|
||||||
|
" <td class=\"goread\">\n" +
|
||||||
" " + crawlSource.excCount + "\n" +
|
" " + crawlSource.excCount + "\n" +
|
||||||
" </td>\n" +
|
" </td>\n" +
|
||||||
" <td class=\"goread\">\n" +
|
" <td class=\"goread\">\n" +
|
||||||
" " + (crawlSource.taskStatus == 0 ? '采集失败' : (crawlSource.taskStatus == 1 ? '采集成功' : (crawlSource.excCount > 0 ? '采集中' : '排队中'))) + "\n" +
|
" " + (crawlSource.taskStatus == 0 ? '采集失败' : (crawlSource.taskStatus == 1 ? '采集成功' : (crawlSource.taskStatus == 3 || crawlSource.excCount > 0 ? '采集中' : '排队中'))) + "\n" +
|
||||||
" </td>\n" +
|
" </td>\n" +
|
||||||
" <td class=\"name\" valsc=\"291|2037554|1\">"
|
" <td class=\"name\" valsc=\"291|2037554|1\">"
|
||||||
+ crawlSource.createTime + "</td>\n" +
|
+ crawlSource.createTime + "</td>\n" +
|
||||||
@ -171,7 +183,9 @@
|
|||||||
|
|
||||||
//首次不执行
|
//首次不执行
|
||||||
if (!first) {
|
if (!first) {
|
||||||
search(obj.curr, obj.limit);
|
curr = obj.curr;
|
||||||
|
limit = obj.limit;
|
||||||
|
search();
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user