diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java b/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java
index 586161c..64bcb46 100644
--- a/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java
+++ b/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java
@@ -19,6 +19,7 @@ import xyz.zinglizingli.common.constant.CacheKeyConstans;
import xyz.zinglizingli.common.enums.PicSaveType;
import xyz.zinglizingli.books.mapper.*;
import xyz.zinglizingli.books.po.*;
+import xyz.zinglizingli.common.utils.Constants;
import xyz.zinglizingli.common.utils.UUIDUtils;
import xyz.zinglizingli.common.cache.CommonCacheUtil;
import xyz.zinglizingli.common.utils.RestTemplateUtil;
@@ -96,7 +97,7 @@ public class BookService {
List newContentList = new ArrayList<>();
for (int i = 0; i < bookIndex.size(); i++) {
BookContent bookContentItem = bookContent.get(i);
- if (!bookContentItem.getContent().contains("正在手打中,请稍等片刻,内容更新后,需要重新刷新页面,才能获取最新更新")) {
+ if (!bookContentItem.getContent().contains(Constants.NO_CONTENT_DESC)) {
BookIndex bookIndexItem = bookIndex.get(i);
bookIndexItem.setBookId(bookId);
bookContentItem.setBookId(bookId);
@@ -207,57 +208,7 @@ public class BookService {
}
- /**
- * 获取分类名
- * */
- public String getCatNameById(Integer catid) {
- String catName = "其他";
- switch (catid) {
- case 1: {
- catName = "玄幻奇幻";
- break;
- }
- case 2: {
- catName = "武侠仙侠";
- break;
- }
- case 3: {
- catName = "都市言情";
- break;
- }
- case 4: {
- catName = "历史军事";
- break;
- }
- case 5: {
- catName = "科幻灵异";
- break;
- }
- case 6: {
- catName = "网游竞技";
- break;
- }
- case 7: {
- catName = "女生频道";
- break;
- }
- case 8: {
- catName = "轻小说";
- break;
- }
- case 9: {
- catName = "漫画";
- break;
- }
- default: {
- break;
- }
-
-
- }
- return catName;
- }
/**
* 查询书籍的基础数据
@@ -374,87 +325,6 @@ public class BookService {
- /**
- * 查询轻小说分类名
- * */
- public String getSoftCatNameById(Integer softCat) {
- String catName = "其他";
-
- switch (softCat) {
- case 21: {
- catName = "魔幻";
- break;
- }
- case 22: {
- catName = "玄幻";
- break;
- }
- case 23: {
- catName = "古风";
- break;
- }
- case 24: {
- catName = "科幻";
- break;
- }
- case 25: {
- catName = "校园";
- break;
- }
- case 26: {
- catName = "都市";
- break;
- }
- case 27: {
- catName = "游戏";
- break;
- }
- case 28: {
- catName = "同人";
- break;
- }
- case 29: {
- catName = "悬疑";
- break;
- }
- case 0: {
- catName = "动漫";
- break;
- }
- default: {
- break;
- }
-
-
- }
- return catName;
-
- }
-
- /**
- * 查询漫画分类名
- * */
- public String getMhCatNameById(Integer softCat) {
- String catName = "其他";
-
- switch (softCat) {
- case 3262: {
- catName = "少年漫";
- break;
- }
- case 3263: {
- catName = "少女漫";
- break;
- }
- default: {
- break;
- }
-
-
- }
- return catName;
-
- }
/**
* 保存弹幕
diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/web/ApiBookController.java b/novel-front/src/main/java/xyz/zinglizingli/books/web/ApiBookController.java
index d25824c..2ed7664 100644
--- a/novel-front/src/main/java/xyz/zinglizingli/books/web/ApiBookController.java
+++ b/novel-front/src/main/java/xyz/zinglizingli/books/web/ApiBookController.java
@@ -15,6 +15,8 @@ import xyz.zinglizingli.books.po.BookIndex;
import xyz.zinglizingli.books.service.BookService;
import xyz.zinglizingli.books.vo.BookVO;
import xyz.zinglizingli.common.cache.CommonCacheUtil;
+import xyz.zinglizingli.common.utils.CatUtil;
+import xyz.zinglizingli.common.utils.Constants;
import java.util.*;
@@ -71,7 +73,7 @@ public class ApiBookController {
String userId = null;
String titleType = "最近更新";
if (catId != null) {
- titleType = bookService.getCatNameById(catId);
+ titleType = CatUtil.getCatNameById(catId);
} else if (keyword != null) {
titleType = "搜索";
} else if ("score".equals(sortBy)) {
@@ -90,7 +92,7 @@ public class ApiBookController {
for (Book book : books) {
BookVO bookvo = new BookVO();
BeanUtils.copyProperties(book, bookvo);
- bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
+ bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
bookVOList.add(bookvo);
}
@@ -103,7 +105,7 @@ public class ApiBookController {
int index = idsArr.indexOf(book.getId() + "");
BookVO bookvo = new BookVO();
BeanUtils.copyProperties(book, bookvo);
- bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
+ bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
bookVOArr[length - index - 1] = bookvo;
}
bookVOList = Arrays.asList(bookVOArr);
@@ -139,7 +141,7 @@ public class ApiBookController {
BookVO bookvo = new BookVO();
BeanUtils.copyProperties(book, bookvo);
- bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
+ bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
modelMap.put("bookId", bookId);
modelMap.put("book", bookvo);
modelMap.put("indexList", indexList);
@@ -185,7 +187,7 @@ public class ApiBookController {
bookContent.setId(-1L);
bookContent.setBookId(bookId);
bookContent.setIndexNum(indexNum);
- bookContent.setContent("正在手打中,请稍等片刻,内容更新后,需要重新刷新页面,才能获取最新更新");
+ bookContent.setContent(Constants.NO_CONTENT_DESC);
indexName="?";
}else{
indexName = bookService.queryIndexNameByBookIdAndIndexNum(bookId, indexNum);
diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/web/BookController.java b/novel-front/src/main/java/xyz/zinglizingli/books/web/BookController.java
index 2abdcb2..3233e20 100644
--- a/novel-front/src/main/java/xyz/zinglizingli/books/web/BookController.java
+++ b/novel-front/src/main/java/xyz/zinglizingli/books/web/BookController.java
@@ -20,6 +20,7 @@ import xyz.zinglizingli.books.service.BookService;
import xyz.zinglizingli.books.service.UserService;
import xyz.zinglizingli.books.vo.BookVO;
import xyz.zinglizingli.common.cache.CommonCacheUtil;
+import xyz.zinglizingli.common.utils.CatUtil;
import xyz.zinglizingli.common.utils.Constants;
import javax.servlet.http.HttpServletResponse;
@@ -62,7 +63,7 @@ public class BookController {
String userId = null;
String titleType = "最近更新";
if (catId != null) {
- titleType = bookService.getCatNameById(catId) + "分类频道";
+ titleType = CatUtil.getCatNameById(catId) + "分类频道";
} else if (Constants.NOVEL_TOP_FIELD.equals(sortBy)) {
titleType = "小说排行";
} else if (ids != null) {
@@ -84,7 +85,7 @@ public class BookController {
for (Book book : books) {
BookVO bookvo = new BookVO();
BeanUtils.copyProperties(book, bookvo);
- bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
+ bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
bookVoList.add(bookvo);
}
@@ -97,7 +98,7 @@ public class BookController {
int index = idsArr.indexOf(book.getId() + "");
BookVO bookvo = new BookVO();
BeanUtils.copyProperties(book, bookvo);
- bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
+ bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
bookVoArr[books.size() - index - 1] = bookvo;
}
bookVoList = Arrays.asList(bookVoArr);
@@ -144,10 +145,10 @@ public class BookController {
BeanUtils.copyProperties(book, bookvo);
if(catId == Constants.SOFT_NOVEL_CAT) {
//轻小说
- bookvo.setCateName(bookService.getSoftCatNameById(bookvo.getSoftCat()));
+ bookvo.setCateName(CatUtil.getSoftCatNameById(bookvo.getSoftCat()));
}else if(catId == Constants.MH_NOVEL_CAT){
//漫画
- bookvo.setCateName(bookService.getMhCatNameById(bookvo.getSoftCat()));
+ bookvo.setCateName(CatUtil.getMhCatNameById(bookvo.getSoftCat()));
}
bookVoList.add(bookvo);
}
@@ -204,7 +205,7 @@ public class BookController {
BookVO bookvo = new BookVO();
BeanUtils.copyProperties(book, bookvo);
- bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
+ bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
modelMap.put("bookId", bookId);
modelMap.put("book", bookvo);
@@ -243,7 +244,7 @@ public class BookController {
bookContent.setId(-1L);
bookContent.setBookId(bookId);
bookContent.setIndexNum(indexNum);
- bookContent.setContent("正在手打中,请稍等片刻,内容更新后,需要重新刷新页面,才能获取最新更新");
+ bookContent.setContent(Constants.NO_CONTENT_DESC);
indexName = "更新中。。。";
} else {
indexName = bookService.queryIndexNameByBookIdAndIndexNum(bookId, indexNum);
diff --git a/novel-front/src/main/java/xyz/zinglizingli/common/config/CrawlBiqudaoConfig.java b/novel-front/src/main/java/xyz/zinglizingli/common/config/CrawlBiqudaoConfig.java
new file mode 100644
index 0000000..ec547ca
--- /dev/null
+++ b/novel-front/src/main/java/xyz/zinglizingli/common/config/CrawlBiqudaoConfig.java
@@ -0,0 +1,27 @@
+package xyz.zinglizingli.common.config;
+
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import xyz.zinglizingli.common.crawl.BaseHtmlCrawlSource;
+import xyz.zinglizingli.common.crawl.BiquCrawlSource;
+
+/**
+ * Spring configuration that registers the Biqudao crawl source.
+ *
+ * <p>The bean is only created when {@code biqudao.crawlsource.enabled} is {@code true}, and its
+ * properties are bound from the {@code biqudao.crawlsource} prefix.
+ *
+ * @author 11797
+ */
+@Slf4j
+@Configuration
+public class CrawlBiqudaoConfig {
+
+    /**
+     * Creates the Biqudao crawl source.
+     *
+     * <p>The method name doubles as the bean name, so it must differ from the
+     * {@code BiqutaCrawlSource} bean declared by {@code CrawlBiqutaConfig}; with identical names
+     * the two bean definitions clash as soon as both sources are enabled.
+     *
+     * @return a crawl source whose settings are bound from {@code biqudao.crawlsource.*}
+     */
+    @Bean
+    @ConfigurationProperties(prefix = "biqudao.crawlsource") // prefix must match the property prefix in application.yml
+    @ConditionalOnProperty(prefix = "biqudao.crawlsource", name = "enabled", havingValue = "true")
+    public BaseHtmlCrawlSource biqudaoCrawlSource() {
+        return new BiquCrawlSource();
+    }
+}
diff --git a/novel-front/src/main/java/xyz/zinglizingli/common/config/CrawlBiqutaConfig.java b/novel-front/src/main/java/xyz/zinglizingli/common/config/CrawlBiqutaConfig.java
new file mode 100644
index 0000000..6ff5b61
--- /dev/null
+++ b/novel-front/src/main/java/xyz/zinglizingli/common/config/CrawlBiqutaConfig.java
@@ -0,0 +1,29 @@
+package xyz.zinglizingli.common.config;
+
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Primary;
+import xyz.zinglizingli.common.crawl.BaseHtmlCrawlSource;
+import xyz.zinglizingli.common.crawl.BiquCrawlSource;
+
+/**
+ * Spring configuration that registers the Biquta crawl source.
+ *
+ * <p>The bean exists only while {@code biquta.crawlsource.enabled} is {@code true}; its
+ * properties are bound from the {@code biquta.crawlsource} prefix.
+ *
+ * @author 11797
+ */
+@Configuration
+@Slf4j
+public class CrawlBiqutaConfig {
+
+    /**
+     * Builds the Biquta crawl source and marks it as the primary candidate for injection.
+     *
+     * @return a crawl source whose settings are bound from {@code biquta.crawlsource.*}
+     */
+    @ConditionalOnProperty(prefix = "biquta.crawlsource", name = "enabled", havingValue = "true")
+    @ConfigurationProperties(prefix = "biquta.crawlsource") // prefix must match the property prefix in application.yml
+    @Primary // required, otherwise an error is reported; the next config class does not need it
+    @Bean
+    public BaseHtmlCrawlSource BiqutaCrawlSource() {
+        BaseHtmlCrawlSource biqutaSource = new BiquCrawlSource();
+        return biqutaSource;
+    }
+}
diff --git a/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BaseCrawlSource.java b/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BaseCrawlSource.java
new file mode 100644
index 0000000..ff923cb
--- /dev/null
+++ b/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BaseCrawlSource.java
@@ -0,0 +1,20 @@
+package xyz.zinglizingli.common.crawl;
+
+import lombok.Data;
+import org.springframework.beans.factory.annotation.Value;
+
+/**
+ * Crawl source: abstract base type for the crawler's data sources.
+ * @author 11797
+ */
+@Data
+public abstract class BaseCrawlSource {
+
+ // Injected from the books.lowestScore property.
+ @Value("${books.lowestScore}")
+ private Float lowestScore;
+
+ /**
+ * Parses data. Concrete crawl sources supply the implementation.
+ * */
+ public abstract void parse();
+}
diff --git a/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BaseHtmlCrawlSource.java b/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BaseHtmlCrawlSource.java
new file mode 100644
index 0000000..2bd761a
--- /dev/null
+++ b/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BaseHtmlCrawlSource.java
@@ -0,0 +1,81 @@
+package xyz.zinglizingli.common.crawl;
+
+import lombok.Data;
+
+/**
+ * HTML crawl source: holds the URLs and pattern strings used to crawl a site's HTML pages.
+ * @author 11797
+ */
+@Data
+public abstract class BaseHtmlCrawlSource extends BaseCrawlSource{
+
+ /**
+ * Home page URL.
+ * */
+ private String indexUrl;
+
+ /**
+ * List page URL.
+ * */
+ private String listPageUrl;
+
+ /**
+ * Book URL pattern.
+ * */
+ private String bookUrlPattern;
+
+ /**
+ * Score pattern.
+ * */
+ private String scorePattern;
+
+ /**
+ * Book name pattern.
+ * */
+ private String bookNamePattern;
+
+ /**
+ * Author pattern.
+ * */
+ private String authorPattern;
+
+ /**
+ * Status pattern.
+ * */
+ private String statusPattern;
+
+ /**
+ * Category pattern.
+ * */
+ private String catPattern;
+
+
+ /**
+ * Update time pattern.
+ * */
+ private String updateTimePattern;
+
+
+ /**
+ * Cover picture pattern.
+ * */
+ private String picPattern;
+
+
+ /**
+ * Introduction (book description) pattern.
+ * */
+ private String introPattern;
+
+ /**
+ * Pattern for the URL of the full catalog page.
+ * */
+ private String catalogUrlPattern;
+
+ /**
+ * Catalog (chapter list) pattern.
+ * */
+ private String catalogPattern;
+
+
+}
diff --git a/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BaseJsonCrawlSource.java b/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BaseJsonCrawlSource.java
new file mode 100644
index 0000000..2132325
--- /dev/null
+++ b/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BaseJsonCrawlSource.java
@@ -0,0 +1,8 @@
+package xyz.zinglizingli.common.crawl;
+
+/**
+ * JSON crawl source: abstract base for crawl sources of the JSON kind; declares no members yet.
+ * @author 11797
+ */
+public abstract class BaseJsonCrawlSource extends BaseCrawlSource{
+}
diff --git a/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BiquCrawlSource.java b/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BiquCrawlSource.java
new file mode 100644
index 0000000..1c8a694
--- /dev/null
+++ b/novel-front/src/main/java/xyz/zinglizingli/common/crawl/BiquCrawlSource.java
@@ -0,0 +1,216 @@
+package xyz.zinglizingli.common.crawl;
+
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import xyz.zinglizingli.books.po.Book;
+import xyz.zinglizingli.books.po.BookContent;
+import xyz.zinglizingli.books.po.BookIndex;
+import xyz.zinglizingli.books.service.BookService;
+import xyz.zinglizingli.common.utils.CatUtil;
+import xyz.zinglizingli.common.utils.ExcutorUtils;
+import xyz.zinglizingli.common.utils.RestTemplateUtil;
+
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static java.util.regex.Pattern.compile;
+
+/**
+ * HTML crawl source for Biqu-style novel sites.
+ *
+ * <p>Every URL and regular expression it uses comes from the settings inherited from
+ * {@link BaseHtmlCrawlSource}; this class only contains the scraping flow.
+ *
+ * @author 11797
+ */
+@Slf4j
+public class BiquCrawlSource extends BaseHtmlCrawlSource {
+
+    /** Marker text located directly before the chapter text on a chapter page. */
+    private static final String CONTENT_START = "『章节错误,点此举报』";
+
+    /** Marker text located directly after the chapter text on a chapter page. */
+    private static final String CONTENT_END = "『加入书签,方便阅读』";
+
+    @Autowired
+    private BookService bookService;
+
+    /**
+     * Scrapes the first list page and updates every listed book whose score is not below
+     * {@code getLowestScore()}.
+     *
+     * <p>A failure while handling one book is logged and does not stop the remaining books.
+     */
+    @Override
+    public void parse() {
+        String catBookListUrl = getListPageUrl().replace("{0}", "0").replace("{1}", "1");
+        String listPage = RestTemplateUtil.getBodyByUtf8(catBookListUrl);
+        if (listPage == null) {
+            return;
+        }
+
+        // Three matchers walk the first list page in parallel: book URL, score and book name.
+        Matcher bookMatcher = compile(getBookUrlPattern()).matcher(listPage);
+        boolean isFind = bookMatcher.find();
+        Matcher scoreMatch = compile(getScorePattern()).matcher(listPage);
+        boolean scoreFind = scoreMatch.find();
+        Matcher bookNameMatch = compile(getBookNamePattern()).matcher(listPage);
+        boolean isBookNameMatch = bookNameMatch.find();
+
+        while (isFind && scoreFind && isBookNameMatch) {
+            try {
+                Float score = Float.parseFloat(scoreMatch.group(1));
+                if (score < getLowestScore()) {
+                    // The finally block still advances the matchers to the next entry.
+                    continue;
+                }
+                crawlBook(bookMatcher.group(1), bookNameMatch.group(1), score);
+            } catch (Exception e) {
+                // One broken book must not abort the whole crawl; log it instead of printing to stderr.
+                log.error(e.getMessage(), e);
+            } finally {
+                // The book URL matcher is advanced twice per entry, the other two once.
+                // NOTE(review): presumably each list entry contains two links matching the
+                // book URL pattern -- confirm against the configured pattern.
+                bookMatcher.find();
+                isFind = bookMatcher.find();
+                scoreFind = scoreMatch.find();
+                isBookNameMatch = bookNameMatch.find();
+            }
+        }
+    }
+
+    /**
+     * Fetches one book's detail page, reads its metadata and hands over to the chapter update.
+     *
+     * <p>The book is skipped silently as soon as the page or any required detail is missing.
+     *
+     * @param bookNum  book identifier captured from the list page, used to build the detail URL
+     * @param bookName book name captured from the list page
+     * @param score    score captured from the list page
+     * @throws java.text.ParseException if the update time on the page cannot be parsed
+     */
+    private void crawlBook(String bookNum, String bookName, Float score) throws java.text.ParseException {
+        String bookUrl = getIndexUrl() + "/" + bookNum + "/";
+        String body = RestTemplateUtil.getBodyByUtf8(bookUrl);
+        if (body == null) {
+            return;
+        }
+
+        Matcher authorMatch = findFirst(getAuthorPattern(), body);
+        if (authorMatch == null) {
+            return;
+        }
+        String author = authorMatch.group(1);
+
+        Matcher statusMatch = findFirst(getStatusPattern(), body);
+        if (statusMatch == null) {
+            return;
+        }
+        String status = statusMatch.group(1);
+
+        Matcher catMatch = findFirst(getCatPattern(), body);
+        if (catMatch == null) {
+            return;
+        }
+        int catNum = CatUtil.getCatNum(catMatch.group(1));
+
+        Matcher updateTimeMatch = findFirst(getUpdateTimePattern(), body);
+        if (updateTimeMatch == null) {
+            return;
+        }
+        // SimpleDateFormat is not thread-safe, so a fresh instance is created per book.
+        Date updateTime = new SimpleDateFormat("yy-MM-dd HH:mm:ss").parse(updateTimeMatch.group(1));
+
+        Matcher picMatch = findFirst(getPicPattern(), body);
+        if (picMatch == null) {
+            return;
+        }
+        String picSrc = picMatch.group(1);
+
+        Matcher descMatch = findFirst(getIntroPattern(), body);
+        if (descMatch == null) {
+            return;
+        }
+        String desc = descMatch.group(1);
+
+        Book book = new Book();
+        book.setAuthor(author);
+        book.setCatid(catNum);
+        book.setBookDesc(desc);
+        book.setBookName(bookName);
+        // Scores above 10 are replaced by 8.0.
+        book.setScore(score > 10 ? 8.0f : score);
+        book.setPicUrl(picSrc);
+        book.setBookStatus(status);
+        book.setUpdateTime(updateTime);
+
+        updateChapters(book, bookName, author, body);
+    }
+
+    /**
+     * Reads the book's catalog and queues every chapter that is not stored yet for saving.
+     *
+     * <p>Only books that already have stored chapters are processed here: updates and inserts
+     * are handled separately, and per the original note inserts run once in the early morning.
+     *
+     * @param book     book metadata to save together with the new chapters
+     * @param bookName book name used to look up the stored chapter numbers
+     * @param author   author used to look up the stored chapter numbers
+     * @param body     HTML of the book's detail page
+     */
+    private void updateChapters(Book book, String bookName, String author, String body) {
+        Matcher catalogUrlMatch = findFirst(getCatalogUrlPattern(), body);
+        if (catalogUrlMatch == null) {
+            return;
+        }
+        String catalogBody = RestTemplateUtil.getBodyByUtf8(getIndexUrl() + catalogUrlMatch.group(1));
+        if (catalogBody == null) {
+            return;
+        }
+        Matcher chapterMatch = compile(getCatalogPattern()).matcher(catalogBody);
+
+        // Chapter numbers already stored for this book.
+        List<?> storedIndexNums = bookService.queryIndexNumByBookNameAndAuthor(bookName, author);
+        if (storedIndexNums.isEmpty()) {
+            return;
+        }
+
+        List<BookIndex> indexList = new ArrayList<>();
+        List<BookContent> contentList = new ArrayList<>();
+        int indexNum = 0;
+        while (chapterMatch.find()) {
+            if (!storedIndexNums.contains(indexNum)) {
+                String contentUrl = getIndexUrl() + chapterMatch.group(1);
+                String indexName = chapterMatch.group(2);
+
+                String chapterPage = RestTemplateUtil.getBodyByUtf8(contentUrl);
+                String content = chapterPage == null ? null : extractContent(chapterPage);
+                if (content == null) {
+                    // Stop at the first chapter that cannot be fetched or parsed; the chapters
+                    // collected before it are still saved below.
+                    log.warn("Chapter content unavailable, stop collecting chapters. url={}", contentUrl);
+                    break;
+                }
+
+                BookIndex bookIndex = new BookIndex();
+                bookIndex.setIndexName(indexName);
+                bookIndex.setIndexNum(indexNum);
+                indexList.add(bookIndex);
+
+                BookContent bookContent = new BookContent();
+                bookContent.setContent(content);
+                bookContent.setIndexNum(indexNum);
+                contentList.add(bookContent);
+            }
+            indexNum++;
+        }
+
+        // Both lists grow in lockstep, so one emptiness check covers them.
+        if (!indexList.isEmpty()) {
+            ExcutorUtils.excuteFixedTask(() ->
+                    bookService.saveBookAndIndexAndContent(book, indexList, contentList)
+            );
+        }
+    }
+
+    /**
+     * Cuts the chapter text out of a chapter page.
+     *
+     * @param chapterPage HTML of the chapter page
+     * @return the text between the start and end markers, or {@code null} when either marker
+     *         is missing
+     */
+    private static String extractContent(String chapterPage) {
+        int markerPos = chapterPage.indexOf(CONTENT_START);
+        if (markerPos < 0) {
+            return null;
+        }
+        int contentStart = markerPos + CONTENT_START.length();
+        // Search for the end marker after the start marker so the range is always valid.
+        int contentEnd = chapterPage.indexOf(CONTENT_END, contentStart);
+        if (contentEnd < 0) {
+            return null;
+        }
+        return chapterPage.substring(contentStart, contentEnd);
+    }
+
+    /**
+     * Runs {@code regex} against {@code text} and positions the matcher on the first match.
+     *
+     * @param regex regular expression to compile
+     * @param text  text to search
+     * @return the matcher positioned on its first match, or {@code null} when nothing matches
+     */
+    private static Matcher findFirst(String regex, String text) {
+        Matcher matcher = compile(regex).matcher(text);
+        return matcher.find() ? matcher : null;
+    }
+}
diff --git a/novel-front/src/main/java/xyz/zinglizingli/common/schedule/CrawlBooksSchedule.java b/novel-front/src/main/java/xyz/zinglizingli/common/schedule/CrawlBooksSchedule.java
index 1d377af..3c1dba5 100644
--- a/novel-front/src/main/java/xyz/zinglizingli/common/schedule/CrawlBooksSchedule.java
+++ b/novel-front/src/main/java/xyz/zinglizingli/common/schedule/CrawlBooksSchedule.java
@@ -3,28 +3,13 @@ package xyz.zinglizingli.common.schedule;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.codec.Charsets;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.http.HttpStatus;
-import org.springframework.http.ResponseEntity;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
-import xyz.zinglizingli.books.po.Book;
-import xyz.zinglizingli.books.po.BookContent;
-import xyz.zinglizingli.books.po.BookIndex;
import xyz.zinglizingli.books.service.BookService;
-import xyz.zinglizingli.common.utils.ExcutorUtils;
+import xyz.zinglizingli.common.crawl.BaseCrawlSource;
import xyz.zinglizingli.common.utils.RestTemplateUtil;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import static java.util.regex.Pattern.compile;
-
/**
* 更新书籍章节内容定时任务
*
@@ -35,19 +20,8 @@ import static java.util.regex.Pattern.compile;
@Slf4j
public class CrawlBooksSchedule {
- private final BookService bookService;
- private RestTemplate utf8RestTemplate = RestTemplateUtil.getInstance(Charsets.UTF_8);
-
-
- @Value("${books.lowestScore}")
- private Float lowestScore;
-
- @Value("${crawl.website.type}")
- private Byte websiteType;
-
- @Value("${pic.save.path}")
- private String picSavePath;
+ private final BaseCrawlSource crawlSource;
/**
@@ -58,459 +32,11 @@ public class CrawlBooksSchedule {
log.debug("crawlBooksSchedule执行中。。。。。。。。。。。。");
- switch (websiteType) {
- case 1: {
- updateBiqudaoBooks(0);
- break;
- }
- case 2: {
- updateBiquTaBooks(0);
- break;
- }
- default: {
- break;
- }
- }
+ crawlSource.parse();
}
- /**
- * 从笔趣塔更新
- */
- private void updateBiquTaBooks(int bookClass) {
- String baseUrl = "https://m.biquta.la";
- String catBookListUrlBase = baseUrl + "/class/";
-
- String catBookListUrl = catBookListUrlBase + bookClass + "/" + 1 + ".html";
- String forObject = getByRestTemplate(catBookListUrl);
- if (forObject != null) {
- Pattern pattern = compile("value=\"(\\d+)/(\\d+)\"");
- Matcher matcher = pattern.matcher(forObject);
- boolean isFind = matcher.find();
- if (isFind) {
- //解析第一页书籍的数据
- Pattern bookPatten = compile("href=\"/(\\d+_\\d+)/\"");
- parseBiquTaBook(bookPatten, forObject, baseUrl);
- }
- }
- }
-
- /**
- * 解析笔趣塔数据
- */
- private void parseBiquTaBook(Pattern bookPatten, String forObject, String baseUrl) {
- Matcher bookMatcher = bookPatten.matcher(forObject);
-
- boolean isFind = bookMatcher.find();
- Pattern scorePatten = compile("(\\d+\\.\\d+)分
");
- Matcher scoreMatch = scorePatten.matcher(forObject);
- boolean scoreFind = scoreMatch.find();
-
- Pattern bookNamePatten = compile("([^/]+)
");
- Matcher bookNameMatch = bookNamePatten.matcher(forObject);
- boolean isBookNameMatch = bookNameMatch.find();
-
- while (isFind && scoreFind && isBookNameMatch) {
-
- try {
- Float score = Float.parseFloat(scoreMatch.group(1));
-
- if (score < lowestScore) {
- continue;
- }
-
- String bokNum = bookMatcher.group(1);
- String bookUrl = baseUrl + "/" + bokNum + "/";
-
- String body = getByRestTemplate(bookUrl);
- if (body != null) {
-
- String bookName = bookNameMatch.group(1);
- Pattern authorPatten = compile(">作者:([^/]+)<");
- Matcher authoreMatch = authorPatten.matcher(body);
- if (authoreMatch.find()) {
- String author = authoreMatch.group(1);
-
- Pattern statusPatten = compile("状态:([^/]+)");
- Matcher statusMatch = statusPatten.matcher(body);
- if (statusMatch.find()) {
- String status = statusMatch.group(1);
-
- Pattern catPatten = compile("类别:([^/]+)");
- Matcher catMatch = catPatten.matcher(body);
- if (catMatch.find()) {
- String catName = catMatch.group(1);
- int catNum = getCatNum(catName);
-
-
- Pattern updateTimePatten = compile("更新:(\\d+-\\d+-\\d+\\s\\d+:\\d+:\\d+)");
- Matcher updateTimeMatch = updateTimePatten.matcher(body);
- if (updateTimeMatch.find()) {
- String updateTimeStr = updateTimeMatch.group(1);
- SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
- Date updateTime = format.parse(updateTimeStr);
- Pattern picPatten = compile("
]+)\"\\s+onerror=\"this.src=");
- Matcher picMather = picPatten.matcher(body);
- if (picMather.find()) {
- String picSrc = picMather.group(1);
-
-
- Pattern descPatten = compile("class=\"review\">([^<]+)
");
- Matcher descMatch = descPatten.matcher(body);
- if (descMatch.find()) {
- String desc = descMatch.group(1);
-
-
- Book book = new Book();
- book.setAuthor(author);
- book.setCatid(catNum);
- book.setBookDesc(desc);
- book.setBookName(bookName);
- book.setScore(score > 10 ? 8.0f : score);
- book.setPicUrl(picSrc);
- book.setBookStatus(status);
- book.setUpdateTime(updateTime);
-
- List indexList = new ArrayList<>();
- List contentList = new ArrayList<>();
-
- //读取目录
- Pattern indexPatten = compile("查看完整目录");
- Matcher indexMatch = indexPatten.matcher(body);
- if (indexMatch.find()) {
- String indexUrl = baseUrl + indexMatch.group(1);
- String body2 = getByRestTemplate(indexUrl);
- if (body2 != null) {
- Pattern indexListPatten = compile("([^/]+)");
- Matcher indexListMatch = indexListPatten.matcher(body2);
-
- boolean isFindIndex = indexListMatch.find();
-
- int indexNum = 0;
-
- //查询该书籍已存在目录号
- List hasIndexNum = bookService.queryIndexNumByBookNameAndAuthor(bookName, author);
- //更新和插入分别开,插入只在凌晨做一次
- if (hasIndexNum.size() > 0) {
- while (isFindIndex) {
- if (!hasIndexNum.contains(indexNum)) {
-
- String contentUrl = baseUrl + indexListMatch.group(1);
- String indexName = indexListMatch.group(2);
-
-
- //查询章节内容
- String body3 = getByRestTemplate(contentUrl);
- if (body3 != null) {
- String start = "『章节错误,点此举报』";
- String end = "『加入书签,方便阅读』";
- String content = body3.substring(body3.indexOf(start) + start.length(), body3.indexOf(end));
- //TODO插入章节目录和章节内容
- BookIndex bookIndex = new BookIndex();
- bookIndex.setIndexName(indexName);
- bookIndex.setIndexNum(indexNum);
- indexList.add(bookIndex);
- BookContent bookContent = new BookContent();
- bookContent.setContent(content);
- bookContent.setIndexNum(indexNum);
- contentList.add(bookContent);
-
-
- } else {
- break;
- }
-
-
- }
- indexNum++;
- isFindIndex = indexListMatch.find();
- }
-
- if (indexList.size() == contentList.size() && indexList.size() > 0) {
- ExcutorUtils.excuteFixedTask(() ->
- bookService.saveBookAndIndexAndContent(book, indexList, contentList)
- );
-
- }
- }
- }
-
-
- }
-
-
- }
-
- }
- }
- }
- }
-
-
- }
-
- }
-
-
- } catch (Exception e) {
-
- e.printStackTrace();
-
- } finally {
- bookMatcher.find();
- isFind = bookMatcher.find();
- scoreFind = scoreMatch.find();
- isBookNameMatch = bookNameMatch.find();
- }
-
-
- }
- }
-
- /**
- * 从笔趣岛更新
- */
- private void updateBiqudaoBooks(int bookClass) {
- String baseUrl = "https://m.biqudao.com";
- String catBookListUrlBase = baseUrl + "/bqgeclass/";
-
- int page = 1;
- String catBookListUrl = catBookListUrlBase + bookClass + "/" + page + ".html";
- String forObject = getByRestTemplate(catBookListUrl);
- if (forObject != null) {
- Pattern pattern = compile("value=\"(\\d+)/(\\d+)\"");
- Matcher matcher = pattern.matcher(forObject);
- boolean isFind = matcher.find();
- if (isFind) {
- //解析第一页书籍的数据
- Pattern bookPatten = compile("href=\"/(bqge\\d+)/\"");
- parseBiquDaoBook(bookPatten, forObject, baseUrl);
- }
- }
-
-
- }
-
-
- /**
- * 解析笔趣岛数据
- */
- private void parseBiquDaoBook(Pattern bookPatten, String forObject, String baseUrl) {
-
- Matcher bookMatcher = bookPatten.matcher(forObject);
- boolean isFind = bookMatcher.find();
- Pattern scorePatten = compile("(\\d+\\.\\d+)分
");
- Matcher scoreMatch = scorePatten.matcher(forObject);
- boolean scoreFind = scoreMatch.find();
-
- Pattern bookNamePatten = compile("([^/]+)
");
- Matcher bookNameMatch = bookNamePatten.matcher(forObject);
- boolean isBookNameMatch = bookNameMatch.find();
-
- while (isFind && scoreFind && isBookNameMatch) {
-
- try {
- Float score = Float.parseFloat(scoreMatch.group(1));
-
- if (score < lowestScore) {
- continue;
- }
-
- String bokNum = bookMatcher.group(1);
- String bookUrl = baseUrl + "/" + bokNum + "/";
-
- String body = getByRestTemplate(bookUrl);
- if (body != null) {
-
- String bookName = bookNameMatch.group(1);
- Pattern authorPatten = compile("作者:([^/]+)");
- Matcher authoreMatch = authorPatten.matcher(body);
- if (authoreMatch.find()) {
- String author = authoreMatch.group(1);
-
- Pattern statusPatten = compile("状态:([^/]+)");
- Matcher statusMatch = statusPatten.matcher(body);
- if (statusMatch.find()) {
- String status = statusMatch.group(1);
-
- Pattern catPatten = compile("类别:([^/]+)");
- Matcher catMatch = catPatten.matcher(body);
- if (catMatch.find()) {
- String catName = catMatch.group(1);
- int catNum = getCatNum(catName);
- Pattern updateTimePatten = compile("更新:(\\d+-\\d+-\\d+\\s\\d+:\\d+:\\d+)");
- Matcher updateTimeMatch = updateTimePatten.matcher(body);
- if (updateTimeMatch.find()) {
- String updateTimeStr = updateTimeMatch.group(1);
- SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
- Date updateTime = format.parse(updateTimeStr);
- Pattern picPatten = compile("
]+)\"\\s+onerror=\"this.src=");
- Matcher picMather = picPatten.matcher(body);
- if (picMather.find()) {
- String picSrc = picMather.group(1);
-
-
- Pattern descPatten = compile("class=\"review\">([^<]+)");
- Matcher descMatch = descPatten.matcher(body);
- if (descMatch.find()) {
- String desc = descMatch.group(1);
-
-
- Book book = new Book();
- book.setAuthor(author);
- book.setCatid(catNum);
- book.setBookDesc(desc);
- book.setBookName(bookName);
- book.setScore(score > 10 ? 8.0f : score);
- book.setPicUrl(picSrc);
- book.setBookStatus(status);
- book.setUpdateTime(updateTime);
-
- List indexList = new ArrayList<>();
- List contentList = new ArrayList<>();
-
- //读取目录
- Pattern indexPatten = compile("查看完整目录");
- Matcher indexMatch = indexPatten.matcher(body);
- if (indexMatch.find()) {
- String indexUrl = baseUrl + indexMatch.group(1);
- String body2 = getByRestTemplate(indexUrl);
- if (body2 != null) {
- Pattern indexListPatten = compile("([^/]+)");
- Matcher indexListMatch = indexListPatten.matcher(body2);
-
- boolean isFindIndex = indexListMatch.find();
-
- int indexNum = 0;
-
- //查询该书籍已存在目录号
- List hasIndexNum = bookService.queryIndexNumByBookNameAndAuthor(bookName, author);
- //只更新已存在的书籍
- if (hasIndexNum.size() > 0) {
- while (isFindIndex) {
- if (!hasIndexNum.contains(indexNum)) {
-
- String contentUrl = baseUrl + indexListMatch.group(1);
- String indexName = indexListMatch.group(2);
-
-
- //查询章节内容
- String body3 = getByRestTemplate(contentUrl);
- if (body3 != null) {
- String start = "『章节错误,点此举报』";
- String end = "『加入书签,方便阅读』";
- String content = body3.substring(body3.indexOf(start) + start.length(), body3.indexOf(end));
- //TODO插入章节目录和章节内容
- BookIndex bookIndex = new BookIndex();
- bookIndex.setIndexName(indexName);
- bookIndex.setIndexNum(indexNum);
- indexList.add(bookIndex);
- BookContent bookContent = new BookContent();
- bookContent.setContent(content);
- bookContent.setIndexNum(indexNum);
- contentList.add(bookContent);
-
-
- } else {
- break;
- }
-
-
- }
- indexNum++;
- isFindIndex = indexListMatch.find();
- }
-
- if (indexList.size() == contentList.size() && indexList.size() > 0) {
- ExcutorUtils.excuteFixedTask(() -> bookService.saveBookAndIndexAndContent(book, indexList, contentList));
-
- }
- }
- }
-
-
- }
-
-
- }
-
-
- }
- }
- }
- }
-
-
- }
-
- }
-
-
- } catch (Exception e) {
-
- e.printStackTrace();
-
- } finally {
- bookMatcher.find();
- isFind = bookMatcher.find();
- scoreFind = scoreMatch.find();
- isBookNameMatch = bookNameMatch.find();
- }
-
-
- }
-
- }
-
- private int getCatNum(String catName) {
- int catNum;
- switch (catName) {
- case "武侠仙侠": {
- catNum = 2;
- break;
- }
- case "都市言情": {
- catNum = 3;
- break;
- }
- case "历史军事": {
- catNum = 4;
- break;
- }
- case "科幻灵异": {
- catNum = 5;
- break;
- }
- case "网游竞技": {
- catNum = 6;
- break;
- }
- case "女生频道": {
- catNum = 7;
- break;
- }
- default: {
- catNum = 1;
- break;
- }
- }
- return catNum;
- }
-
- private String getByRestTemplate(String url) {
- try {
- ResponseEntity forEntity = utf8RestTemplate.getForEntity(url, String.class);
- if (forEntity.getStatusCode() == HttpStatus.OK) {
- return forEntity.getBody();
- } else {
- return null;
- }
- } catch (Exception e) {
- log.error(e.getMessage(), e);
- return null;
- }
- }
}
diff --git a/novel-front/src/main/java/xyz/zinglizingli/common/utils/CatUtil.java b/novel-front/src/main/java/xyz/zinglizingli/common/utils/CatUtil.java
new file mode 100644
index 0000000..bc0cb1d
--- /dev/null
+++ b/novel-front/src/main/java/xyz/zinglizingli/common/utils/CatUtil.java
@@ -0,0 +1,179 @@
+package xyz.zinglizingli.common.utils;
+
+/** Static lookup helpers that convert between category ids and category display names.
+ * @author 11797
+ */
+public class CatUtil {
+ /** Maps a category display name to its numeric id: the six names listed below give 2-7, every other name gives 1. NOTE(review): no null guard - a null catName throws NullPointerException at the switch. */
+ public static int getCatNum(String catName) {
+ int catNum;
+ switch (catName) {
+ case "武侠仙侠": {
+ catNum = 2;
+ break;
+ }
+ case "都市言情": {
+ catNum = 3;
+ break;
+ }
+ case "历史军事": {
+ catNum = 4;
+ break;
+ }
+ case "科幻灵异": {
+ catNum = 5;
+ break;
+ }
+ case "网游竞技": {
+ catNum = 6;
+ break;
+ }
+ case "女生频道": {
+ catNum = 7;
+ break;
+ }
+ default: { // any name not listed above falls back to id 1
+ catNum = 1;
+ break;
+ }
+ }
+ return catNum;
+ }
+
+
+
+ /**
+ * Returns the light-novel sub-category name for ids 21-29 and 0; any other id yields the default "其他".
+ * NOTE(review): no null guard - a null softCat throws NullPointerException when the switch unboxes it. */
+ public static String getSoftCatNameById(Integer softCat) {
+ String catName = "其他";
+
+ switch (softCat) {
+ case 21: {
+ catName = "魔幻";
+ break;
+ }
+ case 22: {
+ catName = "玄幻";
+ break;
+ }
+ case 23: {
+ catName = "古风";
+ break;
+ }
+ case 24: {
+ catName = "科幻";
+ break;
+ }
+ case 25: {
+ catName = "校园";
+ break;
+ }
+ case 26: {
+ catName = "都市";
+ break;
+ }
+ case 27: {
+ catName = "游戏";
+ break;
+ }
+ case 28: {
+ catName = "同人";
+ break;
+ }
+ case 29: {
+ catName = "悬疑";
+ break;
+ }
+ case 0: {
+ catName = "动漫";
+ break;
+ }
+ default: { // unknown id: keep the default name assigned above
+ break;
+ }
+
+
+ }
+ return catName;
+
+ }
+
+ /**
+ * Returns the manga (comic) category name: only ids 3262 and 3263 are mapped; any other id yields the default "其他".
+ * NOTE(review): no null guard - a null softCat throws NullPointerException when the switch unboxes it. */
+ public static String getMhCatNameById(Integer softCat) {
+ String catName = "其他";
+
+ switch (softCat) {
+ case 3262: {
+ catName = "少年漫";
+ break;
+ }
+ case 3263: {
+ catName = "少女漫";
+ break;
+ }
+ default: { // unknown id: keep the default name assigned above
+ break;
+ }
+
+
+ }
+ return catName;
+
+ }
+
+
+ /**
+ * Returns the top-level category name for ids 1-9; any other id yields the default "其他".
+ * NOTE(review): no null guard - a null catid throws NullPointerException when the switch unboxes it. */
+ public static String getCatNameById(Integer catid) {
+ String catName = "其他";
+
+ switch (catid) {
+ case 1: {
+ catName = "玄幻奇幻";
+ break;
+ }
+ case 2: {
+ catName = "武侠仙侠";
+ break;
+ }
+ case 3: {
+ catName = "都市言情";
+ break;
+ }
+ case 4: {
+ catName = "历史军事";
+ break;
+ }
+ case 5: {
+ catName = "科幻灵异";
+ break;
+ }
+ case 6: {
+ catName = "网游竞技";
+ break;
+ }
+ case 7: {
+ catName = "女生频道";
+ break;
+ }
+ case 8: {
+ catName = "轻小说";
+ break;
+ }
+ case 9: {
+ catName = "漫画";
+ break;
+ }
+ default: { // unknown id: keep the default name assigned above
+ break;
+ }
+
+
+ }
+ return catName;
+ }
+}
diff --git a/novel-front/src/main/java/xyz/zinglizingli/common/utils/Constants.java b/novel-front/src/main/java/xyz/zinglizingli/common/utils/Constants.java
index ac4ed0a..69b2d10 100644
--- a/novel-front/src/main/java/xyz/zinglizingli/common/utils/Constants.java
+++ b/novel-front/src/main/java/xyz/zinglizingli/common/utils/Constants.java
@@ -85,4 +85,9 @@ public class Constants {
* 多本书籍ID分隔符
* */
public static final String BOOK_ID_SEPARATOR = "-";
+
+ /**
+ * Description text that stands for "no content"; BookService skips any chapter whose content contains it.
+ * */
+ public static final String NO_CONTENT_DESC = "正在手打中,请稍等片刻,内容更新后,需要重新刷新页面,才能获取最新更新";
}
diff --git a/novel-front/src/main/java/xyz/zinglizingli/common/utils/RestTemplateUtil.java b/novel-front/src/main/java/xyz/zinglizingli/common/utils/RestTemplateUtil.java
index b956290..19046c4 100644
--- a/novel-front/src/main/java/xyz/zinglizingli/common/utils/RestTemplateUtil.java
+++ b/novel-front/src/main/java/xyz/zinglizingli/common/utils/RestTemplateUtil.java
@@ -1,5 +1,9 @@
package xyz.zinglizingli.common.utils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.codec.Charsets;
+import org.springframework.http.HttpStatus;
+import org.springframework.http.ResponseEntity;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import org.springframework.http.converter.HttpMessageConverter;
import org.springframework.http.converter.StringHttpMessageConverter;
@@ -10,6 +14,10 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+/**
+ * @author 11797
+ */
+@Slf4j
public class RestTemplateUtil {
private static Map restTemplateMap = new HashMap<>();
@@ -35,4 +43,18 @@ public class RestTemplateUtil {
return restTemplate;
}
+ public static String getBodyByUtf8(String url) { // fetches url through the instance returned by getInstance(Charsets.UTF_8); returns the response body, or null on any failure
+ try {
+ ResponseEntity forEntity = getInstance(Charsets.UTF_8).getForEntity(url, String.class);
+ if (forEntity.getStatusCode() == HttpStatus.OK) { // only an exact 200 OK counts as success
+ return forEntity.getBody();
+ } else {
+ return null; // every other status is reported to the caller as "no body"
+ }
+ } catch (Exception e) {
+ log.error(e.getMessage(), e); // the failure is logged and swallowed; callers only ever see null
+ return null;
+ }
+ }
+
}
diff --git a/novel-front/src/main/resources/application-crawl.yml b/novel-front/src/main/resources/application-crawl.yml
new file mode 100644
index 0000000..438ef8b
--- /dev/null
+++ b/novel-front/src/main/resources/application-crawl.yml
@@ -0,0 +1,33 @@
+#爬取的网站名称类型 1:笔趣岛 ,2:笔趣塔 更多网站解析中,敬请期待
+biquta:
+ crawlsource:
+ enabled: true #是否开启此爬虫源
+ index-url: https://m.biquta.la
+ list-page-url: https://m.biquta.la/class/{0}/{1}.html
+ book-url-pattern: href="/(\d+_\d+)/"
+ score-pattern: (\d+\.\d+)分
+ book-name-pattern: ([^/]+)
+ author-pattern: 作者:([^/]+)<
+ status-pattern: 状态:([^/]+)
+ cat-pattern: 类别:([^/]+)
+ update-time-pattern: 更新:(\d+-\d+-\d+\s\d+:\d+:\d+)
+ pic-pattern:
([^<]+)
+ catalog-url-pattern: 查看完整目录
+ catalog-pattern: ([^/]+)
+biqudao:
+ crawlsource:
+ enabled: true #是否开启此爬虫源
+ index-url: https://m.biqudao.com
+ list-page-url: https://m.biqudao.com/bqgeclass/{0}/{1}.html
+ book-url-pattern: href="/(bqge\d+)/"
+ score-pattern: (\d+\.\d+)分
+ book-name-pattern: ([^/]+)
+ author-pattern: 作者:([^/]+)
+ status-pattern: 状态:([^/]+)
+ cat-pattern: 类别:([^/]+)
+ update-time-pattern: 更新:(\d+-\d+-\d+\s\d+:\d+:\d+)
+ pic-pattern:
([^<]+)
+ catalog-url-pattern: 查看完整目录
+ catalog-pattern: ([^/]+)
\ No newline at end of file
diff --git a/novel-front/src/main/resources/application.yml b/novel-front/src/main/resources/application.yml
index dd5fe56..3298845 100644
--- a/novel-front/src/main/resources/application.yml
+++ b/novel-front/src/main/resources/application.yml
@@ -4,8 +4,8 @@ server:
spring:
datasource:
url: jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai
- username: books
- password: books
+ username: root
+ password: test123456
# url: jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai
# username: root
# password: test123456
@@ -39,6 +39,8 @@ spring:
port: 465
class: javax.net.ssl.SSLSocketFactory
fallback: false
+ profiles:
+ include: crawl