mirror of
https://github.com/201206030/novel.git
synced 2025-04-27 07:30:50 +00:00
爬虫代码重构,增加可维护性
This commit is contained in:
parent
be3cf1bb91
commit
64f6dc393e
@ -19,6 +19,7 @@ import xyz.zinglizingli.common.constant.CacheKeyConstans;
|
||||
import xyz.zinglizingli.common.enums.PicSaveType;
|
||||
import xyz.zinglizingli.books.mapper.*;
|
||||
import xyz.zinglizingli.books.po.*;
|
||||
import xyz.zinglizingli.common.utils.Constants;
|
||||
import xyz.zinglizingli.common.utils.UUIDUtils;
|
||||
import xyz.zinglizingli.common.cache.CommonCacheUtil;
|
||||
import xyz.zinglizingli.common.utils.RestTemplateUtil;
|
||||
@ -96,7 +97,7 @@ public class BookService {
|
||||
List<BookContent> newContentList = new ArrayList<>();
|
||||
for (int i = 0; i < bookIndex.size(); i++) {
|
||||
BookContent bookContentItem = bookContent.get(i);
|
||||
if (!bookContentItem.getContent().contains("正在手打中,请稍等片刻,内容更新后,需要重新刷新页面,才能获取最新更新")) {
|
||||
if (!bookContentItem.getContent().contains(Constants.NO_CONTENT_DESC)) {
|
||||
BookIndex bookIndexItem = bookIndex.get(i);
|
||||
bookIndexItem.setBookId(bookId);
|
||||
bookContentItem.setBookId(bookId);
|
||||
@ -207,57 +208,7 @@ public class BookService {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取分类名
|
||||
* */
|
||||
public String getCatNameById(Integer catid) {
|
||||
String catName = "其他";
|
||||
|
||||
switch (catid) {
|
||||
case 1: {
|
||||
catName = "玄幻奇幻";
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
catName = "武侠仙侠";
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
catName = "都市言情";
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
catName = "历史军事";
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
catName = "科幻灵异";
|
||||
break;
|
||||
}
|
||||
case 6: {
|
||||
catName = "网游竞技";
|
||||
break;
|
||||
}
|
||||
case 7: {
|
||||
catName = "女生频道";
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
catName = "轻小说";
|
||||
break;
|
||||
}
|
||||
case 9: {
|
||||
catName = "漫画";
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
return catName;
|
||||
}
|
||||
|
||||
/**
|
||||
* 查询书籍的基础数据
|
||||
@ -374,87 +325,6 @@ public class BookService {
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 查询轻小说分类名
|
||||
* */
|
||||
public String getSoftCatNameById(Integer softCat) {
|
||||
String catName = "其他";
|
||||
|
||||
switch (softCat) {
|
||||
case 21: {
|
||||
catName = "魔幻";
|
||||
break;
|
||||
}
|
||||
case 22: {
|
||||
catName = "玄幻";
|
||||
break;
|
||||
}
|
||||
case 23: {
|
||||
catName = "古风";
|
||||
break;
|
||||
}
|
||||
case 24: {
|
||||
catName = "科幻";
|
||||
break;
|
||||
}
|
||||
case 25: {
|
||||
catName = "校园";
|
||||
break;
|
||||
}
|
||||
case 26: {
|
||||
catName = "都市";
|
||||
break;
|
||||
}
|
||||
case 27: {
|
||||
catName = "游戏";
|
||||
break;
|
||||
}
|
||||
case 28: {
|
||||
catName = "同人";
|
||||
break;
|
||||
}
|
||||
case 29: {
|
||||
catName = "悬疑";
|
||||
break;
|
||||
}
|
||||
case 0: {
|
||||
catName = "动漫";
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
return catName;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 查询漫画分类名
|
||||
* */
|
||||
public String getMhCatNameById(Integer softCat) {
|
||||
String catName = "其他";
|
||||
|
||||
switch (softCat) {
|
||||
case 3262: {
|
||||
catName = "少年漫";
|
||||
break;
|
||||
}
|
||||
case 3263: {
|
||||
catName = "少女漫";
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
return catName;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 保存弹幕
|
||||
|
@ -15,6 +15,8 @@ import xyz.zinglizingli.books.po.BookIndex;
|
||||
import xyz.zinglizingli.books.service.BookService;
|
||||
import xyz.zinglizingli.books.vo.BookVO;
|
||||
import xyz.zinglizingli.common.cache.CommonCacheUtil;
|
||||
import xyz.zinglizingli.common.utils.CatUtil;
|
||||
import xyz.zinglizingli.common.utils.Constants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
@ -71,7 +73,7 @@ public class ApiBookController {
|
||||
String userId = null;
|
||||
String titleType = "最近更新";
|
||||
if (catId != null) {
|
||||
titleType = bookService.getCatNameById(catId);
|
||||
titleType = CatUtil.getCatNameById(catId);
|
||||
} else if (keyword != null) {
|
||||
titleType = "搜索";
|
||||
} else if ("score".equals(sortBy)) {
|
||||
@ -90,7 +92,7 @@ public class ApiBookController {
|
||||
for (Book book : books) {
|
||||
BookVO bookvo = new BookVO();
|
||||
BeanUtils.copyProperties(book, bookvo);
|
||||
bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
|
||||
bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
|
||||
bookVOList.add(bookvo);
|
||||
}
|
||||
|
||||
@ -103,7 +105,7 @@ public class ApiBookController {
|
||||
int index = idsArr.indexOf(book.getId() + "");
|
||||
BookVO bookvo = new BookVO();
|
||||
BeanUtils.copyProperties(book, bookvo);
|
||||
bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
|
||||
bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
|
||||
bookVOArr[length - index - 1] = bookvo;
|
||||
}
|
||||
bookVOList = Arrays.asList(bookVOArr);
|
||||
@ -139,7 +141,7 @@ public class ApiBookController {
|
||||
|
||||
BookVO bookvo = new BookVO();
|
||||
BeanUtils.copyProperties(book, bookvo);
|
||||
bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
|
||||
bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
|
||||
modelMap.put("bookId", bookId);
|
||||
modelMap.put("book", bookvo);
|
||||
modelMap.put("indexList", indexList);
|
||||
@ -185,7 +187,7 @@ public class ApiBookController {
|
||||
bookContent.setId(-1L);
|
||||
bookContent.setBookId(bookId);
|
||||
bookContent.setIndexNum(indexNum);
|
||||
bookContent.setContent("正在手打中,请稍等片刻,内容更新后,需要重新刷新页面,才能获取最新更新");
|
||||
bookContent.setContent(Constants.NO_CONTENT_DESC);
|
||||
indexName="?";
|
||||
}else{
|
||||
indexName = bookService.queryIndexNameByBookIdAndIndexNum(bookId, indexNum);
|
||||
|
@ -20,6 +20,7 @@ import xyz.zinglizingli.books.service.BookService;
|
||||
import xyz.zinglizingli.books.service.UserService;
|
||||
import xyz.zinglizingli.books.vo.BookVO;
|
||||
import xyz.zinglizingli.common.cache.CommonCacheUtil;
|
||||
import xyz.zinglizingli.common.utils.CatUtil;
|
||||
import xyz.zinglizingli.common.utils.Constants;
|
||||
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
@ -62,7 +63,7 @@ public class BookController {
|
||||
String userId = null;
|
||||
String titleType = "最近更新";
|
||||
if (catId != null) {
|
||||
titleType = bookService.getCatNameById(catId) + "分类频道";
|
||||
titleType = CatUtil.getCatNameById(catId) + "分类频道";
|
||||
} else if (Constants.NOVEL_TOP_FIELD.equals(sortBy)) {
|
||||
titleType = "小说排行";
|
||||
} else if (ids != null) {
|
||||
@ -84,7 +85,7 @@ public class BookController {
|
||||
for (Book book : books) {
|
||||
BookVO bookvo = new BookVO();
|
||||
BeanUtils.copyProperties(book, bookvo);
|
||||
bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
|
||||
bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
|
||||
bookVoList.add(bookvo);
|
||||
}
|
||||
|
||||
@ -97,7 +98,7 @@ public class BookController {
|
||||
int index = idsArr.indexOf(book.getId() + "");
|
||||
BookVO bookvo = new BookVO();
|
||||
BeanUtils.copyProperties(book, bookvo);
|
||||
bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
|
||||
bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
|
||||
bookVoArr[books.size() - index - 1] = bookvo;
|
||||
}
|
||||
bookVoList = Arrays.asList(bookVoArr);
|
||||
@ -144,10 +145,10 @@ public class BookController {
|
||||
BeanUtils.copyProperties(book, bookvo);
|
||||
if(catId == Constants.SOFT_NOVEL_CAT) {
|
||||
//轻小说
|
||||
bookvo.setCateName(bookService.getSoftCatNameById(bookvo.getSoftCat()));
|
||||
bookvo.setCateName(CatUtil.getSoftCatNameById(bookvo.getSoftCat()));
|
||||
}else if(catId == Constants.MH_NOVEL_CAT){
|
||||
//漫画
|
||||
bookvo.setCateName(bookService.getMhCatNameById(bookvo.getSoftCat()));
|
||||
bookvo.setCateName(CatUtil.getMhCatNameById(bookvo.getSoftCat()));
|
||||
}
|
||||
bookVoList.add(bookvo);
|
||||
}
|
||||
@ -204,7 +205,7 @@ public class BookController {
|
||||
|
||||
BookVO bookvo = new BookVO();
|
||||
BeanUtils.copyProperties(book, bookvo);
|
||||
bookvo.setCateName(bookService.getCatNameById(bookvo.getCatid()));
|
||||
bookvo.setCateName(CatUtil.getCatNameById(bookvo.getCatid()));
|
||||
|
||||
modelMap.put("bookId", bookId);
|
||||
modelMap.put("book", bookvo);
|
||||
@ -243,7 +244,7 @@ public class BookController {
|
||||
bookContent.setId(-1L);
|
||||
bookContent.setBookId(bookId);
|
||||
bookContent.setIndexNum(indexNum);
|
||||
bookContent.setContent("正在手打中,请稍等片刻,内容更新后,需要重新刷新页面,才能获取最新更新");
|
||||
bookContent.setContent(Constants.NO_CONTENT_DESC);
|
||||
indexName = "更新中。。。";
|
||||
} else {
|
||||
indexName = bookService.queryIndexNameByBookIdAndIndexNum(bookId, indexNum);
|
||||
|
@ -0,0 +1,27 @@
|
||||
package xyz.zinglizingli.common.config;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import xyz.zinglizingli.common.crawl.BaseHtmlCrawlSource;
|
||||
import xyz.zinglizingli.common.crawl.BiquCrawlSource;
|
||||
|
||||
/**
|
||||
* @author 11797
|
||||
*/
|
||||
@Slf4j
|
||||
@Configuration
|
||||
public class CrawlBiqudaoConfig {
|
||||
|
||||
|
||||
@Bean
|
||||
@ConfigurationProperties(prefix = "biqudao.crawlsource") // prefix值必须是application.yml中对应属性的前缀
|
||||
@ConditionalOnProperty(prefix = "biqudao.crawlsource",name = "enabled",havingValue = "true")
|
||||
public BaseHtmlCrawlSource BiqutaCrawlSource() {
|
||||
return new BiquCrawlSource();
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,29 @@
|
||||
package xyz.zinglizingli.common.config;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import xyz.zinglizingli.common.crawl.BaseHtmlCrawlSource;
|
||||
import xyz.zinglizingli.common.crawl.BiquCrawlSource;
|
||||
|
||||
/**
|
||||
* @author 11797
|
||||
*/
|
||||
@Slf4j
|
||||
@Configuration
|
||||
public class CrawlBiqutaConfig {
|
||||
|
||||
|
||||
@Bean
|
||||
@Primary //必须加此注解,不然报错,下一个类则不需要添加
|
||||
@ConfigurationProperties(prefix = "biquta.crawlsource") // prefix值必须是application.yml中对应属性的前缀
|
||||
@ConditionalOnProperty(prefix = "biquta.crawlsource",name = "enabled",havingValue = "true")
|
||||
public BaseHtmlCrawlSource BiqutaCrawlSource() {
|
||||
return new BiquCrawlSource();
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package xyz.zinglizingli.common.crawl;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
|
||||
/**
|
||||
* 爬虫源
|
||||
* @author 11797
|
||||
*/
|
||||
@Data
|
||||
public abstract class BaseCrawlSource {
|
||||
|
||||
@Value("${books.lowestScore}")
|
||||
private Float lowestScore;
|
||||
|
||||
/**
|
||||
* 解析数据
|
||||
* */
|
||||
public abstract void parse();
|
||||
}
|
@ -0,0 +1,81 @@
|
||||
package xyz.zinglizingli.common.crawl;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* html爬虫源
|
||||
* @author 11797
|
||||
*/
|
||||
@Data
|
||||
public abstract class BaseHtmlCrawlSource extends BaseCrawlSource{
|
||||
|
||||
/**
|
||||
* 首页url
|
||||
* */
|
||||
private String indexUrl;
|
||||
|
||||
/**
|
||||
* 列表页url
|
||||
* */
|
||||
private String listPageUrl;
|
||||
|
||||
/**
|
||||
* 书籍url Pattern
|
||||
* */
|
||||
private String bookUrlPattern;
|
||||
|
||||
/**
|
||||
* 评分 Pattern
|
||||
* */
|
||||
private String scorePattern;
|
||||
|
||||
/**
|
||||
* 书名 Pattern
|
||||
* */
|
||||
private String bookNamePattern;
|
||||
|
||||
/**
|
||||
* 作者 Pattern
|
||||
* */
|
||||
private String authorPattern;
|
||||
|
||||
/**
|
||||
* 状态 Pattern
|
||||
* */
|
||||
private String statusPattern;
|
||||
|
||||
/**
|
||||
* 类别 Pattern
|
||||
* */
|
||||
private String catPattern;
|
||||
|
||||
|
||||
/**
|
||||
* 更新时间 Pattern
|
||||
* */
|
||||
private String updateTimePattern;
|
||||
|
||||
|
||||
/**
|
||||
* 封面 Pattern
|
||||
* */
|
||||
private String picPattern;
|
||||
|
||||
|
||||
/**
|
||||
* 简介 Pattern
|
||||
* */
|
||||
private String introPattern;
|
||||
|
||||
/**
|
||||
* 完整目录页url Pattern
|
||||
* */
|
||||
private String catalogUrlPattern;
|
||||
|
||||
/**
|
||||
* 目录 Pattern
|
||||
* */
|
||||
private String catalogPattern;
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,8 @@
|
||||
package xyz.zinglizingli.common.crawl;
|
||||
|
||||
/**
|
||||
* Json爬虫源
|
||||
* @author 11797
|
||||
*/
|
||||
public abstract class BaseJsonCrawlSource extends BaseCrawlSource{
|
||||
}
|
@ -0,0 +1,216 @@
|
||||
package xyz.zinglizingli.common.crawl;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import xyz.zinglizingli.books.po.Book;
|
||||
import xyz.zinglizingli.books.po.BookContent;
|
||||
import xyz.zinglizingli.books.po.BookIndex;
|
||||
import xyz.zinglizingli.books.service.BookService;
|
||||
import xyz.zinglizingli.common.utils.CatUtil;
|
||||
import xyz.zinglizingli.common.utils.ExcutorUtils;
|
||||
import xyz.zinglizingli.common.utils.RestTemplateUtil;
|
||||
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static java.util.regex.Pattern.compile;
|
||||
|
||||
/**
|
||||
* @author 11797
|
||||
*/
|
||||
@Slf4j
|
||||
public class BiquCrawlSource extends BaseHtmlCrawlSource {
|
||||
|
||||
|
||||
@Autowired
|
||||
private BookService bookService;
|
||||
|
||||
@Override
|
||||
public void parse() {
|
||||
|
||||
String catBookListUrl = getListPageUrl().replace("{0}", "0").replace("{1}", "1");
|
||||
String forObject = RestTemplateUtil.getBodyByUtf8(catBookListUrl);
|
||||
if (forObject != null) {
|
||||
//解析第一页书籍的数据
|
||||
Pattern bookPatten = compile(getBookUrlPattern());
|
||||
|
||||
Matcher bookMatcher = bookPatten.matcher(forObject);
|
||||
|
||||
boolean isFind = bookMatcher.find();
|
||||
Pattern scorePatten = compile(getScorePattern());
|
||||
Matcher scoreMatch = scorePatten.matcher(forObject);
|
||||
boolean scoreFind = scoreMatch.find();
|
||||
|
||||
Pattern bookNamePatten = compile(getBookNamePattern());
|
||||
|
||||
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
|
||||
|
||||
boolean isBookNameMatch = bookNameMatch.find();
|
||||
|
||||
while (isFind && scoreFind && isBookNameMatch) {
|
||||
|
||||
try {
|
||||
Float score = Float.parseFloat(scoreMatch.group(1));
|
||||
|
||||
if (score < getLowestScore()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String bokNum = bookMatcher.group(1);
|
||||
String bookUrl = getIndexUrl() + "/" + bokNum + "/";
|
||||
|
||||
String body = RestTemplateUtil.getBodyByUtf8(bookUrl);
|
||||
if (body != null) {
|
||||
|
||||
String bookName = bookNameMatch.group(1);
|
||||
Pattern authorPatten = compile(getAuthorPattern());
|
||||
Matcher authoreMatch = authorPatten.matcher(body);
|
||||
if (authoreMatch.find()) {
|
||||
String author = authoreMatch.group(1);
|
||||
|
||||
Pattern statusPatten = compile(getStatusPattern());
|
||||
Matcher statusMatch = statusPatten.matcher(body);
|
||||
if (statusMatch.find()) {
|
||||
String status = statusMatch.group(1);
|
||||
|
||||
Pattern catPatten = compile(getCatPattern());
|
||||
Matcher catMatch = catPatten.matcher(body);
|
||||
if (catMatch.find()) {
|
||||
String catName = catMatch.group(1);
|
||||
int catNum = CatUtil.getCatNum(catName);
|
||||
|
||||
|
||||
Pattern updateTimePatten = compile(getUpdateTimePattern());
|
||||
Matcher updateTimeMatch = updateTimePatten.matcher(body);
|
||||
if (updateTimeMatch.find()) {
|
||||
String updateTimeStr = updateTimeMatch.group(1);
|
||||
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
|
||||
Date updateTime = format.parse(updateTimeStr);
|
||||
Pattern picPatten = compile(getPicPattern());
|
||||
Matcher picMather = picPatten.matcher(body);
|
||||
if (picMather.find()) {
|
||||
String picSrc = picMather.group(1);
|
||||
|
||||
|
||||
Pattern descPatten = compile(getIntroPattern());
|
||||
Matcher descMatch = descPatten.matcher(body);
|
||||
if (descMatch.find()) {
|
||||
String desc = descMatch.group(1);
|
||||
|
||||
|
||||
Book book = new Book();
|
||||
book.setAuthor(author);
|
||||
book.setCatid(catNum);
|
||||
book.setBookDesc(desc);
|
||||
book.setBookName(bookName);
|
||||
book.setScore(score > 10 ? 8.0f : score);
|
||||
book.setPicUrl(picSrc);
|
||||
book.setBookStatus(status);
|
||||
book.setUpdateTime(updateTime);
|
||||
|
||||
List<BookIndex> indexList = new ArrayList<>();
|
||||
List<BookContent> contentList = new ArrayList<>();
|
||||
|
||||
//读取目录
|
||||
Pattern indexPatten = compile(getCatalogUrlPattern());
|
||||
Matcher indexMatch = indexPatten.matcher(body);
|
||||
if (indexMatch.find()) {
|
||||
String indexUrl = getIndexUrl() + indexMatch.group(1);
|
||||
String body2 = RestTemplateUtil.getBodyByUtf8(indexUrl);
|
||||
if (body2 != null) {
|
||||
Pattern indexListPatten = compile(getCatalogPattern());
|
||||
Matcher indexListMatch = indexListPatten.matcher(body2);
|
||||
|
||||
boolean isFindIndex = indexListMatch.find();
|
||||
|
||||
int indexNum = 0;
|
||||
|
||||
//查询该书籍已存在目录号
|
||||
List<Integer> hasIndexNum = bookService.queryIndexNumByBookNameAndAuthor(bookName, author);
|
||||
//更新和插入分别开,插入只在凌晨做一次
|
||||
if (hasIndexNum.size() > 0) {
|
||||
while (isFindIndex) {
|
||||
if (!hasIndexNum.contains(indexNum)) {
|
||||
|
||||
String contentUrl = getIndexUrl() + indexListMatch.group(1);
|
||||
String indexName = indexListMatch.group(2);
|
||||
|
||||
|
||||
//查询章节内容
|
||||
String body3 = RestTemplateUtil.getBodyByUtf8(contentUrl);
|
||||
if (body3 != null) {
|
||||
String start = "『章节错误,点此举报』";
|
||||
String end = "『加入书签,方便阅读』";
|
||||
String content = body3.substring(body3.indexOf(start) + start.length(), body3.indexOf(end));
|
||||
//TODO插入章节目录和章节内容
|
||||
BookIndex bookIndex = new BookIndex();
|
||||
bookIndex.setIndexName(indexName);
|
||||
bookIndex.setIndexNum(indexNum);
|
||||
indexList.add(bookIndex);
|
||||
BookContent bookContent = new BookContent();
|
||||
bookContent.setContent(content);
|
||||
bookContent.setIndexNum(indexNum);
|
||||
contentList.add(bookContent);
|
||||
|
||||
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
indexNum++;
|
||||
isFindIndex = indexListMatch.find();
|
||||
}
|
||||
|
||||
if (indexList.size() == contentList.size() && indexList.size() > 0) {
|
||||
ExcutorUtils.excuteFixedTask(() ->
|
||||
bookService.saveBookAndIndexAndContent(book, indexList, contentList)
|
||||
);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
|
||||
e.printStackTrace();
|
||||
|
||||
} finally {
|
||||
bookMatcher.find();
|
||||
isFind = bookMatcher.find();
|
||||
scoreFind = scoreMatch.find();
|
||||
isBookNameMatch = bookNameMatch.find();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
@ -3,28 +3,13 @@ package xyz.zinglizingli.common.schedule;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.codec.Charsets;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import xyz.zinglizingli.books.po.Book;
|
||||
import xyz.zinglizingli.books.po.BookContent;
|
||||
import xyz.zinglizingli.books.po.BookIndex;
|
||||
import xyz.zinglizingli.books.service.BookService;
|
||||
import xyz.zinglizingli.common.utils.ExcutorUtils;
|
||||
import xyz.zinglizingli.common.crawl.BaseCrawlSource;
|
||||
import xyz.zinglizingli.common.utils.RestTemplateUtil;
|
||||
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static java.util.regex.Pattern.compile;
|
||||
|
||||
/**
|
||||
* 更新书籍章节内容定时任务
|
||||
*
|
||||
@ -35,19 +20,8 @@ import static java.util.regex.Pattern.compile;
|
||||
@Slf4j
|
||||
public class CrawlBooksSchedule {
|
||||
|
||||
private final BookService bookService;
|
||||
|
||||
private RestTemplate utf8RestTemplate = RestTemplateUtil.getInstance(Charsets.UTF_8);
|
||||
|
||||
|
||||
@Value("${books.lowestScore}")
|
||||
private Float lowestScore;
|
||||
|
||||
@Value("${crawl.website.type}")
|
||||
private Byte websiteType;
|
||||
|
||||
@Value("${pic.save.path}")
|
||||
private String picSavePath;
|
||||
private final BaseCrawlSource crawlSource;
|
||||
|
||||
|
||||
/**
|
||||
@ -58,459 +32,11 @@ public class CrawlBooksSchedule {
|
||||
|
||||
log.debug("crawlBooksSchedule执行中。。。。。。。。。。。。");
|
||||
|
||||
switch (websiteType) {
|
||||
case 1: {
|
||||
updateBiqudaoBooks(0);
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
updateBiquTaBooks(0);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 从笔趣塔更新
|
||||
*/
|
||||
private void updateBiquTaBooks(int bookClass) {
|
||||
String baseUrl = "https://m.biquta.la";
|
||||
String catBookListUrlBase = baseUrl + "/class/";
|
||||
|
||||
String catBookListUrl = catBookListUrlBase + bookClass + "/" + 1 + ".html";
|
||||
String forObject = getByRestTemplate(catBookListUrl);
|
||||
if (forObject != null) {
|
||||
Pattern pattern = compile("value=\"(\\d+)/(\\d+)\"");
|
||||
Matcher matcher = pattern.matcher(forObject);
|
||||
boolean isFind = matcher.find();
|
||||
if (isFind) {
|
||||
//解析第一页书籍的数据
|
||||
Pattern bookPatten = compile("href=\"/(\\d+_\\d+)/\"");
|
||||
parseBiquTaBook(bookPatten, forObject, baseUrl);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析笔趣塔数据
|
||||
*/
|
||||
private void parseBiquTaBook(Pattern bookPatten, String forObject, String baseUrl) {
|
||||
Matcher bookMatcher = bookPatten.matcher(forObject);
|
||||
|
||||
boolean isFind = bookMatcher.find();
|
||||
Pattern scorePatten = compile("<div\\s+class=\"score\">(\\d+\\.\\d+)分</div>");
|
||||
Matcher scoreMatch = scorePatten.matcher(forObject);
|
||||
boolean scoreFind = scoreMatch.find();
|
||||
|
||||
Pattern bookNamePatten = compile("<p class=\"title\">([^/]+)</p>");
|
||||
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
|
||||
boolean isBookNameMatch = bookNameMatch.find();
|
||||
|
||||
while (isFind && scoreFind && isBookNameMatch) {
|
||||
|
||||
try {
|
||||
Float score = Float.parseFloat(scoreMatch.group(1));
|
||||
|
||||
if (score < lowestScore) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String bokNum = bookMatcher.group(1);
|
||||
String bookUrl = baseUrl + "/" + bokNum + "/";
|
||||
|
||||
String body = getByRestTemplate(bookUrl);
|
||||
if (body != null) {
|
||||
|
||||
String bookName = bookNameMatch.group(1);
|
||||
Pattern authorPatten = compile(">作者:([^/]+)<");
|
||||
Matcher authoreMatch = authorPatten.matcher(body);
|
||||
if (authoreMatch.find()) {
|
||||
String author = authoreMatch.group(1);
|
||||
|
||||
Pattern statusPatten = compile("状态:([^/]+)</li>");
|
||||
Matcher statusMatch = statusPatten.matcher(body);
|
||||
if (statusMatch.find()) {
|
||||
String status = statusMatch.group(1);
|
||||
|
||||
Pattern catPatten = compile("类别:([^/]+)</li>");
|
||||
Matcher catMatch = catPatten.matcher(body);
|
||||
if (catMatch.find()) {
|
||||
String catName = catMatch.group(1);
|
||||
int catNum = getCatNum(catName);
|
||||
|
||||
|
||||
Pattern updateTimePatten = compile("更新:(\\d+-\\d+-\\d+\\s\\d+:\\d+:\\d+)</a>");
|
||||
Matcher updateTimeMatch = updateTimePatten.matcher(body);
|
||||
if (updateTimeMatch.find()) {
|
||||
String updateTimeStr = updateTimeMatch.group(1);
|
||||
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
|
||||
Date updateTime = format.parse(updateTimeStr);
|
||||
Pattern picPatten = compile("<img src=\"([^>]+)\"\\s+onerror=\"this.src=");
|
||||
Matcher picMather = picPatten.matcher(body);
|
||||
if (picMather.find()) {
|
||||
String picSrc = picMather.group(1);
|
||||
|
||||
|
||||
Pattern descPatten = compile("class=\"review\">([^<]+)</p>");
|
||||
Matcher descMatch = descPatten.matcher(body);
|
||||
if (descMatch.find()) {
|
||||
String desc = descMatch.group(1);
|
||||
|
||||
|
||||
Book book = new Book();
|
||||
book.setAuthor(author);
|
||||
book.setCatid(catNum);
|
||||
book.setBookDesc(desc);
|
||||
book.setBookName(bookName);
|
||||
book.setScore(score > 10 ? 8.0f : score);
|
||||
book.setPicUrl(picSrc);
|
||||
book.setBookStatus(status);
|
||||
book.setUpdateTime(updateTime);
|
||||
|
||||
List<BookIndex> indexList = new ArrayList<>();
|
||||
List<BookContent> contentList = new ArrayList<>();
|
||||
|
||||
//读取目录
|
||||
Pattern indexPatten = compile("<a\\s+href=\"(/du/\\d+_\\d+/)\">查看完整目录</a>");
|
||||
Matcher indexMatch = indexPatten.matcher(body);
|
||||
if (indexMatch.find()) {
|
||||
String indexUrl = baseUrl + indexMatch.group(1);
|
||||
String body2 = getByRestTemplate(indexUrl);
|
||||
if (body2 != null) {
|
||||
Pattern indexListPatten = compile("<a\\s+style=\"\"\\s+href=\"(/\\d+_\\d+/\\d+\\.html)\">([^/]+)</a>");
|
||||
Matcher indexListMatch = indexListPatten.matcher(body2);
|
||||
|
||||
boolean isFindIndex = indexListMatch.find();
|
||||
|
||||
int indexNum = 0;
|
||||
|
||||
//查询该书籍已存在目录号
|
||||
List<Integer> hasIndexNum = bookService.queryIndexNumByBookNameAndAuthor(bookName, author);
|
||||
//更新和插入分别开,插入只在凌晨做一次
|
||||
if (hasIndexNum.size() > 0) {
|
||||
while (isFindIndex) {
|
||||
if (!hasIndexNum.contains(indexNum)) {
|
||||
|
||||
String contentUrl = baseUrl + indexListMatch.group(1);
|
||||
String indexName = indexListMatch.group(2);
|
||||
|
||||
|
||||
//查询章节内容
|
||||
String body3 = getByRestTemplate(contentUrl);
|
||||
if (body3 != null) {
|
||||
String start = "『章节错误,点此举报』";
|
||||
String end = "『加入书签,方便阅读』";
|
||||
String content = body3.substring(body3.indexOf(start) + start.length(), body3.indexOf(end));
|
||||
//TODO插入章节目录和章节内容
|
||||
BookIndex bookIndex = new BookIndex();
|
||||
bookIndex.setIndexName(indexName);
|
||||
bookIndex.setIndexNum(indexNum);
|
||||
indexList.add(bookIndex);
|
||||
BookContent bookContent = new BookContent();
|
||||
bookContent.setContent(content);
|
||||
bookContent.setIndexNum(indexNum);
|
||||
contentList.add(bookContent);
|
||||
|
||||
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
indexNum++;
|
||||
isFindIndex = indexListMatch.find();
|
||||
}
|
||||
|
||||
if (indexList.size() == contentList.size() && indexList.size() > 0) {
|
||||
ExcutorUtils.excuteFixedTask(() ->
|
||||
bookService.saveBookAndIndexAndContent(book, indexList, contentList)
|
||||
);
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
crawlSource.parse();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
|
||||
e.printStackTrace();
|
||||
|
||||
} finally {
|
||||
bookMatcher.find();
|
||||
isFind = bookMatcher.find();
|
||||
scoreFind = scoreMatch.find();
|
||||
isBookNameMatch = bookNameMatch.find();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 从笔趣岛更新
|
||||
*/
|
||||
private void updateBiqudaoBooks(int bookClass) {
|
||||
String baseUrl = "https://m.biqudao.com";
|
||||
String catBookListUrlBase = baseUrl + "/bqgeclass/";
|
||||
|
||||
int page = 1;
|
||||
String catBookListUrl = catBookListUrlBase + bookClass + "/" + page + ".html";
|
||||
String forObject = getByRestTemplate(catBookListUrl);
|
||||
if (forObject != null) {
|
||||
Pattern pattern = compile("value=\"(\\d+)/(\\d+)\"");
|
||||
Matcher matcher = pattern.matcher(forObject);
|
||||
boolean isFind = matcher.find();
|
||||
if (isFind) {
|
||||
//解析第一页书籍的数据
|
||||
Pattern bookPatten = compile("href=\"/(bqge\\d+)/\"");
|
||||
parseBiquDaoBook(bookPatten, forObject, baseUrl);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 解析笔趣岛数据
|
||||
*/
|
||||
private void parseBiquDaoBook(Pattern bookPatten, String forObject, String baseUrl) {
|
||||
|
||||
Matcher bookMatcher = bookPatten.matcher(forObject);
|
||||
boolean isFind = bookMatcher.find();
|
||||
Pattern scorePatten = compile("<div\\s+class=\"score\">(\\d+\\.\\d+)分</div>");
|
||||
Matcher scoreMatch = scorePatten.matcher(forObject);
|
||||
boolean scoreFind = scoreMatch.find();
|
||||
|
||||
Pattern bookNamePatten = compile("<p class=\"title\">([^/]+)</p>");
|
||||
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
|
||||
boolean isBookNameMatch = bookNameMatch.find();
|
||||
|
||||
while (isFind && scoreFind && isBookNameMatch) {
|
||||
|
||||
try {
|
||||
Float score = Float.parseFloat(scoreMatch.group(1));
|
||||
|
||||
if (score < lowestScore) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String bokNum = bookMatcher.group(1);
|
||||
String bookUrl = baseUrl + "/" + bokNum + "/";
|
||||
|
||||
String body = getByRestTemplate(bookUrl);
|
||||
if (body != null) {
|
||||
|
||||
String bookName = bookNameMatch.group(1);
|
||||
Pattern authorPatten = compile("<li class=\"author\">作者:([^/]+)</li>");
|
||||
Matcher authoreMatch = authorPatten.matcher(body);
|
||||
if (authoreMatch.find()) {
|
||||
String author = authoreMatch.group(1);
|
||||
|
||||
Pattern statusPatten = compile("状态:([^/]+)</li>");
|
||||
Matcher statusMatch = statusPatten.matcher(body);
|
||||
if (statusMatch.find()) {
|
||||
String status = statusMatch.group(1);
|
||||
|
||||
Pattern catPatten = compile("类别:([^/]+)</li>");
|
||||
Matcher catMatch = catPatten.matcher(body);
|
||||
if (catMatch.find()) {
|
||||
String catName = catMatch.group(1);
|
||||
int catNum = getCatNum(catName);
|
||||
Pattern updateTimePatten = compile("更新:(\\d+-\\d+-\\d+\\s\\d+:\\d+:\\d+)</a>");
|
||||
Matcher updateTimeMatch = updateTimePatten.matcher(body);
|
||||
if (updateTimeMatch.find()) {
|
||||
String updateTimeStr = updateTimeMatch.group(1);
|
||||
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
|
||||
Date updateTime = format.parse(updateTimeStr);
|
||||
Pattern picPatten = compile("<img src=\"([^>]+)\"\\s+onerror=\"this.src=");
|
||||
Matcher picMather = picPatten.matcher(body);
|
||||
if (picMather.find()) {
|
||||
String picSrc = picMather.group(1);
|
||||
|
||||
|
||||
Pattern descPatten = compile("class=\"review\">([^<]+)</p>");
|
||||
Matcher descMatch = descPatten.matcher(body);
|
||||
if (descMatch.find()) {
|
||||
String desc = descMatch.group(1);
|
||||
|
||||
|
||||
Book book = new Book();
|
||||
book.setAuthor(author);
|
||||
book.setCatid(catNum);
|
||||
book.setBookDesc(desc);
|
||||
book.setBookName(bookName);
|
||||
book.setScore(score > 10 ? 8.0f : score);
|
||||
book.setPicUrl(picSrc);
|
||||
book.setBookStatus(status);
|
||||
book.setUpdateTime(updateTime);
|
||||
|
||||
List<BookIndex> indexList = new ArrayList<>();
|
||||
List<BookContent> contentList = new ArrayList<>();
|
||||
|
||||
//读取目录
|
||||
Pattern indexPatten = compile("<a\\s+href=\"(/bqge\\d+/all\\.html)\">查看完整目录</a>");
|
||||
Matcher indexMatch = indexPatten.matcher(body);
|
||||
if (indexMatch.find()) {
|
||||
String indexUrl = baseUrl + indexMatch.group(1);
|
||||
String body2 = getByRestTemplate(indexUrl);
|
||||
if (body2 != null) {
|
||||
Pattern indexListPatten = compile("<a[^/]+style[^/]+href=\"(/bqge\\d+/\\d+\\.html)\">([^/]+)</a>");
|
||||
Matcher indexListMatch = indexListPatten.matcher(body2);
|
||||
|
||||
boolean isFindIndex = indexListMatch.find();
|
||||
|
||||
int indexNum = 0;
|
||||
|
||||
//查询该书籍已存在目录号
|
||||
List<Integer> hasIndexNum = bookService.queryIndexNumByBookNameAndAuthor(bookName, author);
|
||||
//只更新已存在的书籍
|
||||
if (hasIndexNum.size() > 0) {
|
||||
while (isFindIndex) {
|
||||
if (!hasIndexNum.contains(indexNum)) {
|
||||
|
||||
String contentUrl = baseUrl + indexListMatch.group(1);
|
||||
String indexName = indexListMatch.group(2);
|
||||
|
||||
|
||||
//查询章节内容
|
||||
String body3 = getByRestTemplate(contentUrl);
|
||||
if (body3 != null) {
|
||||
String start = "『章节错误,点此举报』";
|
||||
String end = "『加入书签,方便阅读』";
|
||||
String content = body3.substring(body3.indexOf(start) + start.length(), body3.indexOf(end));
|
||||
//TODO插入章节目录和章节内容
|
||||
BookIndex bookIndex = new BookIndex();
|
||||
bookIndex.setIndexName(indexName);
|
||||
bookIndex.setIndexNum(indexNum);
|
||||
indexList.add(bookIndex);
|
||||
BookContent bookContent = new BookContent();
|
||||
bookContent.setContent(content);
|
||||
bookContent.setIndexNum(indexNum);
|
||||
contentList.add(bookContent);
|
||||
|
||||
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
indexNum++;
|
||||
isFindIndex = indexListMatch.find();
|
||||
}
|
||||
|
||||
if (indexList.size() == contentList.size() && indexList.size() > 0) {
|
||||
ExcutorUtils.excuteFixedTask(() -> bookService.saveBookAndIndexAndContent(book, indexList, contentList));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
} catch (Exception e) {
|
||||
|
||||
e.printStackTrace();
|
||||
|
||||
} finally {
|
||||
bookMatcher.find();
|
||||
isFind = bookMatcher.find();
|
||||
scoreFind = scoreMatch.find();
|
||||
isBookNameMatch = bookNameMatch.find();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private int getCatNum(String catName) {
|
||||
int catNum;
|
||||
switch (catName) {
|
||||
case "武侠仙侠": {
|
||||
catNum = 2;
|
||||
break;
|
||||
}
|
||||
case "都市言情": {
|
||||
catNum = 3;
|
||||
break;
|
||||
}
|
||||
case "历史军事": {
|
||||
catNum = 4;
|
||||
break;
|
||||
}
|
||||
case "科幻灵异": {
|
||||
catNum = 5;
|
||||
break;
|
||||
}
|
||||
case "网游竞技": {
|
||||
catNum = 6;
|
||||
break;
|
||||
}
|
||||
case "女生频道": {
|
||||
catNum = 7;
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
catNum = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return catNum;
|
||||
}
|
||||
|
||||
private String getByRestTemplate(String url) {
|
||||
try {
|
||||
ResponseEntity<String> forEntity = utf8RestTemplate.getForEntity(url, String.class);
|
||||
if (forEntity.getStatusCode() == HttpStatus.OK) {
|
||||
return forEntity.getBody();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,179 @@
|
||||
package xyz.zinglizingli.common.utils;
|
||||
|
||||
/**
 * Static lookups between category identifiers and their display names.
 *
 * <p>Every lookup is null-safe: a {@code null} argument is treated like an
 * unknown value and yields the method's fallback result instead of a
 * {@link NullPointerException}.
 *
 * @author 11797
 */
public class CatUtil {

    /** Category id returned for {@code null} or unrecognised category names. */
    private static final int DEFAULT_CAT_NUM = 1;

    /** Display name returned for {@code null} or unrecognised category ids. */
    private static final String DEFAULT_CAT_NAME = "其他";

    /**
     * Translates a category display name into its numeric category id.
     *
     * <p>Only the names for ids 2 to 7 are matched explicitly; every other
     * name falls back to category {@code 1}.
     *
     * @param catName category display name, may be {@code null}
     * @return the matching category id, or {@code 1} when the name is
     *         {@code null} or not recognised
     */
    public static int getCatNum(String catName) {
        // switch on a null String would throw; treat null as "unknown".
        if (catName == null) {
            return DEFAULT_CAT_NUM;
        }
        switch (catName) {
            case "武侠仙侠":
                return 2;
            case "都市言情":
                return 3;
            case "历史军事":
                return 4;
            case "科幻灵异":
                return 5;
            case "网游竞技":
                return 6;
            case "女生频道":
                return 7;
            default:
                return DEFAULT_CAT_NUM;
        }
    }

    /**
     * Looks up the display name of a light-novel sub-category.
     *
     * @param softCat light-novel sub-category id, may be {@code null}
     * @return the sub-category name, or {@code "其他"} when the id is
     *         {@code null} or not recognised
     */
    public static String getSoftCatNameById(Integer softCat) {
        // Unboxing a null Integer in the switch would throw; treat null as "unknown".
        if (softCat == null) {
            return DEFAULT_CAT_NAME;
        }
        switch (softCat) {
            case 21:
                return "魔幻";
            case 22:
                return "玄幻";
            case 23:
                return "古风";
            case 24:
                return "科幻";
            case 25:
                return "校园";
            case 26:
                return "都市";
            case 27:
                return "游戏";
            case 28:
                return "同人";
            case 29:
                return "悬疑";
            case 0:
                return "动漫";
            default:
                return DEFAULT_CAT_NAME;
        }
    }

    /**
     * Looks up the display name of a comic sub-category.
     *
     * @param softCat comic sub-category id, may be {@code null}
     * @return the sub-category name, or {@code "其他"} when the id is
     *         {@code null} or not recognised
     */
    public static String getMhCatNameById(Integer softCat) {
        if (softCat == null) {
            return DEFAULT_CAT_NAME;
        }
        switch (softCat) {
            case 3262:
                return "少年漫";
            case 3263:
                return "少女漫";
            default:
                return DEFAULT_CAT_NAME;
        }
    }

    /**
     * Looks up the display name of a top-level book category.
     *
     * @param catid category id, may be {@code null}
     * @return the category name, or {@code "其他"} when the id is
     *         {@code null} or not recognised
     */
    public static String getCatNameById(Integer catid) {
        if (catid == null) {
            return DEFAULT_CAT_NAME;
        }
        switch (catid) {
            case 1:
                return "玄幻奇幻";
            case 2:
                return "武侠仙侠";
            case 3:
                return "都市言情";
            case 4:
                return "历史军事";
            case 5:
                return "科幻灵异";
            case 6:
                return "网游竞技";
            case 7:
                return "女生频道";
            case 8:
                return "轻小说";
            case 9:
                return "漫画";
            default:
                return DEFAULT_CAT_NAME;
        }
    }
}
|
@ -85,4 +85,9 @@ public class Constants {
|
||||
* Separator placed between multiple book IDs
|
||||
* */
|
||||
public static final String BOOK_ID_SEPARATOR = "-";
|
||||
|
||||
/**
|
||||
* Placeholder text that marks content as not yet available
|
||||
* */
|
||||
public static final String NO_CONTENT_DESC = "正在手打中,请稍等片刻,内容更新后,需要重新刷新页面,才能获取最新更新";
|
||||
}
|
||||
|
@ -1,5 +1,9 @@
|
||||
package xyz.zinglizingli.common.utils;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.codec.Charsets;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
|
||||
import org.springframework.http.converter.HttpMessageConverter;
|
||||
import org.springframework.http.converter.StringHttpMessageConverter;
|
||||
@ -10,6 +14,10 @@ import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author 11797
|
||||
*/
|
||||
@Slf4j
|
||||
public class RestTemplateUtil {
|
||||
|
||||
private static Map<String,RestTemplate> restTemplateMap = new HashMap<>();
|
||||
@ -35,4 +43,18 @@ public class RestTemplateUtil {
|
||||
return restTemplate;
|
||||
}
|
||||
|
||||
public static String getBodyByUtf8(String url) {
|
||||
try {
|
||||
ResponseEntity<String> forEntity = getInstance(Charsets.UTF_8).getForEntity(url, String.class);
|
||||
if (forEntity.getStatusCode() == HttpStatus.OK) {
|
||||
return forEntity.getBody();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
33
novel-front/src/main/resources/application-crawl.yml
Normal file
33
novel-front/src/main/resources/application-crawl.yml
Normal file
@ -0,0 +1,33 @@
|
||||
# Crawl source site types — 1: 笔趣岛 (biqudao), 2: 笔趣塔 (biquta). Parsers for more sites are in progress.
|
||||
biquta:
|
||||
crawlsource:
|
||||
enabled: true #是否开启此爬虫源
|
||||
index-url: https://m.biquta.la
|
||||
list-page-url: https://m.biquta.la/class/{0}/{1}.html
|
||||
book-url-pattern: href="/(\d+_\d+)/"
|
||||
score-pattern: <div\s+class="score">(\d+\.\d+)分</div>
|
||||
book-name-pattern: <p class="title">([^/]+)</p>
|
||||
author-pattern: 作者:([^/]+)<
|
||||
status-pattern: 状态:([^/]+)</li>
|
||||
cat-pattern: 类别:([^/]+)</li>
|
||||
update-time-pattern: 更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a>
|
||||
pic-pattern: <img src="([^>]+)"\s+onerror="this.src=
|
||||
intro-pattern: class="review">([^<]+)</p>
|
||||
catalog-url-pattern: <a\s+href="(/du/\d+_\d+/)">查看完整目录</a>
|
||||
catalog-pattern: <a\s+style=""\s+href="(/\d+_\d+/\d+\.html)">([^/]+)</a>
|
||||
biqudao:
|
||||
crawlsource:
|
||||
enabled: true #是否开启此爬虫源
|
||||
index-url: https://m.biqudao.com
|
||||
list-page-url: https://m.biqudao.com/bqgeclass/{0}/{1}.html
|
||||
book-url-pattern: href="/(bqge\d+)/"
|
||||
score-pattern: <div\s+class="score">(\d+\.\d+)分</div>
|
||||
book-name-pattern: <p class="title">([^/]+)</p>
|
||||
author-pattern: <li class="author">作者:([^/]+)</li>
|
||||
status-pattern: 状态:([^/]+)</li>
|
||||
cat-pattern: 类别:([^/]+)</li>
|
||||
update-time-pattern: 更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a>
|
||||
pic-pattern: <img src="([^>]+)"\s+onerror="this.src=
|
||||
intro-pattern: class="review">([^<]+)</p>
|
||||
catalog-url-pattern: <a\s+href="(/bqge\d+/all\.html)">查看完整目录</a>
|
||||
catalog-pattern: <a[^/]+style[^/]+href="(/bqge\d+/\d+\.html)">([^/]+)</a>
|
@ -4,8 +4,8 @@ server:
|
||||
spring:
|
||||
datasource:
|
||||
url: jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai
|
||||
username: books
|
||||
password: books
|
||||
username: root
|
||||
password: test123456
|
||||
# url: jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai
|
||||
# username: root
|
||||
# password: test123456
|
||||
@ -39,6 +39,8 @@ spring:
|
||||
port: 465
|
||||
class: javax.net.ssl.SSLSocketFactory
|
||||
fallback: false
|
||||
profiles:
|
||||
include: crawl
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user