novel/src/main/java/xyz/zinglizingli/common/schedule/CrawlBooksSchedule.java
2019-11-14 08:01:39 +08:00

801 lines
39 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package xyz.zinglizingli.common.schedule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
import xyz.zinglizingli.books.po.Book;
import xyz.zinglizingli.books.po.BookContent;
import xyz.zinglizingli.books.po.BookIndex;
import xyz.zinglizingli.books.service.BookService;
import xyz.zinglizingli.books.util.ExcutorUtils;
import xyz.zinglizingli.common.utils.RestTemplateUtil;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@Service
public class CrawlBooksSchedule {
private Logger log = LoggerFactory.getLogger(CrawlBooksSchedule.class);
@Autowired
private BookService bookService;
RestTemplate restTemplate = RestTemplateUtil.getInstance("utf-8");
@Value("${books.lowestScore}")
private Float lowestScore;
@Value("${crawl.website.type}")
private Byte websiteType;
private boolean isExcuting = false;
@Scheduled(fixedRate = 1000 * 60 * 60 * 3)
public void crawBquge11BooksAtDay() throws Exception {
if (!isExcuting) {
isExcuting = true;
log.debug("crawlBooksSchedule执行中。。。。。。。。。。。。");
while (true) {
try {
switch (websiteType) {
case 1: {
updateBiqudaoBooks(0);
break;
}
case 2: {
updateBiquTaBooks(0);
break;
}
}
Thread.sleep(1000 * 60 * 5);
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
private void updateBiquTaBooks(int finalI) {
String baseUrl = "https://m.biquta.com";
String catBookListUrlBase = baseUrl + "/class/";
int page = 1;//起始页码
int totalPage = page;
String catBookListUrl = catBookListUrlBase + finalI + "/" + page + ".html";
String forObject = getByHttpClient(catBookListUrl);
if (forObject != null) {
//匹配分页数<input type="text" class="page_txt" value="1/3019" size="5" name="txtPage" id="txtPage" />
Pattern pattern = Pattern.compile("value=\"(\\d+)/(\\d+)\"");
Matcher matcher = pattern.matcher(forObject);
boolean isFind = matcher.find();
System.out.println("匹配分页数" + isFind);
if (isFind) {
int currentPage = Integer.parseInt(matcher.group(1));
totalPage = Integer.parseInt(matcher.group(2));
//解析第一页书籍的数据
Pattern bookPatten = Pattern.compile("href=\"/(\\d+_\\d+)/\"");
parseBiquTaBook(bookPatten, forObject, finalI, baseUrl, true);
}
}
}
private void parseBiquTaBook(Pattern bookPatten, String forObject, int catNum, String baseUrl, boolean isUpdate) {
Matcher matcher2 = bookPatten.matcher(forObject);
boolean isFind = matcher2.find();
Pattern scorePatten = Pattern.compile("<div\\s+class=\"score\">(\\d+\\.\\d+)分</div>");
Matcher scoreMatch = scorePatten.matcher(forObject);
boolean scoreFind = scoreMatch.find();
Pattern bookNamePatten = Pattern.compile("<p class=\"title\">([^/]+)</p>");
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
boolean isBookNameMatch = bookNameMatch.find();
System.out.println("匹配书籍url" + isFind);
System.out.println("匹配分数" + scoreFind);
while (isFind && scoreFind && isBookNameMatch) {
try {
Float score = Float.parseFloat(scoreMatch.group(1));
if (score < lowestScore) {//数据库空间有限暂时爬取8.0分以上的小说
continue;
}
String bokNum = matcher2.group(1);
String bookUrl = baseUrl + "/" + bokNum + "/";
String body = getByHttpClient(bookUrl);
if (body != null) {
String bookName = bookNameMatch.group(1);
Pattern authorPatten = Pattern.compile(">作者:([^/]+)<");
Matcher authoreMatch = authorPatten.matcher(body);
if (authoreMatch.find()) {
String author = authoreMatch.group(1);
Pattern statusPatten = Pattern.compile("状态:([^/]+)</li>");
Matcher statusMatch = statusPatten.matcher(body);
if (statusMatch.find()) {
String status = statusMatch.group(1);
Pattern updateTimePatten = Pattern.compile("更新:(\\d+-\\d+-\\d+\\s\\d+:\\d+:\\d+)</a>");
Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1);
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
Date updateTime = format.parse(updateTimeStr);
Pattern picPatten = Pattern.compile("<img src=\"([^>]+)\"\\s+onerror=\"this.src=");
Matcher picMather = picPatten.matcher(body);
if (picMather.find()) {
String picSrc = picMather.group(1);
Pattern descPatten = Pattern.compile("class=\"review\">([^<]+)</p>");
Matcher descMatch = descPatten.matcher(body);
if (descMatch.find()) {
String desc = descMatch.group(1);
Book book = new Book();
book.setAuthor(author);
book.setCatid(catNum);
book.setBookDesc(desc);
book.setBookName(bookName);
book.setScore(score > 10 ? 8.0f : score);
book.setPicUrl(picSrc);
book.setBookStatus(status);
book.setUpdateTime(updateTime);
List<BookIndex> indexList = new ArrayList<>();
List<BookContent> contentList = new ArrayList<>();
//读取目录
Pattern indexPatten = Pattern.compile("<a\\s+href=\"(/du/\\d+_\\d+/)\">查看完整目录</a>");
Matcher indexMatch = indexPatten.matcher(body);
if (indexMatch.find()) {
String indexUrl = baseUrl + indexMatch.group(1);
String body2 = getByHttpClient(indexUrl);
if (body2 != null) {
Pattern indexListPatten = Pattern.compile("<a\\s+style=\"\"\\s+href=\"(/\\d+_\\d+/\\d+\\.html)\">([^/]+)</a>");
Matcher indexListMatch = indexListPatten.matcher(body2);
boolean isFindIndex = indexListMatch.find();
int indexNum = 0;
//查询该书籍已存在目录号
List<Integer> hasIndexNum = bookService.queryIndexCountByBookNameAndBAuthor(bookName, author);
//更新和插入分别开,插入只在凌晨做一次
if ((isUpdate && hasIndexNum.size() > 0) || (!isUpdate && hasIndexNum.size() == 0)) {
while (isFindIndex) {
if (!hasIndexNum.contains(indexNum)) {
String contentUrl = baseUrl + indexListMatch.group(1);
String indexName = indexListMatch.group(2);
//查询章节内容
String body3 = getByHttpClient(contentUrl);
if (body3 != null) {
Pattern contentPattten = Pattern.compile("章节错误,点此举报(.*)加入书签,方便阅读");
String start = "『章节错误,点此举报』";
String end = "『加入书签,方便阅读』";
String content = body3.substring(body3.indexOf(start) + start.length(), body3.indexOf(end));
//TODO插入章节目录和章节内容
BookIndex bookIndex = new BookIndex();
bookIndex.setIndexName(indexName);
bookIndex.setIndexNum(indexNum);
indexList.add(bookIndex);
BookContent bookContent = new BookContent();
bookContent.setContent(content);
bookContent.setIndexNum(indexNum);
contentList.add(bookContent);
} else {
break;
}
}
indexNum++;
isFindIndex = indexListMatch.find();
}
if (indexList.size() == contentList.size() && indexList.size() > 0) {
ExcutorUtils.excuteFixedTask(new Runnable() {
@Override
public void run() {
bookService.saveBookAndIndexAndContent(book, indexList, contentList);
}
});
}
}
}
}
}
}
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
} finally {
matcher2.find();
isFind = matcher2.find();//需要找两次,应为有两个一样的路径匹配
scoreFind = scoreMatch.find();
isBookNameMatch = bookNameMatch.find();
}
}
}
private void updateBiqudaoBooks(int finalI) {
String baseUrl = "https://m.biqudao.com";
String catBookListUrlBase = baseUrl + "/bqgeclass/";
int page = 1;//起始页码
int totalPage = page;
String catBookListUrl = catBookListUrlBase + finalI + "/" + page + ".html";
String forObject = getByHttpClient(catBookListUrl);
if (forObject != null) {
//匹配分页数<input type="text" class="page_txt" value="1/3019" size="5" name="txtPage" id="txtPage" />
Pattern pattern = Pattern.compile("value=\"(\\d+)/(\\d+)\"");
Matcher matcher = pattern.matcher(forObject);
boolean isFind = matcher.find();
System.out.println("匹配分页数" + isFind);
if (isFind) {
int currentPage = Integer.parseInt(matcher.group(1));
totalPage = Integer.parseInt(matcher.group(2));
//解析第一页书籍的数据
Pattern bookPatten = Pattern.compile("href=\"/(bqge\\d+)/\"");
//白天更新
parseBiquDaoBook(bookPatten, forObject, finalI, baseUrl, true);
}
}
}
//@Scheduled(cron = "0 0 2 * * ?")磁盘空间不足,暂时不抓新书
//暂定2小说只爬分类前3本书一共3*7=21本书爬等以后书籍多了之后会适当缩短更新间隔
public void crawBquge11BooksAtNight() throws Exception {
final String baseUrl = "https://m.biqudao.com";
log.debug("crawlBooksSchedule执行中。。。。。。。。。。。。");
//①爬分类列表的书籍url和总页数
// https:
////m.biquta.com/class/1/1.html
// https:
////m.biquta.com/class/2/1.html
// https:
////m.biquta.com/class/2/2.html
//
//
// https:
////m.biquta.com/class/2/2.html
//<input type = "text" class="page_txt" value = "2/1244" size = "5" name = "txtPage" id = "txtPage" >
//
//
//<div class="bookinfo" >
// <a href = "/14_14988/" >
// <div class="detail" >
// <p class="title" > 苍穹九变 </p >
// <p class="author" > 作者:风起闲云</p >
// </div >
// <div class="score" > 7.5 分</div >
// </a >
// </div >
//
//
//第一周期全部书拉取完后,可进行第二周期,只拉取前面几页的数据,拉取时间间隔变小
log.debug("crawlBooksSchedule循环执行中。。。。。。。。。。。。");
//List<Integer> classIdList = new ArrayList<>(Arrays.asList(new Integer[]{1,2,3,4,5,6,7}));
// for (int i = 1; i <= 7; i++) {
// log.debug("crawlBooksSchedule分类"+i+"执行中。。。。。。。。。。。。");
// int finalI = i;
/* new Thread(
() -> {*/
try {
//先随机更新分类
//Random random = new Random();
//int finalI = classIdList.get(new Random().nextInt(classIdList.size()));
//classIdList.remove(finalI);
int finalI = 0;
//拼接分类URL
int page = 1;//起始页码
int totalPage = page;
String catBookListUrl = baseUrl + "/bqgeclass/" + finalI + "/" + page + ".html";
String forObject = getByHttpClient(catBookListUrl);
if (forObject != null) {
//匹配分页数<input type="text" class="page_txt" value="1/3019" size="5" name="txtPage" id="txtPage" />
Pattern pattern = Pattern.compile("value=\"(\\d+)/(\\d+)\"");
Matcher matcher = pattern.matcher(forObject);
boolean isFind = matcher.find();
System.out.println("匹配分页数" + isFind);
if (isFind) {
int currentPage = Integer.parseInt(matcher.group(1));
totalPage = Integer.parseInt(matcher.group(2));
//解析第一页书籍的数据
Pattern bookPatten = Pattern.compile("href=\"/(bqge\\d+)/\"");
//晚上插入
parseBiquDaoBook(bookPatten, forObject, finalI, baseUrl, false);
while (currentPage < totalPage) {
if (new Date().getHours() > 5) {
break;
}
catBookListUrl = baseUrl + "/bqgeclass/" + finalI + "/" + (currentPage + 1) + ".html";
forObject = getByHttpClient(catBookListUrl);
if (forObject != null) {
//匹配分页数
matcher = pattern.matcher(forObject);
isFind = matcher.find();
if (isFind) {
currentPage = Integer.parseInt(matcher.group(1));
totalPage = Integer.parseInt(matcher.group(2));
parseBiquDaoBook(bookPatten, forObject, finalI, baseUrl, false);
}
} else {
currentPage++;
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
/* }
).start();*/
// }
}
private void parseBiquDaoBook(Pattern bookPatten, String forObject, int catNum, String baseUrl, boolean isUpdate) {
Matcher matcher2 = bookPatten.matcher(forObject);
boolean isFind = matcher2.find();
Pattern scorePatten = Pattern.compile("<div\\s+class=\"score\">(\\d+\\.\\d+)分</div>");
Matcher scoreMatch = scorePatten.matcher(forObject);
boolean scoreFind = scoreMatch.find();
Pattern bookNamePatten = Pattern.compile("<p class=\"title\">([^/]+)</p>");
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
boolean isBookNameMatch = bookNameMatch.find();
System.out.println("匹配书籍url" + isFind);
System.out.println("匹配分数" + scoreFind);
while (isFind && scoreFind && isBookNameMatch) {
try {
Float score = Float.parseFloat(scoreMatch.group(1));
if (score < lowestScore) {//数据库空间有限暂时爬取8.0分以上的小说
continue;
}
String bokNum = matcher2.group(1);
String bookUrl = baseUrl + "/" + bokNum + "/";
String body = getByHttpClient(bookUrl);
if (body != null) {
String bookName = bookNameMatch.group(1);
Pattern authorPatten = Pattern.compile("<li class=\"author\">作者:([^/]+)</li>");
Matcher authoreMatch = authorPatten.matcher(body);
if (authoreMatch.find()) {
String author = authoreMatch.group(1);
Pattern statusPatten = Pattern.compile("状态:([^/]+)</li>");
Matcher statusMatch = statusPatten.matcher(body);
if (statusMatch.find()) {
String status = statusMatch.group(1);
Pattern catPatten = Pattern.compile("类别:([^/]+)</li>");
Matcher catMatch = catPatten.matcher(body);
if (catMatch.find()) {
String catName = catMatch.group(1);
switch (catName) {
case "玄幻奇幻": {
catNum = 1;
break;
}
case "武侠仙侠": {
catNum = 2;
break;
}
case "都市言情": {
catNum = 3;
break;
}
case "历史军事": {
catNum = 4;
break;
}
case "科幻灵异": {
catNum = 5;
break;
}
case "网游竞技": {
catNum = 6;
break;
}
case "女生频道": {
catNum = 7;
break;
}
default: {
catNum = 1;
break;
}
}
Pattern updateTimePatten = Pattern.compile("更新:(\\d+-\\d+-\\d+\\s\\d+:\\d+:\\d+)</a>");
Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1);
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
Date updateTime = format.parse(updateTimeStr);
Pattern picPatten = Pattern.compile("<img src=\"([^>]+)\"\\s+onerror=\"this.src=");
Matcher picMather = picPatten.matcher(body);
if (picMather.find()) {
String picSrc = picMather.group(1);
Pattern descPatten = Pattern.compile("class=\"review\">([^<]+)</p>");
Matcher descMatch = descPatten.matcher(body);
if (descMatch.find()) {
String desc = descMatch.group(1);
Book book = new Book();
book.setAuthor(author);
book.setCatid(catNum);
book.setBookDesc(desc);
book.setBookName(bookName);
book.setScore(score > 10 ? 8.0f : score);
book.setPicUrl(picSrc);
book.setBookStatus(status);
book.setUpdateTime(updateTime);
List<BookIndex> indexList = new ArrayList<>();
List<BookContent> contentList = new ArrayList<>();
//读取目录
Pattern indexPatten = Pattern.compile("<a\\s+href=\"(/bqge\\d+/all\\.html)\">查看完整目录</a>");
Matcher indexMatch = indexPatten.matcher(body);
if (indexMatch.find()) {
String indexUrl = baseUrl + indexMatch.group(1);
String body2 = getByHttpClient(indexUrl);
if (body2 != null) {
Pattern indexListPatten = Pattern.compile("<a[^/]+style[^/]+href=\"(/bqge\\d+/\\d+\\.html)\">([^/]+)</a>");
Matcher indexListMatch = indexListPatten.matcher(body2);
boolean isFindIndex = indexListMatch.find();
int indexNum = 0;
//查询该书籍已存在目录号
List<Integer> hasIndexNum = bookService.queryIndexCountByBookNameAndBAuthor(bookName, author);
//更新和插入分别开,插入只在凌晨做一次
if ((isUpdate && hasIndexNum.size() > 0) || (!isUpdate && hasIndexNum.size() == 0)) {
while (isFindIndex) {
if (!hasIndexNum.contains(indexNum)) {
String contentUrl = baseUrl + indexListMatch.group(1);
String indexName = indexListMatch.group(2);
//查询章节内容
String body3 = getByHttpClient(contentUrl);
if (body3 != null) {
Pattern contentPattten = Pattern.compile("章节错误,点此举报(.*)加入书签,方便阅读");
String start = "『章节错误,点此举报』";
String end = "『加入书签,方便阅读』";
String content = body3.substring(body3.indexOf(start) + start.length(), body3.indexOf(end));
//TODO插入章节目录和章节内容
BookIndex bookIndex = new BookIndex();
bookIndex.setIndexName(indexName);
bookIndex.setIndexNum(indexNum);
indexList.add(bookIndex);
BookContent bookContent = new BookContent();
bookContent.setContent(content);
bookContent.setIndexNum(indexNum);
contentList.add(bookContent);
} else {
break;
}
}
indexNum++;
isFindIndex = indexListMatch.find();
}
if (indexList.size() == contentList.size() && indexList.size() > 0) {
ExcutorUtils.excuteFixedTask(new Runnable() {
@Override
public void run() {
bookService.saveBookAndIndexAndContent(book, indexList, contentList);
}
});
}
}
}
}
}
}
}
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
} finally {
matcher2.find();
isFind = matcher2.find();//需要找两次,应为有两个一样的路径匹配
scoreFind = scoreMatch.find();
isBookNameMatch = bookNameMatch.find();
}
}
}
private String getByHttpClient(String catBookListUrl) {
try {
/* HttpClient httpClient = new DefaultHttpClient();
HttpGet getReq = new HttpGet(catBookListUrl);
getReq.setHeader("user-agent", "Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1");
HttpResponse execute = httpClient.execute(getReq);
if (execute.getStatusLine().getStatusCode() == HttpStatus.OK.value()) {
HttpEntity entity = execute.getEntity();
return EntityUtils.toString(entity, "utf-8");
} else {
return null;
}*/
//经测试restTemplate比httpClient效率高出很多倍所有选择restTemplate
ResponseEntity<String> forEntity = restTemplate.getForEntity(catBookListUrl, String.class);
if (forEntity.getStatusCode() == HttpStatus.OK) {
return forEntity.getBody();
} else {
return null;
}
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
/***
* 解析书籍详情之后的页面
*/
private void parseBook(Pattern bookPatten, String forObject, RestTemplate restTemplate, int catNum, String baseUrl) throws ParseException {
Matcher matcher2 = bookPatten.matcher(forObject);
boolean isFind = matcher2.find();
Pattern scorePatten = Pattern.compile("<div\\s+class=\"score\">(\\d+\\.\\d+)分</div>");
Matcher scoreMatch = scorePatten.matcher(forObject);
boolean scoreFind = scoreMatch.find();
Pattern bookNamePatten = Pattern.compile("<p class=\"title\">([^/]+)</p>");
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
boolean isBookNameMatch = bookNameMatch.find();
Pattern authorPatten = Pattern.compile(">作者:([^/]+)<");
Matcher authoreMatch = authorPatten.matcher(forObject);
boolean isFindAuthor = authoreMatch.find();
System.out.println("匹配书籍url" + isFind);
System.out.println("匹配分数" + scoreFind);
while (isFind && scoreFind && isBookNameMatch && isFindAuthor) {
try {
Float score = Float.parseFloat(scoreMatch.group(1));
if (score < lowestScore) {//数据库空间有限暂时爬取8.0分以上的小说
continue;
}
String bookName = bookNameMatch.group(1);
String author = authoreMatch.group(1);
String bokNum = matcher2.group(1);
String bookUrl = baseUrl + "/" + bokNum + "/";
ResponseEntity<String> forEntity = restTemplate.getForEntity(bookUrl, String.class);
if (forEntity.getStatusCode() == HttpStatus.OK) {
String body = forEntity.getBody();
Pattern statusPatten = Pattern.compile("状态:([^/]+)</li>");
Matcher statusMatch = statusPatten.matcher(body);
if (statusMatch.find()) {
String status = statusMatch.group(1);
Pattern updateTimePatten = Pattern.compile("更新:(\\d+-\\d+-\\d+\\s\\d+:\\d+:\\d+)</a>");
Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1);
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
Date updateTime = format.parse(updateTimeStr);
Pattern picPatten = Pattern.compile("<img src=\"([^>]+)\"\\s+onerror=\"this.src=");
Matcher picMather = picPatten.matcher(body);
if (picMather.find()) {
String picSrc = picMather.group(1);
Pattern descPatten = Pattern.compile("class=\"review\">([^<]+)</p>");
Matcher descMatch = descPatten.matcher(body);
if (descMatch.find()) {
String desc = descMatch.group(1);
Book book = new Book();
book.setAuthor(author);
book.setCatid(catNum);
book.setBookDesc(desc);
book.setBookName(bookName);
book.setScore(score > 10 ? 8.0f : score);
book.setPicUrl(picSrc);
book.setBookStatus(status);
book.setUpdateTime(updateTime);
List<BookIndex> indexList = new ArrayList<>();
List<BookContent> contentList = new ArrayList<>();
//读取目录
Pattern indexPatten = Pattern.compile("<a\\s+href=\"(/du/\\d+_\\d+/)\">查看完整目录</a>");
Matcher indexMatch = indexPatten.matcher(body);
if (indexMatch.find()) {
String indexUrl = baseUrl + indexMatch.group(1);
ResponseEntity<String> forEntity1 = restTemplate.getForEntity(indexUrl, String.class);
if (forEntity1.getStatusCode() == HttpStatus.OK) {
String body2 = forEntity1.getBody();
Pattern indexListPatten = Pattern.compile("<a\\s+style=\"\"\\s+href=\"(/\\d+_\\d+/\\d+\\.html)\">([^/]+)</a>");
Matcher indexListMatch = indexListPatten.matcher(body2);
boolean isFindIndex = indexListMatch.find();
int indexNum = 0;
//查询该书籍已存在目录号
List<Integer> hasIndexNum = bookService.queryIndexCountByBookNameAndBAuthor(bookName, author);
while (isFindIndex) {
if (!hasIndexNum.contains(indexNum)) {
String contentUrl = baseUrl + indexListMatch.group(1);
String indexName = indexListMatch.group(2);
//查询章节内容
ResponseEntity<String> forEntity2 = restTemplate.getForEntity(contentUrl, String.class);
if (forEntity2.getStatusCode() == HttpStatus.OK) {
String body3 = forEntity2.getBody();
Pattern contentPattten = Pattern.compile("章节错误,点此举报(.*)加入书签,方便阅读");
String start = "『章节错误,点此举报』";
String end = "『加入书签,方便阅读』";
String content = body3.substring(body3.indexOf(start) + start.length(), body3.indexOf(end));
//TODO插入章节目录和章节内容
BookIndex bookIndex = new BookIndex();
bookIndex.setIndexName(indexName);
bookIndex.setIndexNum(indexNum);
indexList.add(bookIndex);
BookContent bookContent = new BookContent();
bookContent.setContent(content);
bookContent.setIndexNum(indexNum);
contentList.add(bookContent);
} else {
break;
}
}
indexNum++;
isFindIndex = indexListMatch.find();
}
if (indexList.size() == contentList.size() && indexList.size() > 0) {
bookService.saveBookAndIndexAndContent(book, indexList, contentList);
}
}
}
}
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
} finally {
matcher2.find();
isFind = matcher2.find();//需要找两次,应为有两个一样的路径匹配
scoreFind = scoreMatch.find();
isBookNameMatch = bookNameMatch.find();
isFindAuthor = authoreMatch.find();
}
}
}
}