mirror of
https://github.com/201206030/novel-plus.git
synced 2025-07-01 23:26:38 +00:00
Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
2c86cb9a7d | |||
a4d6272a4f | |||
55d5deea74 | |||
4f474b91a8 |
@ -1,5 +1,4 @@
|
||||
<p align="center">
|
||||
<a href="https://www.swiftproxy.net/?code=T2WV1VT50"><img src="https://xxyopen.com/images/ad1.png" alt="AD" ></a>
|
||||
<a href="https://cloud.tencent.com/act/cps/redirect?redirect=2446&cps_key=736e609d66e0ac4e57813316cec6fd0b&from=console"><img src="https://youdoc.github.io/img/tencent.jpg" alt="AD" ></a>
|
||||
</p>
|
||||
<p align="center">
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
<groupId>com.java2nb</groupId>
|
||||
<artifactId>novel-admin</artifactId>
|
||||
<version>5.1.1</version>
|
||||
<version>5.1.2</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>novel-admin</name>
|
||||
|
@ -5,7 +5,7 @@
|
||||
<parent>
|
||||
<artifactId>novel</artifactId>
|
||||
<groupId>com.java2nb</groupId>
|
||||
<version>5.1.1</version>
|
||||
<version>5.1.2</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
<parent>
|
||||
<artifactId>novel</artifactId>
|
||||
<groupId>com.java2nb</groupId>
|
||||
<version>5.1.1</version>
|
||||
<version>5.1.2</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
@ -34,6 +34,7 @@ import org.mybatis.dynamic.sql.render.RenderingStrategies;
|
||||
import org.mybatis.dynamic.sql.select.render.SelectStatementProvider;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.regex.Matcher;
|
||||
@ -256,65 +257,70 @@ public class CrawlServiceImpl implements CrawlService {
|
||||
|
||||
try {
|
||||
String catIdRule = ruleBean.getCatIdRule().get("catId" + catId);
|
||||
if (StringUtils.isNotBlank(catIdRule)) {
|
||||
String catBookListUrl = "";
|
||||
if (StringUtils.isNotBlank(ruleBean.getBookListUrl())) {
|
||||
// 兼容老规则
|
||||
// 拼接分类URL
|
||||
catBookListUrl = ruleBean.getBookListUrl()
|
||||
.replace("{catId}", catIdRule)
|
||||
.replace("{page}", page + "");
|
||||
} else {
|
||||
// 新规则
|
||||
// 拼接分类URL
|
||||
catBookListUrl = catIdRule.replace("{page}", page + "");
|
||||
}
|
||||
log.info("catBookListUrl:{}", catBookListUrl);
|
||||
if (StringUtils.isBlank(catIdRule) || Thread.currentThread().isInterrupted()) {
|
||||
return;
|
||||
}
|
||||
String catBookListUrl = "";
|
||||
if (StringUtils.isNotBlank(ruleBean.getBookListUrl())) {
|
||||
// 兼容老规则
|
||||
// 拼接分类URL
|
||||
catBookListUrl = ruleBean.getBookListUrl()
|
||||
.replace("{catId}", catIdRule)
|
||||
.replace("{page}", page + "");
|
||||
} else {
|
||||
// 新规则
|
||||
// 拼接分类URL
|
||||
catBookListUrl = catIdRule.replace("{page}", page + "");
|
||||
}
|
||||
log.info("catBookListUrl:{}", catBookListUrl);
|
||||
|
||||
String bookListHtml = crawlHttpClient.get(catBookListUrl, ruleBean.getCharset());
|
||||
if (bookListHtml != null) {
|
||||
Pattern bookIdPatten = Pattern.compile(ruleBean.getBookIdPatten());
|
||||
Matcher bookIdMatcher = bookIdPatten.matcher(bookListHtml);
|
||||
boolean isFindBookId = bookIdMatcher.find();
|
||||
while (isFindBookId) {
|
||||
try {
|
||||
//1.阻塞过程(使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时)
|
||||
//捕获中断异常InterruptedException来退出线程。
|
||||
//2.非阻塞过程中通过判断中断标志来退出线程。
|
||||
if (Thread.currentThread().isInterrupted()) {
|
||||
return;
|
||||
}
|
||||
|
||||
String bookId = bookIdMatcher.group(1);
|
||||
parseBookAndSave(catId, ruleBean, sourceId, bookId);
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
String bookListHtml = crawlHttpClient.get(catBookListUrl, ruleBean.getCharset());
|
||||
if (bookListHtml != null) {
|
||||
Pattern bookIdPatten = Pattern.compile(ruleBean.getBookIdPatten());
|
||||
Matcher bookIdMatcher = bookIdPatten.matcher(bookListHtml);
|
||||
boolean isFindBookId = bookIdMatcher.find();
|
||||
while (isFindBookId) {
|
||||
try {
|
||||
//1.阻塞过程(使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时)
|
||||
//捕获中断异常InterruptedException来退出线程。
|
||||
//2.非阻塞过程中通过判断中断标志来退出线程。
|
||||
if (Thread.currentThread().isInterrupted()) {
|
||||
return;
|
||||
}
|
||||
|
||||
isFindBookId = bookIdMatcher.find();
|
||||
String bookId = bookIdMatcher.group(1);
|
||||
parseBookAndSave(catId, ruleBean, sourceId, bookId);
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
}
|
||||
|
||||
Pattern totalPagePatten = Pattern.compile(ruleBean.getTotalPagePatten());
|
||||
Matcher totalPageMatcher = totalPagePatten.matcher(bookListHtml);
|
||||
boolean isFindTotalPage = totalPageMatcher.find();
|
||||
if (isFindTotalPage) {
|
||||
isFindBookId = bookIdMatcher.find();
|
||||
}
|
||||
|
||||
totalPage = Integer.parseInt(totalPageMatcher.group(1));
|
||||
|
||||
}
|
||||
Pattern totalPagePatten = Pattern.compile(ruleBean.getTotalPagePatten());
|
||||
Matcher totalPageMatcher = totalPagePatten.matcher(bookListHtml);
|
||||
boolean isFindTotalPage = totalPageMatcher.find();
|
||||
if (isFindTotalPage) {
|
||||
|
||||
totalPage = Integer.parseInt(totalPageMatcher.group(1));
|
||||
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
}
|
||||
if (page == totalPage) {
|
||||
if (page >= totalPage) {
|
||||
// 第一遍采集完成,翻到第一页,继续第二次采集,适用于分页数比较少的最近更新列表
|
||||
page = 0;
|
||||
page = 1;
|
||||
try {
|
||||
// 第一遍采集完成,休眠1分钟
|
||||
Thread.sleep(Duration.ofMinutes(1));
|
||||
} catch (InterruptedException e) {
|
||||
log.error(e.getMessage(), e);
|
||||
}
|
||||
}else{
|
||||
page += 1;
|
||||
}
|
||||
|
||||
page += 1;
|
||||
}
|
||||
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
<parent>
|
||||
<artifactId>novel</artifactId>
|
||||
<groupId>com.java2nb</groupId>
|
||||
<version>5.1.1</version>
|
||||
<version>5.1.2</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
|
Reference in New Issue
Block a user