Compare commits

..

4 Commits

7 changed files with 55 additions and 50 deletions

View File

@ -1,5 +1,4 @@
<p align="center"> <p align="center">
<a href="https://www.swiftproxy.net/?code=T2WV1VT50"><img src="https://xxyopen.com/images/ad1.png" alt="AD" ></a>
<a href="https://cloud.tencent.com/act/cps/redirect?redirect=2446&cps_key=736e609d66e0ac4e57813316cec6fd0b&from=console"><img src="https://youdoc.github.io/img/tencent.jpg" alt="AD" ></a> <a href="https://cloud.tencent.com/act/cps/redirect?redirect=2446&cps_key=736e609d66e0ac4e57813316cec6fd0b&from=console"><img src="https://youdoc.github.io/img/tencent.jpg" alt="AD" ></a>
</p> </p>
<p align="center"> <p align="center">

View File

@ -5,7 +5,7 @@
<groupId>com.java2nb</groupId> <groupId>com.java2nb</groupId>
<artifactId>novel-admin</artifactId> <artifactId>novel-admin</artifactId>
<version>5.1.1</version> <version>5.1.2</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<name>novel-admin</name> <name>novel-admin</name>

View File

@ -5,7 +5,7 @@
<parent> <parent>
<artifactId>novel</artifactId> <artifactId>novel</artifactId>
<groupId>com.java2nb</groupId> <groupId>com.java2nb</groupId>
<version>5.1.1</version> <version>5.1.2</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -5,7 +5,7 @@
<parent> <parent>
<artifactId>novel</artifactId> <artifactId>novel</artifactId>
<groupId>com.java2nb</groupId> <groupId>com.java2nb</groupId>
<version>5.1.1</version> <version>5.1.2</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -34,6 +34,7 @@ import org.mybatis.dynamic.sql.render.RenderingStrategies;
import org.mybatis.dynamic.sql.select.render.SelectStatementProvider; import org.mybatis.dynamic.sql.select.render.SelectStatementProvider;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.time.Duration;
import java.util.*; import java.util.*;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Matcher; import java.util.regex.Matcher;
@ -256,65 +257,70 @@ public class CrawlServiceImpl implements CrawlService {
try { try {
String catIdRule = ruleBean.getCatIdRule().get("catId" + catId); String catIdRule = ruleBean.getCatIdRule().get("catId" + catId);
if (StringUtils.isNotBlank(catIdRule)) { if (StringUtils.isBlank(catIdRule) || Thread.currentThread().isInterrupted()) {
String catBookListUrl = ""; return;
if (StringUtils.isNotBlank(ruleBean.getBookListUrl())) { }
// 兼容老规则 String catBookListUrl = "";
// 拼接分类URL if (StringUtils.isNotBlank(ruleBean.getBookListUrl())) {
catBookListUrl = ruleBean.getBookListUrl() // 兼容老规则
.replace("{catId}", catIdRule) // 拼接分类URL
.replace("{page}", page + ""); catBookListUrl = ruleBean.getBookListUrl()
} else { .replace("{catId}", catIdRule)
// 新规则 .replace("{page}", page + "");
// 拼接分类URL } else {
catBookListUrl = catIdRule.replace("{page}", page + ""); // 新规则
} // 拼接分类URL
log.info("catBookListUrl{}", catBookListUrl); catBookListUrl = catIdRule.replace("{page}", page + "");
}
log.info("catBookListUrl{}", catBookListUrl);
String bookListHtml = crawlHttpClient.get(catBookListUrl, ruleBean.getCharset()); String bookListHtml = crawlHttpClient.get(catBookListUrl, ruleBean.getCharset());
if (bookListHtml != null) { if (bookListHtml != null) {
Pattern bookIdPatten = Pattern.compile(ruleBean.getBookIdPatten()); Pattern bookIdPatten = Pattern.compile(ruleBean.getBookIdPatten());
Matcher bookIdMatcher = bookIdPatten.matcher(bookListHtml); Matcher bookIdMatcher = bookIdPatten.matcher(bookListHtml);
boolean isFindBookId = bookIdMatcher.find(); boolean isFindBookId = bookIdMatcher.find();
while (isFindBookId) { while (isFindBookId) {
try { try {
//1.阻塞过程(使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时) //1.阻塞过程(使用了 sleep,同步锁的 wait,socket 中的 receiver,accept 等方法时)
//捕获中断异常InterruptedException来退出线程。 //捕获中断异常InterruptedException来退出线程。
//2.非阻塞过程中通过判断中断标志来退出线程。 //2.非阻塞过程中通过判断中断标志来退出线程。
if (Thread.currentThread().isInterrupted()) { if (Thread.currentThread().isInterrupted()) {
return; return;
}
String bookId = bookIdMatcher.group(1);
parseBookAndSave(catId, ruleBean, sourceId, bookId);
} catch (Exception e) {
log.error(e.getMessage(), e);
} }
isFindBookId = bookIdMatcher.find(); String bookId = bookIdMatcher.group(1);
parseBookAndSave(catId, ruleBean, sourceId, bookId);
} catch (Exception e) {
log.error(e.getMessage(), e);
} }
Pattern totalPagePatten = Pattern.compile(ruleBean.getTotalPagePatten()); isFindBookId = bookIdMatcher.find();
Matcher totalPageMatcher = totalPagePatten.matcher(bookListHtml); }
boolean isFindTotalPage = totalPageMatcher.find();
if (isFindTotalPage) {
totalPage = Integer.parseInt(totalPageMatcher.group(1)); Pattern totalPagePatten = Pattern.compile(ruleBean.getTotalPagePatten());
Matcher totalPageMatcher = totalPagePatten.matcher(bookListHtml);
} boolean isFindTotalPage = totalPageMatcher.find();
if (isFindTotalPage) {
totalPage = Integer.parseInt(totalPageMatcher.group(1));
} }
} }
} catch (Exception e) { } catch (Exception e) {
log.error(e.getMessage(), e); log.error(e.getMessage(), e);
} }
if (page == totalPage) { if (page >= totalPage) {
// 第一遍采集完成,翻到第一页,继续第二次采集,适用于分页数比较少的最近更新列表 // 第一遍采集完成,翻到第一页,继续第二次采集,适用于分页数比较少的最近更新列表
page = 0; page = 1;
try {
// 第一遍采集完成休眠1分钟
Thread.sleep(Duration.ofMinutes(1));
} catch (InterruptedException e) {
log.error(e.getMessage(), e);
}
}else{
page += 1;
} }
page += 1;
} }

View File

@ -5,7 +5,7 @@
<parent> <parent>
<artifactId>novel</artifactId> <artifactId>novel</artifactId>
<groupId>com.java2nb</groupId> <groupId>com.java2nb</groupId>
<version>5.1.1</version> <version>5.1.2</version>
</parent> </parent>
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>

View File

@ -5,7 +5,7 @@
<groupId>com.java2nb</groupId> <groupId>com.java2nb</groupId>
<artifactId>novel</artifactId> <artifactId>novel</artifactId>
<version>5.1.1</version> <version>5.1.2</version>
<modules> <modules>
<module>novel-common</module> <module>novel-common</module>
<module>novel-front</module> <module>novel-front</module>