Mirror of https://github.com/201206030/novel-plus.git (synced 2025-04-26 17:20:52 +00:00)
perf: 爬虫分类规则优化
This commit is contained in:
parent
74d7ea7000
commit
6d0ab33757
@@ -255,12 +255,21 @@ public class CrawlServiceImpl implements CrawlService {
|
|||||||
while (page <= totalPage) {
|
while (page <= totalPage) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
String catIdRule = ruleBean.getCatIdRule().get("catId" + catId);
|
||||||
if (StringUtils.isNotBlank(ruleBean.getCatIdRule().get("catId" + catId))) {
|
if (StringUtils.isNotBlank(catIdRule)) {
|
||||||
|
String catBookListUrl = "";
|
||||||
|
if (StringUtils.isNotBlank(ruleBean.getBookListUrl())) {
|
||||||
|
// 兼容老规则
|
||||||
// 拼接分类URL
|
// 拼接分类URL
|
||||||
String catBookListUrl = ruleBean.getBookListUrl()
|
catBookListUrl = ruleBean.getBookListUrl()
|
||||||
.replace("{catId}", ruleBean.getCatIdRule().get("catId" + catId))
|
.replace("{catId}", catIdRule)
|
||||||
.replace("{page}", page + "");
|
.replace("{page}", page + "");
|
||||||
|
} else {
|
||||||
|
// 新规则
|
||||||
|
// 拼接分类URL
|
||||||
|
catBookListUrl = catIdRule.replace("{page}", page + "");
|
||||||
|
}
|
||||||
|
log.info("catBookListUrl:{}", catBookListUrl);
|
||||||
|
|
||||||
String bookListHtml = crawlHttpClient.get(catBookListUrl);
|
String bookListHtml = crawlHttpClient.get(catBookListUrl);
|
||||||
if (bookListHtml != null) {
|
if (bookListHtml != null) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user