From 85b64bbc10e3188d8fa0b28ea2ae9a7a49081fde Mon Sep 17 00:00:00 2001 From: xiongxiaoyang <1179705413@qq.com> Date: Fri, 14 Mar 2025 19:27:46 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E7=88=AC=E8=99=AB=E9=87=87=E9=9B=86?= =?UTF-8?q?=E6=B5=81=E7=A8=8B=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/main/java/com/java2nb/novel/core/utils/HttpUtil.java | 3 ++- .../java/com/java2nb/novel/service/impl/CrawlServiceImpl.java | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/novel-common/src/main/java/com/java2nb/novel/core/utils/HttpUtil.java b/novel-common/src/main/java/com/java2nb/novel/core/utils/HttpUtil.java index d85f9c4..47f900a 100644 --- a/novel-common/src/main/java/com/java2nb/novel/core/utils/HttpUtil.java +++ b/novel-common/src/main/java/com/java2nb/novel/core/utils/HttpUtil.java @@ -14,12 +14,13 @@ public class HttpUtil { public static String getByHttpClientWithChrome(String url) { try { + log.debug("Get url:{}", url); HttpHeaders headers = new HttpHeaders(); headers.add("user-agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36"); HttpEntity requestEntity = new HttpEntity<>(null, headers); ResponseEntity forEntity = REST_TEMPLATE.exchange(url, HttpMethod.GET, requestEntity, String.class); - + log.debug("Response code:{}", forEntity.getStatusCode()); if (forEntity.getStatusCode() == HttpStatus.OK) { return forEntity.getBody(); } else { diff --git a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java index c5b404d..0a30be5 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/CrawlServiceImpl.java @@ -309,6 +309,10 @@ public class CrawlServiceImpl implements CrawlService { } catch (Exception e) { log.error(e.getMessage(), e); } + if (page == totalPage) { + // 第一遍采集完成,翻到第一页,继续第二次采集,适用于分页数比较少的最近更新列表 + page = 0; + } page += 1; }