mirror of
https://github.com/201206030/novel-plus.git
synced 2025-04-26 01:10:52 +00:00
perf: 爬虫采集流程优化
This commit is contained in:
parent
6d0ab33757
commit
85b64bbc10
@@ -14,12 +14,13 @@ public class HttpUtil {
|
|||||||
|
|
||||||
public static String getByHttpClientWithChrome(String url) {
|
public static String getByHttpClientWithChrome(String url) {
|
||||||
try {
|
try {
|
||||||
|
log.debug("Get url:{}", url);
|
||||||
HttpHeaders headers = new HttpHeaders();
|
HttpHeaders headers = new HttpHeaders();
|
||||||
headers.add("user-agent",
|
headers.add("user-agent",
|
||||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36");
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36");
|
||||||
HttpEntity<String> requestEntity = new HttpEntity<>(null, headers);
|
HttpEntity<String> requestEntity = new HttpEntity<>(null, headers);
|
||||||
ResponseEntity<String> forEntity = REST_TEMPLATE.exchange(url, HttpMethod.GET, requestEntity, String.class);
|
ResponseEntity<String> forEntity = REST_TEMPLATE.exchange(url, HttpMethod.GET, requestEntity, String.class);
|
||||||
|
log.debug("Response code:{}", forEntity.getStatusCode());
|
||||||
if (forEntity.getStatusCode() == HttpStatus.OK) {
|
if (forEntity.getStatusCode() == HttpStatus.OK) {
|
||||||
return forEntity.getBody();
|
return forEntity.getBody();
|
||||||
} else {
|
} else {
|
||||||
|
@@ -309,6 +309,10 @@ public class CrawlServiceImpl implements CrawlService {
|
|||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
log.error(e.getMessage(), e);
|
log.error(e.getMessage(), e);
|
||||||
}
|
}
|
||||||
|
if (page == totalPage) {
|
||||||
|
// 第一遍采集完成,翻到第一页,继续第二次采集,适用于分页数比较少的最近更新列表
|
||||||
|
page = 0;
|
||||||
|
}
|
||||||
|
|
||||||
page += 1;
|
page += 1;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user