mirror of
https://github.com/201206030/novel-plus.git
synced 2025-06-24 04:46:37 +00:00
feat: 支持非utf-8编码的网站采集
This commit is contained in:
@ -4,22 +4,35 @@ import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.http.*;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author Administrator
|
||||
*/
|
||||
@Slf4j
|
||||
public class HttpUtil {
|
||||
|
||||
private static final RestTemplate REST_TEMPLATE = RestTemplates.newInstance("utf-8");
|
||||
private static final String DEFAULT_CHARSET = "utf-8";
|
||||
|
||||
public static String getByHttpClientWithChrome(String url) {
|
||||
/**
 * Cache of {@link RestTemplate} instances keyed by charset name. Entries are created
 * on first use through {@code computeIfAbsent} with {@code RestTemplates.newInstance(charset)}.
 * NOTE(review): a plain {@code HashMap} is not thread-safe; if this utility is called from
 * several threads at once, a {@code ConcurrentHashMap} may be needed - confirm against callers.
 */
private static final Map<String, RestTemplate> REST_TEMPLATE_MAP = new HashMap<>();
|
||||
|
||||
public static String getByHttpClientWithChrome(String url, String charset) {
|
||||
log.debug("Get url:{}", url);
|
||||
if (!Charset.isSupported(charset)) {
|
||||
log.error("字符编码{}无效!", charset);
|
||||
return null;
|
||||
}
|
||||
RestTemplate restTemplate = REST_TEMPLATE_MAP.computeIfAbsent(charset,
|
||||
k -> RestTemplates.newInstance(charset));
|
||||
try {
|
||||
log.debug("Get url:{}", url);
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
headers.add("user-agent",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36");
|
||||
HttpEntity<String> requestEntity = new HttpEntity<>(null, headers);
|
||||
ResponseEntity<String> forEntity = REST_TEMPLATE.exchange(url, HttpMethod.GET, requestEntity, String.class);
|
||||
ResponseEntity<String> forEntity = restTemplate.exchange(url, HttpMethod.GET, requestEntity,
|
||||
String.class);
|
||||
log.debug("Response code:{}", forEntity.getStatusCode());
|
||||
if (forEntity.getStatusCode() == HttpStatus.OK) {
|
||||
return forEntity.getBody();
|
||||
@ -32,4 +45,8 @@ public class HttpUtil {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Convenience overload that fetches {@code url} with the default charset.
 * Delegates to {@code getByHttpClientWithChrome(url, DEFAULT_CHARSET)}, where
 * {@code DEFAULT_CHARSET} is {@code "utf-8"}.
 *
 * @param url the address to request
 * @return whatever the two-argument overload returns for the default charset
 */
public static String getByHttpClientWithChrome(String url) {
|
||||
return getByHttpClientWithChrome(url, DEFAULT_CHARSET);
|
||||
}
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user