mirror of
https://github.com/201206030/novel-plus.git
synced 2025-04-26 17:20:52 +00:00
feat: 增加 HTTP 代理配置,助力爬虫采集
This commit is contained in:
parent
0b728b9fe5
commit
ba272bd89a
13
README.md
13
README.md
@ -211,12 +211,13 @@ novel-plus -- 父工程
|
||||
2. 使用`unzip novel-crawl.zip`命令解压 novel-crawl.zip
|
||||
3. 修改 `config/application-common-prod.yml` 文件中的数据库配置
|
||||
4. 修改 `config/application-common-prod.yml` 文件中的管理员账号密码
|
||||
5. novel-crawl 目录下使用`bin/novel-crawl.sh start`命令启动爬虫程序
|
||||
6. 打开浏览器,默认`8083`端口访问
|
||||
7. 选择已有或新增爬虫源(支持自定义爬虫规则),点击`开启`按钮,开始采集小说数据
|
||||
8. novel-crawl 目录下使用`bin/novel-crawl.sh stop`命令停止爬虫程序
|
||||
9. novel-crawl 目录下使用`bin/novel-crawl.sh restart`命令重启爬虫程序
|
||||
10. novel-crawl 目录下使用`bin/novel-crawl.sh status`命令查看爬虫程序的运行状态
|
||||
5. 修改 `config/application-common-prod.yml` 文件中的 HTTP 代理配置
|
||||
6. novel-crawl 目录下使用`bin/novel-crawl.sh start`命令启动爬虫程序
|
||||
7. 打开浏览器,默认`8083`端口访问
|
||||
8. 选择已有或新增爬虫源(支持自定义爬虫规则),点击`开启`按钮,开始采集小说数据
|
||||
9. novel-crawl 目录下使用`bin/novel-crawl.sh stop`命令停止爬虫程序
|
||||
10. novel-crawl 目录下使用`bin/novel-crawl.sh restart`命令重启爬虫程序
|
||||
11. novel-crawl 目录下使用`bin/novel-crawl.sh status`命令查看爬虫程序的运行状态
|
||||
|
||||
- 前台安装
|
||||
|
||||
|
@ -0,0 +1,22 @@
|
||||
package com.java2nb.novel.core.config;
|
||||
|
||||
import lombok.Data;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* HTTP proxy settings holder, annotated to bind configuration keys under the
* {@code http.proxy} prefix ({@code enabled}, {@code ip}, {@code port}).
* Accessors come from Lombok {@code @Data}; RestTemplateUtil reads them via
* {@code getEnabled()}, {@code getIp()} and {@code getPort()}.
*
* @author xiongxiaoyang
|
||||
* @date 2022/7/14
|
||||
*/
|
||||
@Data
|
||||
@Component
|
||||
@ConfigurationProperties(prefix = "http.proxy")
|
||||
public class HttpProxyProperties {
|
||||
|
||||
// Proxy switch; RestTemplateUtil only configures a proxy when this equals Boolean.TRUE,
// so null (key absent) behaves like false.
private Boolean enabled;
|
||||
|
||||
// Proxy host handed to HttpHost as its first argument. Despite the name, the sample
// configs use both a hostname and an IPv4 address here.
private String ip;
|
||||
|
||||
// Proxy port handed to HttpHost as its second argument.
// NOTE(review): presumably unboxed to int at that call, so enabled=true with no port
// configured would fail there — confirm and consider validating.
private Integer port;
|
||||
|
||||
}
|
@ -1,6 +1,8 @@
|
||||
package com.java2nb.novel.core.utils;
|
||||
|
||||
import com.java2nb.novel.core.config.HttpProxyProperties;
|
||||
import lombok.SneakyThrows;
|
||||
import org.apache.http.HttpHost;
|
||||
import org.apache.http.config.Registry;
|
||||
import org.apache.http.config.RegistryBuilder;
|
||||
import org.apache.http.conn.socket.ConnectionSocketFactory;
|
||||
@ -8,37 +10,46 @@ import org.apache.http.conn.socket.PlainConnectionSocketFactory;
|
||||
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
|
||||
import org.apache.http.conn.ssl.TrustStrategy;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClientBuilder;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
|
||||
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
|
||||
import org.springframework.http.converter.HttpMessageConverter;
|
||||
import org.springframework.http.converter.StringHttpMessageConverter;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
import javax.net.ssl.SSLContext;
|
||||
import java.nio.charset.Charset;
|
||||
import java.security.cert.X509Certificate;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
@Component
|
||||
public class RestTemplateUtil {
|
||||
|
||||
private static HttpProxyProperties httpProxyProperties;
|
||||
|
||||
// Copies the supplied proxy properties into the static httpProxyProperties field so the
// static getInstance(String) can read them without an instance.
// NOTE(review): the class is a @Component, so presumably Spring calls this once at startup;
// before that the static field is still null, which getInstance tolerates via its
// Objects.nonNull check (no proxy is applied in that case).
RestTemplateUtil(HttpProxyProperties properties) {
|
||||
httpProxyProperties = properties;
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
public static RestTemplate getInstance(String charset) {
|
||||
|
||||
TrustStrategy acceptingTrustStrategy = (X509Certificate[] chain, String authType) -> true;
|
||||
|
||||
//忽略证书
|
||||
SSLContext sslContext = org.apache.http.ssl.SSLContexts.custom()
|
||||
.loadTrustMaterial(null, acceptingTrustStrategy)
|
||||
.build();
|
||||
.loadTrustMaterial(null, acceptingTrustStrategy)
|
||||
.build();
|
||||
|
||||
SSLConnectionSocketFactory csf = new SSLConnectionSocketFactory(sslContext);
|
||||
|
||||
Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
|
||||
.register("http", PlainConnectionSocketFactory.getSocketFactory())
|
||||
.register("https", csf)
|
||||
.build();
|
||||
.register("http", PlainConnectionSocketFactory.getSocketFactory())
|
||||
.register("https", csf)
|
||||
.build();
|
||||
PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager(registry);
|
||||
|
||||
//连接池的最大连接数,0代表不限;如果取0,需要考虑连接泄露导致系统崩溃的后果
|
||||
@ -46,22 +57,26 @@ public class RestTemplateUtil {
|
||||
//每个路由的最大连接数,如果只调用一个地址,可以将其设置为最大连接数
|
||||
connectionManager.setDefaultMaxPerRoute(300);
|
||||
|
||||
CloseableHttpClient httpClient = HttpClients.custom()
|
||||
.setConnectionManager(connectionManager)
|
||||
.build();
|
||||
|
||||
HttpClientBuilder clientBuilder = HttpClients.custom();
|
||||
if (Objects.nonNull(httpProxyProperties) && Boolean.TRUE.equals(httpProxyProperties.getEnabled())) {
|
||||
HttpHost proxy = new HttpHost(httpProxyProperties.getIp(), httpProxyProperties.getPort());
|
||||
clientBuilder.setProxy(proxy);
|
||||
}
|
||||
CloseableHttpClient httpClient = clientBuilder.setConnectionManager(connectionManager)
|
||||
.build();
|
||||
|
||||
HttpComponentsClientHttpRequestFactory requestFactory =
|
||||
new HttpComponentsClientHttpRequestFactory();
|
||||
new HttpComponentsClientHttpRequestFactory();
|
||||
|
||||
requestFactory.setHttpClient(httpClient);
|
||||
requestFactory.setConnectionRequestTimeout(3000);
|
||||
requestFactory.setConnectTimeout(3000);
|
||||
requestFactory.setReadTimeout(30000);
|
||||
|
||||
RestTemplate restTemplate = new RestTemplate(requestFactory);
|
||||
List<HttpMessageConverter<?>> list = restTemplate.getMessageConverters();
|
||||
for (HttpMessageConverter<?> httpMessageConverter : list) {
|
||||
if(httpMessageConverter instanceof StringHttpMessageConverter) {
|
||||
if (httpMessageConverter instanceof StringHttpMessageConverter) {
|
||||
((StringHttpMessageConverter) httpMessageConverter).setDefaultCharset(Charset.forName(charset));
|
||||
break;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
spring:
|
||||
profiles:
|
||||
include: [common]
|
||||
include: [ common ]
|
||||
main:
|
||||
allow-bean-definition-overriding: true
|
||||
#Redis服务器IP
|
||||
@ -54,24 +54,30 @@ sharding:
|
||||
props:
|
||||
sql.show: true
|
||||
tables:
|
||||
book_content: #book_content表
|
||||
book_content: #book_content表
|
||||
key-generator-column-name: id #主键
|
||||
actual-data-nodes: ds${0}.book_content${0..9} #数据节点
|
||||
# database-strategy: #分库策略
|
||||
# inline:
|
||||
# sharding-column: book_id
|
||||
# algorithm-expression: ds${book_id % 10}
|
||||
table-strategy: #分表策略
|
||||
table-strategy: #分表策略
|
||||
inline:
|
||||
shardingColumn: index_id
|
||||
algorithm-expression: book_content${index_id % 10}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
content:
|
||||
save:
|
||||
storage: db #存储介质,db:数据库,file:txt文本
|
||||
path: /Users/xiongxiaoyang/books #txt小说文本保存路径
|
||||
path: /Users/xiongxiaoyang/books #txt小说文本保存路径
|
||||
|
||||
# HTTP 代理配置
|
||||
http:
|
||||
proxy:
|
||||
# 是否开启 HTTP 代理,true-开启,false-不开启
|
||||
enabled: false
|
||||
# 代理 IP 或主机名
|
||||
ip: u493.kdltps.com
|
||||
# 代理端口号
|
||||
port: 15818
|
@ -1,6 +1,6 @@
|
||||
spring:
|
||||
profiles:
|
||||
include: [common]
|
||||
include: [ common ]
|
||||
main:
|
||||
allow-bean-definition-overriding: true
|
||||
#Redis服务器IP
|
||||
@ -54,14 +54,14 @@ sharding:
|
||||
props:
|
||||
sql.show: true
|
||||
tables:
|
||||
book_content: #book_content表
|
||||
book_content: #book_content表
|
||||
key-generator-column-name: id #主键
|
||||
actual-data-nodes: ds${0}.book_content${0..9} #数据节点
|
||||
# database-strategy: #分库策略
|
||||
# inline:
|
||||
# sharding-column: book_id
|
||||
# algorithm-expression: ds${book_id % 10}
|
||||
table-strategy: #分表策略
|
||||
table-strategy: #分表策略
|
||||
inline:
|
||||
shardingColumn: index_id
|
||||
algorithm-expression: book_content${index_id % 10}
|
||||
@ -79,7 +79,15 @@ content:
|
||||
storage: db #存储介质,db:数据库,file:txt文本
|
||||
path: /Users/xiongxiaoyang/books #txt小说文本保存路径
|
||||
|
||||
|
||||
# HTTP 代理配置
|
||||
http:
|
||||
proxy:
|
||||
# 是否开启 HTTP 代理,true-开启,false-不开启
|
||||
enabled: false
|
||||
# 代理 IP
|
||||
ip: 40.83.102.86
|
||||
# 代理端口号
|
||||
port: 80
|
||||
|
||||
|
||||
|
||||
|
@ -36,4 +36,14 @@ crawl:
|
||||
content:
|
||||
save:
|
||||
storage: db #存储介质,db:数据库,file:txt文本
|
||||
path: /Users/xiongxiaoyang/books #txt小说文本保存路径
|
||||
path: /Users/xiongxiaoyang/books #txt小说文本保存路径
|
||||
|
||||
# HTTP 代理配置
|
||||
http:
|
||||
proxy:
|
||||
# 是否开启 HTTP 代理,true-开启,false-不开启
|
||||
enabled: false
|
||||
# 代理 IP 或主机名
|
||||
ip: u493.kdltps.com
|
||||
# 代理端口号
|
||||
port: 15818
|
Loading…
x
Reference in New Issue
Block a user