mirror of
https://github.com/201206030/novel-plus.git
synced 2025-04-27 01:30:51 +00:00
feat: 增加 HTTP 代理配置,助力爬虫采集
This commit is contained in:
parent
0b728b9fe5
commit
ba272bd89a
13
README.md
13
README.md
@ -211,12 +211,13 @@ novel-plus -- 父工程
|
|||||||
2. 使用`unzip novel-crawl.zip`命令解压 novel-crawl.zip
|
2. 使用`unzip novel-crawl.zip`命令解压 novel-crawl.zip
|
||||||
3. 修改 `config/application-common-prod.yml` 文件中的数据库配置
|
3. 修改 `config/application-common-prod.yml` 文件中的数据库配置
|
||||||
4. 修改 `config/application-common-prod.yml` 文件中的管理员账号密码
|
4. 修改 `config/application-common-prod.yml` 文件中的管理员账号密码
|
||||||
5. novel-crawl 目录下使用`bin/novel-crawl.sh start`命令启动爬虫程序
|
5. (可选)修改 `config/application-common-prod.yml` 文件中的 HTTP 代理配置:如需通过代理采集,将 `http.proxy.enabled` 设为 `true`,并填写 `http.proxy.ip` 与 `http.proxy.port`
|
||||||
6. 打开浏览器,默认`8083`端口访问
|
6. novel-crawl 目录下使用`bin/novel-crawl.sh start`命令启动爬虫程序
|
||||||
7. 选择已有或新增爬虫源(支持自定义爬虫规则),点击`开启`按钮,开始采集小说数据
|
7. 打开浏览器,默认`8083`端口访问
|
||||||
8. novel-crawl 目录下使用`bin/novel-crawl.sh stop`命令停止爬虫程序
|
8. 选择已有或新增爬虫源(支持自定义爬虫规则),点击`开启`按钮,开始采集小说数据
|
||||||
9. novel-crawl 目录下使用`bin/novel-crawl.sh restart`命令重启爬虫程序
|
9. novel-crawl 目录下使用`bin/novel-crawl.sh stop`命令停止爬虫程序
|
||||||
10. novel-crawl 目录下使用`bin/novel-crawl.sh status`命令查看爬虫程序的运行状态
|
10. novel-crawl 目录下使用`bin/novel-crawl.sh restart`命令重启爬虫程序
|
||||||
|
11. novel-crawl 目录下使用`bin/novel-crawl.sh status`命令查看爬虫程序的运行状态
|
||||||
|
|
||||||
- 前台安装
|
- 前台安装
|
||||||
|
|
||||||
|
@ -0,0 +1,22 @@
|
|||||||
|
package com.java2nb.novel.core.config;
|
||||||
|
|
||||||
|
import lombok.Data;
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Configuration holder for the HTTP proxy settings consumed by {@code RestTemplateUtil}
* when it builds its HTTP client.
* <p>
* Values are bound from the {@code http.proxy} prefix through {@code @ConfigurationProperties};
* the class is registered as a Spring {@code @Component} and its accessors come from Lombok's
* {@code @Data}.
*
* @author xiongxiaoyang
|
||||||
|
* @date 2022/7/14
|
||||||
|
*/
|
||||||
|
@Data
|
||||||
|
@Component
|
||||||
|
@ConfigurationProperties(prefix = "http.proxy")
|
||||||
|
public class HttpProxyProperties {
|
||||||
|
|
||||||
|
// Whether the HTTP proxy is switched on (true = on, false = off).
// Boxed with no initializer, so it stays null unless the property is bound.
private Boolean enabled;
|
||||||
|
|
||||||
|
// Proxy host. Despite the field name, the sample configuration also supplies a host name
// (e.g. u493.kdltps.com), not only a literal IP address.
private String ip;
|
||||||
|
|
||||||
|
// Proxy port number.
// NOTE(review): boxed with no default; RestTemplateUtil passes it to the HttpHost constructor
// when the proxy is enabled, so a missing value presumably fails on unboxing - confirm.
private Integer port;
|
||||||
|
|
||||||
|
}
|
@ -1,6 +1,8 @@
|
|||||||
package com.java2nb.novel.core.utils;
|
package com.java2nb.novel.core.utils;
|
||||||
|
|
||||||
|
import com.java2nb.novel.core.config.HttpProxyProperties;
|
||||||
import lombok.SneakyThrows;
|
import lombok.SneakyThrows;
|
||||||
|
import org.apache.http.HttpHost;
|
||||||
import org.apache.http.config.Registry;
|
import org.apache.http.config.Registry;
|
||||||
import org.apache.http.config.RegistryBuilder;
|
import org.apache.http.config.RegistryBuilder;
|
||||||
import org.apache.http.conn.socket.ConnectionSocketFactory;
|
import org.apache.http.conn.socket.ConnectionSocketFactory;
|
||||||
@ -8,37 +10,46 @@ import org.apache.http.conn.socket.PlainConnectionSocketFactory;
|
|||||||
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
|
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
|
||||||
import org.apache.http.conn.ssl.TrustStrategy;
|
import org.apache.http.conn.ssl.TrustStrategy;
|
||||||
import org.apache.http.impl.client.CloseableHttpClient;
|
import org.apache.http.impl.client.CloseableHttpClient;
|
||||||
|
import org.apache.http.impl.client.HttpClientBuilder;
|
||||||
import org.apache.http.impl.client.HttpClients;
|
import org.apache.http.impl.client.HttpClients;
|
||||||
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
|
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
|
||||||
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
|
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
|
||||||
import org.springframework.http.converter.HttpMessageConverter;
|
import org.springframework.http.converter.HttpMessageConverter;
|
||||||
import org.springframework.http.converter.StringHttpMessageConverter;
|
import org.springframework.http.converter.StringHttpMessageConverter;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
import org.springframework.web.client.RestTemplate;
|
import org.springframework.web.client.RestTemplate;
|
||||||
|
|
||||||
import javax.net.ssl.SSLContext;
|
import javax.net.ssl.SSLContext;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.security.cert.X509Certificate;
|
import java.security.cert.X509Certificate;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
@Component
|
||||||
public class RestTemplateUtil {
|
public class RestTemplateUtil {
|
||||||
|
|
||||||
|
private static HttpProxyProperties httpProxyProperties;
|
||||||
|
|
||||||
|
RestTemplateUtil(HttpProxyProperties properties) {
|
||||||
|
httpProxyProperties = properties;
|
||||||
|
}
|
||||||
|
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public static RestTemplate getInstance(String charset) {
|
public static RestTemplate getInstance(String charset) {
|
||||||
|
|
||||||
TrustStrategy acceptingTrustStrategy = (X509Certificate[] chain, String authType) -> true;
|
TrustStrategy acceptingTrustStrategy = (X509Certificate[] chain, String authType) -> true;
|
||||||
|
|
||||||
//忽略证书
|
//忽略证书
|
||||||
SSLContext sslContext = org.apache.http.ssl.SSLContexts.custom()
|
SSLContext sslContext = org.apache.http.ssl.SSLContexts.custom()
|
||||||
.loadTrustMaterial(null, acceptingTrustStrategy)
|
.loadTrustMaterial(null, acceptingTrustStrategy)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
SSLConnectionSocketFactory csf = new SSLConnectionSocketFactory(sslContext);
|
SSLConnectionSocketFactory csf = new SSLConnectionSocketFactory(sslContext);
|
||||||
|
|
||||||
Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
|
Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
|
||||||
.register("http", PlainConnectionSocketFactory.getSocketFactory())
|
.register("http", PlainConnectionSocketFactory.getSocketFactory())
|
||||||
.register("https", csf)
|
.register("https", csf)
|
||||||
.build();
|
.build();
|
||||||
PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager(registry);
|
PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager(registry);
|
||||||
|
|
||||||
//连接池的最大连接数,0代表不限;如果取0,需要考虑连接泄露导致系统崩溃的后果
|
//连接池的最大连接数,0代表不限;如果取0,需要考虑连接泄露导致系统崩溃的后果
|
||||||
@ -46,22 +57,26 @@ public class RestTemplateUtil {
|
|||||||
//每个路由的最大连接数,如果只调用一个地址,可以将其设置为最大连接数
|
//每个路由的最大连接数,如果只调用一个地址,可以将其设置为最大连接数
|
||||||
connectionManager.setDefaultMaxPerRoute(300);
|
connectionManager.setDefaultMaxPerRoute(300);
|
||||||
|
|
||||||
CloseableHttpClient httpClient = HttpClients.custom()
|
HttpClientBuilder clientBuilder = HttpClients.custom();
|
||||||
.setConnectionManager(connectionManager)
|
if (Objects.nonNull(httpProxyProperties) && Boolean.TRUE.equals(httpProxyProperties.getEnabled())) {
|
||||||
.build();
|
HttpHost proxy = new HttpHost(httpProxyProperties.getIp(), httpProxyProperties.getPort());
|
||||||
|
clientBuilder.setProxy(proxy);
|
||||||
|
}
|
||||||
|
CloseableHttpClient httpClient = clientBuilder.setConnectionManager(connectionManager)
|
||||||
|
.build();
|
||||||
|
|
||||||
HttpComponentsClientHttpRequestFactory requestFactory =
|
HttpComponentsClientHttpRequestFactory requestFactory =
|
||||||
new HttpComponentsClientHttpRequestFactory();
|
new HttpComponentsClientHttpRequestFactory();
|
||||||
|
|
||||||
requestFactory.setHttpClient(httpClient);
|
requestFactory.setHttpClient(httpClient);
|
||||||
requestFactory.setConnectionRequestTimeout(3000);
|
requestFactory.setConnectionRequestTimeout(3000);
|
||||||
requestFactory.setConnectTimeout(3000);
|
requestFactory.setConnectTimeout(3000);
|
||||||
requestFactory.setReadTimeout(30000);
|
requestFactory.setReadTimeout(30000);
|
||||||
|
|
||||||
RestTemplate restTemplate = new RestTemplate(requestFactory);
|
RestTemplate restTemplate = new RestTemplate(requestFactory);
|
||||||
List<HttpMessageConverter<?>> list = restTemplate.getMessageConverters();
|
List<HttpMessageConverter<?>> list = restTemplate.getMessageConverters();
|
||||||
for (HttpMessageConverter<?> httpMessageConverter : list) {
|
for (HttpMessageConverter<?> httpMessageConverter : list) {
|
||||||
if(httpMessageConverter instanceof StringHttpMessageConverter) {
|
if (httpMessageConverter instanceof StringHttpMessageConverter) {
|
||||||
((StringHttpMessageConverter) httpMessageConverter).setDefaultCharset(Charset.forName(charset));
|
((StringHttpMessageConverter) httpMessageConverter).setDefaultCharset(Charset.forName(charset));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
spring:
|
spring:
|
||||||
profiles:
|
profiles:
|
||||||
include: [common]
|
include: [ common ]
|
||||||
main:
|
main:
|
||||||
allow-bean-definition-overriding: true
|
allow-bean-definition-overriding: true
|
||||||
#Redis服务器IP
|
#Redis服务器IP
|
||||||
@ -54,24 +54,30 @@ sharding:
|
|||||||
props:
|
props:
|
||||||
sql.show: true
|
sql.show: true
|
||||||
tables:
|
tables:
|
||||||
book_content: #book_content表
|
book_content: #book_content表
|
||||||
key-generator-column-name: id #主键
|
key-generator-column-name: id #主键
|
||||||
actual-data-nodes: ds${0}.book_content${0..9} #数据节点
|
actual-data-nodes: ds${0}.book_content${0..9} #数据节点
|
||||||
# database-strategy: #分库策略
|
# database-strategy: #分库策略
|
||||||
# inline:
|
# inline:
|
||||||
# sharding-column: book_id
|
# sharding-column: book_id
|
||||||
# algorithm-expression: ds${book_id % 10}
|
# algorithm-expression: ds${book_id % 10}
|
||||||
table-strategy: #分表策略
|
table-strategy: #分表策略
|
||||||
inline:
|
inline:
|
||||||
shardingColumn: index_id
|
shardingColumn: index_id
|
||||||
algorithm-expression: book_content${index_id % 10}
|
algorithm-expression: book_content${index_id % 10}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
content:
|
content:
|
||||||
save:
|
save:
|
||||||
storage: db #存储介质,db:数据库,file:txt文本
|
storage: db #存储介质,db:数据库,file:txt文本
|
||||||
path: /Users/xiongxiaoyang/books #txt小说文本保存路径
|
path: /Users/xiongxiaoyang/books #txt小说文本保存路径
|
||||||
|
|
||||||
|
# HTTP 代理配置
|
||||||
|
http:
|
||||||
|
proxy:
|
||||||
|
# 是否开启 HTTP 代理,true-开启,false-不开启
|
||||||
|
enabled: false
|
||||||
|
# 代理 IP 或域名
|
||||||
|
ip: u493.kdltps.com
|
||||||
|
# 代理端口号
|
||||||
|
port: 15818
|
@ -1,6 +1,6 @@
|
|||||||
spring:
|
spring:
|
||||||
profiles:
|
profiles:
|
||||||
include: [common]
|
include: [ common ]
|
||||||
main:
|
main:
|
||||||
allow-bean-definition-overriding: true
|
allow-bean-definition-overriding: true
|
||||||
#Redis服务器IP
|
#Redis服务器IP
|
||||||
@ -54,14 +54,14 @@ sharding:
|
|||||||
props:
|
props:
|
||||||
sql.show: true
|
sql.show: true
|
||||||
tables:
|
tables:
|
||||||
book_content: #book_content表
|
book_content: #book_content表
|
||||||
key-generator-column-name: id #主键
|
key-generator-column-name: id #主键
|
||||||
actual-data-nodes: ds${0}.book_content${0..9} #数据节点
|
actual-data-nodes: ds${0}.book_content${0..9} #数据节点
|
||||||
# database-strategy: #分库策略
|
# database-strategy: #分库策略
|
||||||
# inline:
|
# inline:
|
||||||
# sharding-column: book_id
|
# sharding-column: book_id
|
||||||
# algorithm-expression: ds${book_id % 10}
|
# algorithm-expression: ds${book_id % 10}
|
||||||
table-strategy: #分表策略
|
table-strategy: #分表策略
|
||||||
inline:
|
inline:
|
||||||
shardingColumn: index_id
|
shardingColumn: index_id
|
||||||
algorithm-expression: book_content${index_id % 10}
|
algorithm-expression: book_content${index_id % 10}
|
||||||
@ -79,7 +79,15 @@ content:
|
|||||||
storage: db #存储介质,db:数据库,file:txt文本
|
storage: db #存储介质,db:数据库,file:txt文本
|
||||||
path: /Users/xiongxiaoyang/books #txt小说文本保存路径
|
path: /Users/xiongxiaoyang/books #txt小说文本保存路径
|
||||||
|
|
||||||
|
# HTTP 代理配置
|
||||||
|
http:
|
||||||
|
proxy:
|
||||||
|
# 是否开启 HTTP 代理,true-开启,false-不开启
|
||||||
|
enabled: false
|
||||||
|
# 代理 IP 或域名
|
||||||
|
ip: 40.83.102.86
|
||||||
|
# 代理端口号
|
||||||
|
port: 80
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -37,3 +37,13 @@ content:
|
|||||||
save:
|
save:
|
||||||
storage: db #存储介质,db:数据库,file:txt文本
|
storage: db #存储介质,db:数据库,file:txt文本
|
||||||
path: /Users/xiongxiaoyang/books #txt小说文本保存路径
|
path: /Users/xiongxiaoyang/books #txt小说文本保存路径
|
||||||
|
|
||||||
|
# HTTP 代理配置
|
||||||
|
http:
|
||||||
|
proxy:
|
||||||
|
# 是否开启 HTTP 代理,true-开启,false-不开启
|
||||||
|
enabled: false
|
||||||
|
# 代理 IP 或域名
|
||||||
|
ip: u493.kdltps.com
|
||||||
|
# 代理端口号
|
||||||
|
port: 15818
|
Loading…
x
Reference in New Issue
Block a user