From ba272bd89a82f9d29c405c8031c891d58b5444ab Mon Sep 17 00:00:00 2001 From: xiongxiaoyang <1179705413@qq.com> Date: Thu, 14 Jul 2022 22:14:11 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=A2=9E=E5=8A=A0=20HTTP=20=E4=BB=A3?= =?UTF-8?q?=E7=90=86=E9=85=8D=E7=BD=AE=EF=BC=8C=E5=8A=A9=E5=8A=9B=E7=88=AC?= =?UTF-8?q?=E8=99=AB=E9=87=87=E9=9B=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 13 ++++--- .../core/config/HttpProxyProperties.java | 22 +++++++++++ .../novel/core/utils/RestTemplateUtil.java | 37 +++++++++++++------ .../main/resources/application-common-dev.yml | 22 +++++++---- .../resources/application-common-prod.yml | 16 ++++++-- .../build/config/application-common-prod.yml | 12 +++++- 6 files changed, 92 insertions(+), 30 deletions(-) create mode 100644 novel-common/src/main/java/com/java2nb/novel/core/config/HttpProxyProperties.java diff --git a/README.md b/README.md index 4f7cf93..820a31c 100644 --- a/README.md +++ b/README.md @@ -211,12 +211,13 @@ novel-plus -- 父工程 2. 使用`unzip novel-crawl.zip`命令解压 novel-crawl.zip 3. 修改 `config/application-common-prod.yml` 文件中的数据库配置 4. 修改 `config/application-common-prod.yml` 文件中的管理员账号密码 - 5. novel-crawl 目录下使用`bin/novel-crawl.sh start`命令启动爬虫程序 - 6. 打开浏览器,默认`8083`端口访问 - 7. 选择已有或新增爬虫源(支持自定义爬虫规则),点击`开启`按钮,开始采集小说数据 - 8. novel-crawl 目录下使用`bin/novel-crawl.sh stop`命令停止爬虫程序 - 9. novel-crawl 目录下使用`bin/novel-crawl.sh restart`命令重启爬虫程序 - 10. novel-crawl 目录下使用`bin/novel-crawl.sh status`命令查看爬虫程序的运行状态 + 5. 修改 `config/application-common-prod.yml` 文件中的 HTTP 代理配置 + 6. novel-crawl 目录下使用`bin/novel-crawl.sh start`命令启动爬虫程序 + 7. 打开浏览器,默认`8083`端口访问 + 8. 选择已有或新增爬虫源(支持自定义爬虫规则),点击`开启`按钮,开始采集小说数据 + 9. novel-crawl 目录下使用`bin/novel-crawl.sh stop`命令停止爬虫程序 + 10. novel-crawl 目录下使用`bin/novel-crawl.sh restart`命令重启爬虫程序 + 11. novel-crawl 目录下使用`bin/novel-crawl.sh status`命令查看爬虫程序的运行状态 - 前台安装 diff --git a/novel-common/src/main/java/com/java2nb/novel/core/config/HttpProxyProperties.java b/novel-common/src/main/java/com/java2nb/novel/core/config/HttpProxyProperties.java new file mode 100644 index 0000000..140f298 --- /dev/null +++ b/novel-common/src/main/java/com/java2nb/novel/core/config/HttpProxyProperties.java @@ -0,0 +1,22 @@ +package com.java2nb.novel.core.config; + +import lombok.Data; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.stereotype.Component; + +/** + * @author xiongxiaoyang + * @date 2022/7/14 + */ +@Data +@Component +@ConfigurationProperties(prefix = "http.proxy") +public class HttpProxyProperties { + + private Boolean enabled; + + private String ip; + + private Integer port; + +} diff --git a/novel-common/src/main/java/com/java2nb/novel/core/utils/RestTemplateUtil.java b/novel-common/src/main/java/com/java2nb/novel/core/utils/RestTemplateUtil.java index f05b700..d4d2684 100644 --- a/novel-common/src/main/java/com/java2nb/novel/core/utils/RestTemplateUtil.java +++ b/novel-common/src/main/java/com/java2nb/novel/core/utils/RestTemplateUtil.java @@ -1,6 +1,8 @@ package com.java2nb.novel.core.utils; +import com.java2nb.novel.core.config.HttpProxyProperties; import lombok.SneakyThrows; +import org.apache.http.HttpHost; import org.apache.http.config.Registry; import org.apache.http.config.RegistryBuilder; import org.apache.http.conn.socket.ConnectionSocketFactory; @@ -8,37 +10,46 @@ import org.apache.http.conn.socket.PlainConnectionSocketFactory; import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.TrustStrategy; import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.springframework.http.client.HttpComponentsClientHttpRequestFactory; import org.springframework.http.converter.HttpMessageConverter; import org.springframework.http.converter.StringHttpMessageConverter; +import org.springframework.stereotype.Component; import org.springframework.web.client.RestTemplate; import javax.net.ssl.SSLContext; import java.nio.charset.Charset; import java.security.cert.X509Certificate; import java.util.List; +import java.util.Objects; +@Component public class RestTemplateUtil { + private static HttpProxyProperties httpProxyProperties; + RestTemplateUtil(HttpProxyProperties properties) { + httpProxyProperties = properties; + } @SneakyThrows public static RestTemplate getInstance(String charset) { + TrustStrategy acceptingTrustStrategy = (X509Certificate[] chain, String authType) -> true; //忽略证书 SSLContext sslContext = org.apache.http.ssl.SSLContexts.custom() - .loadTrustMaterial(null, acceptingTrustStrategy) - .build(); + .loadTrustMaterial(null, acceptingTrustStrategy) + .build(); SSLConnectionSocketFactory csf = new SSLConnectionSocketFactory(sslContext); Registry registry = RegistryBuilder.create() - .register("http", PlainConnectionSocketFactory.getSocketFactory()) - .register("https", csf) - .build(); + .register("http", PlainConnectionSocketFactory.getSocketFactory()) + .register("https", csf) + .build(); PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager(registry); //连接池的最大连接数,0代表不限;如果取0,需要考虑连接泄露导致系统崩溃的后果 @@ -46,22 +57,26 @@ public class RestTemplateUtil { //每个路由的最大连接数,如果只调用一个地址,可以将其设置为最大连接数 connectionManager.setDefaultMaxPerRoute(300); - CloseableHttpClient httpClient = HttpClients.custom() - .setConnectionManager(connectionManager) - .build(); - + HttpClientBuilder clientBuilder = HttpClients.custom(); + if (Objects.nonNull(httpProxyProperties) && Boolean.TRUE.equals(httpProxyProperties.getEnabled())) { + HttpHost proxy = new HttpHost(httpProxyProperties.getIp(), httpProxyProperties.getPort()); + clientBuilder.setProxy(proxy); + } + CloseableHttpClient httpClient = clientBuilder.setConnectionManager(connectionManager) + .build(); HttpComponentsClientHttpRequestFactory requestFactory = - new HttpComponentsClientHttpRequestFactory(); + new HttpComponentsClientHttpRequestFactory(); requestFactory.setHttpClient(httpClient); requestFactory.setConnectionRequestTimeout(3000); requestFactory.setConnectTimeout(3000); requestFactory.setReadTimeout(30000); + RestTemplate restTemplate = new RestTemplate(requestFactory); List> list = restTemplate.getMessageConverters(); for (HttpMessageConverter httpMessageConverter : list) { - if(httpMessageConverter instanceof StringHttpMessageConverter) { + if (httpMessageConverter instanceof StringHttpMessageConverter) { ((StringHttpMessageConverter) httpMessageConverter).setDefaultCharset(Charset.forName(charset)); break; } diff --git a/novel-common/src/main/resources/application-common-dev.yml b/novel-common/src/main/resources/application-common-dev.yml index 92d740d..d59ac01 100644 --- a/novel-common/src/main/resources/application-common-dev.yml +++ b/novel-common/src/main/resources/application-common-dev.yml @@ -1,6 +1,6 @@ spring: profiles: - include: [common] + include: [ common ] main: allow-bean-definition-overriding: true #Redis服务器IP @@ -54,24 +54,30 @@ sharding: props: sql.show: true tables: - book_content: #book_content表 + book_content: #book_content表 key-generator-column-name: id #主键 actual-data-nodes: ds${0}.book_content${0..9} #数据节点 # database-strategy: #分库策略 # inline: # sharding-column: book_id # algorithm-expression: ds${book_id % 10} - table-strategy: #分表策略 + table-strategy: #分表策略 inline: shardingColumn: index_id algorithm-expression: book_content${index_id % 10} - - - - content: save: storage: db #存储介质,db:数据库,file:txt文本 - path: /Users/xiongxiaoyang/books #txt小说文本保存路径 \ No newline at end of file + path: /Users/xiongxiaoyang/books #txt小说文本保存路径 + +# HTTP 代理配置 +http: + proxy: + # 是否开启 HTTP 代理,true-开启,false-不开启 + enabled: false + # 代理 IP + ip: u493.kdltps.com + # 代理端口号 + port: 15818 \ No newline at end of file diff --git a/novel-common/src/main/resources/application-common-prod.yml b/novel-common/src/main/resources/application-common-prod.yml index 9f02a0a..df2b262 100644 --- a/novel-common/src/main/resources/application-common-prod.yml +++ b/novel-common/src/main/resources/application-common-prod.yml @@ -1,6 +1,6 @@ spring: profiles: - include: [common] + include: [ common ] main: allow-bean-definition-overriding: true #Redis服务器IP @@ -54,14 +54,14 @@ sharding: props: sql.show: true tables: - book_content: #book_content表 + book_content: #book_content表 key-generator-column-name: id #主键 actual-data-nodes: ds${0}.book_content${0..9} #数据节点 # database-strategy: #分库策略 # inline: # sharding-column: book_id # algorithm-expression: ds${book_id % 10} - table-strategy: #分表策略 + table-strategy: #分表策略 inline: shardingColumn: index_id algorithm-expression: book_content${index_id % 10} @@ -79,7 +79,15 @@ content: storage: db #存储介质,db:数据库,file:txt文本 path: /Users/xiongxiaoyang/books #txt小说文本保存路径 - +# HTTP 代理配置 +http: + proxy: + # 是否开启 HTTP 代理,true-开启,false-不开启 + enabled: false + # 代理 IP + ip: 40.83.102.86 + # 代理端口号 + port: 80 diff --git a/novel-crawl/src/main/build/config/application-common-prod.yml b/novel-crawl/src/main/build/config/application-common-prod.yml index 552d642..c181de9 100644 --- a/novel-crawl/src/main/build/config/application-common-prod.yml +++ b/novel-crawl/src/main/build/config/application-common-prod.yml @@ -36,4 +36,14 @@ crawl: content: save: storage: db #存储介质,db:数据库,file:txt文本 - path: /Users/xiongxiaoyang/books #txt小说文本保存路径 \ No newline at end of file + path: /Users/xiongxiaoyang/books #txt小说文本保存路径 + +# HTTP 代理配置 +http: + proxy: + # 是否开启 HTTP 代理,true-开启,false-不开启 + enabled: false + # 代理 IP + ip: u493.kdltps.com + # 代理端口号 + port: 15818 \ No newline at end of file