mirror of
https://github.com/201206030/novel.git
synced 2025-04-27 07:30:50 +00:00
增加百书斋源
This commit is contained in:
parent
f648a8e79e
commit
56645720b3
@ -10,7 +10,7 @@
|
|||||||
</parent>
|
</parent>
|
||||||
<groupId>xyz.zinglizingli</groupId>
|
<groupId>xyz.zinglizingli</groupId>
|
||||||
<artifactId>novel-front</artifactId>
|
<artifactId>novel-front</artifactId>
|
||||||
<version>2.2.0.beta</version>
|
<version>2.3.0.beta</version>
|
||||||
<name>novel-front</name>
|
<name>novel-front</name>
|
||||||
<description>小说精品楼-前台web网站</description>
|
<description>小说精品楼-前台web网站</description>
|
||||||
|
|
||||||
|
@ -0,0 +1,29 @@
|
|||||||
|
package xyz.zinglizingli.books.core.config;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.context.annotation.Primary;
|
||||||
|
import xyz.zinglizingli.books.core.crawl.BaseHtmlCrawlSource;
|
||||||
|
import xyz.zinglizingli.books.core.crawl.BiquCrawlSource;
|
||||||
|
|
||||||
|
/**
* Spring configuration that registers the crawl source bean for the
* Baishuzhai site (property prefix {@code baishuzhai.crawlsource}).
*
* The bean is only declared when the property {@code crawl.website.type}
* has the value {@code 4}.
*
|
||||||
|
* @author 11797
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@Configuration
|
||||||
|
public class CrawlBaishuzhaiConfig {
|
||||||
|
|
||||||
|
|
||||||
|
/**
* Creates the crawl source used for Baishuzhai: a {@link BiquCrawlSource}
* instance bound to the {@code baishuzhai.crawlsource} properties.
*
* NOTE(review): the method name (and therefore the bean name) is
* {@code dingdianCrawlSource}, which looks copied from another source's
* configuration - confirm before renaming, since callers may reference
* the bean by name.
*
* @return a new {@link BiquCrawlSource} exposed as {@link BaseHtmlCrawlSource}
*/
@Bean
|
||||||
|
@Primary // This annotation is required, otherwise an error is reported; the next class does not need it
|
||||||
|
@ConfigurationProperties(prefix = "baishuzhai.crawlsource") // the prefix value must be the prefix of the corresponding properties in application.yml
|
||||||
|
@ConditionalOnProperty(prefix = "crawl.website",name = "type",havingValue = "4")
|
||||||
|
public BaseHtmlCrawlSource dingdianCrawlSource() {
|
||||||
|
return new BiquCrawlSource();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@ -91,7 +91,13 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
|
|||||||
Matcher updateTimeMatch = updateTimePatten.matcher(body);
|
Matcher updateTimeMatch = updateTimePatten.matcher(body);
|
||||||
if (updateTimeMatch.find()) {
|
if (updateTimeMatch.find()) {
|
||||||
String updateTimeStr = updateTimeMatch.group(1);
|
String updateTimeStr = updateTimeMatch.group(1);
|
||||||
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
|
SimpleDateFormat format ;
|
||||||
|
if(updateTimeStr.length()>10){
|
||||||
|
|
||||||
|
format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
|
||||||
|
}else{
|
||||||
|
format = new SimpleDateFormat("yy-MM-dd");
|
||||||
|
}
|
||||||
updateTime = format.parse(updateTimeStr);
|
updateTime = format.parse(updateTimeStr);
|
||||||
if(!newCat2Date.containsKey(i)) {
|
if(!newCat2Date.containsKey(i)) {
|
||||||
newCat2Date.put(i, updateTime);
|
newCat2Date.put(i, updateTime);
|
||||||
@ -159,7 +165,13 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
|
|||||||
Matcher updateTimeMatch = updateTimePatten.matcher(body);
|
Matcher updateTimeMatch = updateTimePatten.matcher(body);
|
||||||
if (updateTimeMatch.find()) {
|
if (updateTimeMatch.find()) {
|
||||||
String updateTimeStr = updateTimeMatch.group(1);
|
String updateTimeStr = updateTimeMatch.group(1);
|
||||||
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
|
SimpleDateFormat format ;
|
||||||
|
if(updateTimeStr.length()>10){
|
||||||
|
|
||||||
|
format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
|
||||||
|
}else{
|
||||||
|
format = new SimpleDateFormat("yy-MM-dd");
|
||||||
|
}
|
||||||
Date updateTime = format.parse(updateTimeStr);
|
Date updateTime = format.parse(updateTimeStr);
|
||||||
Pattern picPatten = compile(getPicPattern());
|
Pattern picPatten = compile(getPicPattern());
|
||||||
Matcher picMather = picPatten.matcher(body);
|
Matcher picMather = picPatten.matcher(body);
|
||||||
|
@ -54,6 +54,23 @@ dingdian:
|
|||||||
catalog-url-pattern: <a\s+href="(/ddk\d+/all.html)">查看完整目录</a>
|
catalog-url-pattern: <a\s+href="(/ddk\d+/all.html)">查看完整目录</a>
|
||||||
catalog-pattern: <a\s+style=""\s+href="(/ddk\d+/\d+\.html)">([^/]+)</a>
|
catalog-pattern: <a\s+style=""\s+href="(/ddk\d+/\d+\.html)">([^/]+)</a>
|
||||||
|
|
||||||
|
|
||||||
|
baishuzhai:
|
||||||
|
crawlsource:
|
||||||
|
index-url: https://m.baishuzhai.com
|
||||||
|
list-page-url: https://m.baishuzhai.com/sort/{0}/{1}.html
|
||||||
|
book-url-pattern: href="/(ibook/\d+/\d+)/"
|
||||||
|
score-pattern: <div\s+class="score">(\d+\.\d+)分</div>
|
||||||
|
book-name-pattern: <p class="title">([^/]+)</p>
|
||||||
|
author-pattern: 作者:([^/]+)<
|
||||||
|
status-pattern: 状态:([^/]+)</li>
|
||||||
|
cat-pattern: 类别:([^/]+)</li>
|
||||||
|
update-time-pattern: 更新:(\d+-\d+-\d+)</li>
|
||||||
|
pic-pattern: <img src="([^>]+)"\s+onerror="this.src=
|
||||||
|
intro-pattern: class="review">([^/]+)</p>
|
||||||
|
catalog-url-pattern: <a\s+href="(/ibook/\d+/\d+/all\.html)">查看完整目录</a>
|
||||||
|
catalog-pattern: <a\s+style=""\s+href="(/ibook/\d+/\d+/\d+\.html)">([^/]+)</a>
|
||||||
|
|
||||||
biquge:
|
biquge:
|
||||||
crawlsource:
|
crawlsource:
|
||||||
index-url: http://m.biquge.info
|
index-url: http://m.biquge.info
|
||||||
|
@ -84,10 +84,10 @@ books:
|
|||||||
#小说的更新间隔(分)
|
#小说的更新间隔(分)
|
||||||
updatePeriod: 1
|
updatePeriod: 1
|
||||||
|
|
||||||
#爬取的网站名称类型 1:笔趣岛 ,2:笔趣塔,3:顶点小说 更多网站解析中,敬请期待
|
#爬取的网站名称类型 1:笔趣岛 ,2:笔趣塔,3:顶点小说 ,4:百书斋 更多网站解析中,敬请期待
|
||||||
crawl:
|
crawl:
|
||||||
website:
|
website:
|
||||||
type: 2
|
type: 4
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,14 +1,15 @@
|
|||||||
server: {port: 8083}
|
server: {port: 8083}
|
||||||
spring:
|
spring:
|
||||||
datasource: {url: 'jdbc:mysql://47.106.243.172:3306/novel?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai',
|
datasource: {url: 'jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai',
|
||||||
username: novel, password: novel!8888}
|
username: root, password: test123456}
|
||||||
mybatis:
|
mybatis:
|
||||||
mapper-locations: classpath:mybatis/mapping/*.xml
|
mapper-locations: classpath:mybatis/mapping/*.xml
|
||||||
type-aliases-package: xyz.zinglizingli.books.po
|
type-aliases-package: xyz.zinglizingli.books.po
|
||||||
configuration: {log-impl: org.apache.ibatis.logging.stdout.StdOutImpl}
|
configuration: {log-impl: org.apache.ibatis.logging.stdout.StdOutImpl}
|
||||||
mysql: {charset: utf8mb4}
|
mysql: {charset: utf8mb4}
|
||||||
books: {lowestScore: '9.0'}
|
books: {lowestScore: 9.0}
|
||||||
crawl:
|
crawl:
|
||||||
website: {type: '2'}
|
website: {type: '4'}
|
||||||
soft-novel: '0'
|
soft-novel: '0'
|
||||||
manhua: '0'
|
manhua: '0'
|
||||||
|
logging: {config: 'classpath:logback-boot.xml'}
|
||||||
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user