增加百书斋源

This commit is contained in:
xiongxiaoyang 2020-04-14 18:42:09 +08:00
parent f648a8e79e
commit 56645720b3
7 changed files with 68 additions and 9 deletions

View File

@ -10,7 +10,7 @@
</parent>
<groupId>xyz.zinglizingli</groupId>
<artifactId>novel-front</artifactId>
<version>2.2.0.beta</version>
<version>2.3.0.beta</version>
<name>novel-front</name>
<description>小说精品楼-前台web网站</description>

View File

@ -0,0 +1,29 @@
package xyz.zinglizingli.books.core.config;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import xyz.zinglizingli.books.core.crawl.BaseHtmlCrawlSource;
import xyz.zinglizingli.books.core.crawl.BiquCrawlSource;
/**
 * Spring configuration that contributes the HTML crawl source used for the
 * Baishuzhai site.
 *
 * <p>The bean defined here is only registered when the property
 * {@code crawl.website.type} has the value {@code 4}. Properties under the
 * {@code baishuzhai.crawlsource} prefix are bound onto the returned object.
 *
 * @author 11797
 */
@Slf4j
@Configuration
public class CrawlBaishuzhaiConfig {

    /**
     * Creates the crawl source backing the Baishuzhai site.
     *
     * <p>NOTE(review): the method name (and therefore the default bean name)
     * says "dingdian" although the bound prefix is "baishuzhai" — this looks
     * like a copy-paste leftover. It is kept as-is because the bean name may
     * be referenced elsewhere; confirm before renaming.
     *
     * @return a new {@link BiquCrawlSource}, exposed as a {@link BaseHtmlCrawlSource}
     */
    @Bean
    @ConditionalOnProperty(prefix = "crawl.website", name = "type", havingValue = "4")
    // The prefix value must be the prefix of the corresponding properties in application.yml.
    @ConfigurationProperties(prefix = "baishuzhai.crawlsource")
    // This annotation is required, otherwise an error is reported; the next class does not need it.
    @Primary
    public BaseHtmlCrawlSource dingdianCrawlSource() {
        BaseHtmlCrawlSource crawlSource = new BiquCrawlSource();
        return crawlSource;
    }
}

View File

@ -91,7 +91,13 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1);
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
SimpleDateFormat format ;
if(updateTimeStr.length()>10){
format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
}else{
format = new SimpleDateFormat("yy-MM-dd");
}
updateTime = format.parse(updateTimeStr);
if(!newCat2Date.containsKey(i)) {
newCat2Date.put(i, updateTime);
@ -159,7 +165,13 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1);
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
SimpleDateFormat format ;
if(updateTimeStr.length()>10){
format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
}else{
format = new SimpleDateFormat("yy-MM-dd");
}
Date updateTime = format.parse(updateTimeStr);
Pattern picPatten = compile(getPicPattern());
Matcher picMather = picPatten.matcher(body);

View File

@ -54,6 +54,23 @@ dingdian:
catalog-url-pattern: <a\s+href="(/ddk\d+/all.html)">查看完整目录</a>
catalog-pattern: <a\s+style=""\s+href="(/ddk\d+/\d+\.html)">([^/]+)</a>
baishuzhai:
crawlsource:
index-url: https://m.baishuzhai.com
list-page-url: https://m.baishuzhai.com/sort/{0}/{1}.html
book-url-pattern: href="/(ibook/\d+/\d+)/"
score-pattern: <div\s+class="score">(\d+\.\d+)分</div>
book-name-pattern: <p class="title">([^/]+)</p>
author-pattern: 作者:([^/]+)<
status-pattern: 状态:([^/]+)</li>
cat-pattern: 类别:([^/]+)</li>
update-time-pattern: 更新:(\d+-\d+-\d+)</li>
pic-pattern: <img src="([^>]+)"\s+onerror="this.src=
intro-pattern: class="review">([^/]+)</p>
catalog-url-pattern: <a\s+href="(/ibook/\d+/\d+/all\.html)">查看完整目录</a>
catalog-pattern: <a\s+style=""\s+href="(/ibook/\d+/\d+/\d+\.html)">([^/]+)</a>
biquge:
crawlsource:
index-url: http://m.biquge.info

View File

@ -84,10 +84,10 @@ books:
#小说的更新间隔(分)
updatePeriod: 1
#爬取的网站名称类型 1笔趣岛 2笔趣塔,3:顶点小说 更多网站解析中,敬请期待
#爬取的网站名称类型 1:笔趣岛 2:笔趣塔 3:顶点小说 4:百书斋 更多网站解析中,敬请期待
crawl:
website:
type: 2
type: 4

View File

@ -1,14 +1,15 @@
server: {port: 8083}
spring:
datasource: {url: 'jdbc:mysql://47.106.243.172:3306/novel?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai',
username: novel, password: novel!8888}
datasource: {url: 'jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai',
username: root, password: test123456}
mybatis:
mapper-locations: classpath:mybatis/mapping/*.xml
type-aliases-package: xyz.zinglizingli.books.po
configuration: {log-impl: org.apache.ibatis.logging.stdout.StdOutImpl}
mysql: {charset: utf8mb4}
books: {lowestScore: '9.0'}
books: {lowestScore: 9.0}
crawl:
website: {type: '2'}
website: {type: '4'}
soft-novel: '0'
manhua: '0'
logging: {config: 'classpath:logback-boot.xml'}