diff --git a/novel-front/pom.xml b/novel-front/pom.xml index 1b3f4f9..b3d24d3 100644 --- a/novel-front/pom.xml +++ b/novel-front/pom.xml @@ -10,7 +10,7 @@ xyz.zinglizingli novel-front - 2.2.0.beta + 2.3.0.beta novel-front 小说精品楼-前台web网站 diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBaishuzhaiConfig.java b/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBaishuzhaiConfig.java new file mode 100644 index 0000000..bc4a992 --- /dev/null +++ b/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBaishuzhaiConfig.java @@ -0,0 +1,29 @@ +package xyz.zinglizingli.books.core.config; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Primary; +import xyz.zinglizingli.books.core.crawl.BaseHtmlCrawlSource; +import xyz.zinglizingli.books.core.crawl.BiquCrawlSource; + +/** + * @author 11797 + */ +@Slf4j +@Configuration +public class CrawlBaishuzhaiConfig { + + + @Bean + @Primary //必须加此注解,不然报错,下一个类则不需要添加 + @ConfigurationProperties(prefix = "baishuzhai.crawlsource") // prefix值必须是application.yml中对应属性的前缀 + @ConditionalOnProperty(prefix = "crawl.website",name = "type",havingValue = "4") + public BaseHtmlCrawlSource dingdianCrawlSource() { + return new BiquCrawlSource(); + } + + +} diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java b/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java index 9009005..104ad80 100644 --- a/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java +++ b/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java @@ -91,7 +91,13 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource { Matcher updateTimeMatch = updateTimePatten.matcher(body); if (updateTimeMatch.find()) { String updateTimeStr = updateTimeMatch.group(1); - SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss"); + SimpleDateFormat format ; + if(updateTimeStr.length()>10){ + + format = new SimpleDateFormat("yy-MM-dd HH:mm:ss"); + }else{ + format = new SimpleDateFormat("yy-MM-dd"); + } updateTime = format.parse(updateTimeStr); if(!newCat2Date.containsKey(i)) { newCat2Date.put(i, updateTime); @@ -159,7 +165,13 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource { Matcher updateTimeMatch = updateTimePatten.matcher(body); if (updateTimeMatch.find()) { String updateTimeStr = updateTimeMatch.group(1); - SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss"); + SimpleDateFormat format ; + if(updateTimeStr.length()>10){ + + format = new SimpleDateFormat("yy-MM-dd HH:mm:ss"); + }else{ + format = new SimpleDateFormat("yy-MM-dd"); + } Date updateTime = format.parse(updateTimeStr); Pattern picPatten = compile(getPicPattern()); Matcher picMather = picPatten.matcher(body); diff --git a/novel-front/src/main/resources/application-crawl.yml b/novel-front/src/main/resources/application-crawl.yml index 9b04bf2..3b068fe 100644 --- a/novel-front/src/main/resources/application-crawl.yml +++ b/novel-front/src/main/resources/application-crawl.yml @@ -54,6 +54,23 @@ dingdian: catalog-url-pattern: 查看完整目录 catalog-pattern: ([^/]+) + +baishuzhai: + crawlsource: + index-url: https://m.baishuzhai.com + list-page-url: https://m.baishuzhai.com/sort/{0}/{1}.html + book-url-pattern: href="/(ibook/\d+/\d+)/" + score-pattern: (\d+\.\d+)分 + book-name-pattern:

([^/]+)

+ author-pattern: 作者:([^/]+)< + status-pattern: 状态:([^/]+) + cat-pattern: 类别:([^/]+) + update-time-pattern: 更新:(\d+-\d+-\d+) + pic-pattern: ([^/]+)

+ catalog-url-pattern: 查看完整目录 + catalog-pattern: ([^/]+) + biquge: crawlsource: index-url: http://m.biquge.info diff --git a/novel-front/src/main/resources/application.yml b/novel-front/src/main/resources/application.yml index 58c2a91..f5d249e 100644 --- a/novel-front/src/main/resources/application.yml +++ b/novel-front/src/main/resources/application.yml @@ -84,10 +84,10 @@ books: #小说的更新间隔(分) updatePeriod: 1 -#爬取的网站名称类型 1:笔趣岛 ,2:笔趣塔,3:顶点小说 更多网站解析中,敬请期待 +#爬取的网站名称类型 1:笔趣岛 ,2:笔趣塔,3:顶点小说 ,4:百书斋 更多网站解析中,敬请期待 crawl: website: - type: 2 + type: 4 diff --git a/script/crawlbook/application.yml b/script/crawlbook/application.yml index 4ed0aef..c44c400 100644 --- a/script/crawlbook/application.yml +++ b/script/crawlbook/application.yml @@ -1,14 +1,15 @@ server: {port: 8083} spring: - datasource: {url: 'jdbc:mysql://47.106.243.172:3306/novel?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai', - username: novel, password: novel!8888} + datasource: {url: 'jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai', + username: root, password: test123456} mybatis: mapper-locations: classpath:mybatis/mapping/*.xml type-aliases-package: xyz.zinglizingli.books.po configuration: {log-impl: org.apache.ibatis.logging.stdout.StdOutImpl} mysql: {charset: utf8mb4} -books: {lowestScore: '9.0'} +books: {lowestScore: 9.0} crawl: - website: {type: '2'} + website: {type: '4'} soft-novel: '0' manhua: '0' +logging: {config: 'classpath:logback-boot.xml'} diff --git a/script/crawlbook/crawl-book-1.0-SNAPSHOT.jar b/script/crawlbook/crawl-book-1.0-SNAPSHOT.jar index 4e36922..4d0eda3 100644 Binary files a/script/crawlbook/crawl-book-1.0-SNAPSHOT.jar and b/script/crawlbook/crawl-book-1.0-SNAPSHOT.jar differ