diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBiqudaoConfig.java b/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBiqudaoConfig.java index aa20164..4c207cb 100644 --- a/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBiqudaoConfig.java +++ b/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBiqudaoConfig.java @@ -19,7 +19,7 @@ public class CrawlBiqudaoConfig { @Bean @ConfigurationProperties(prefix = "biqudao.crawlsource") // prefix值必须是application.yml中对应属性的前缀 @ConditionalOnProperty(prefix = "biqudao.crawlsource",name = "enabled",havingValue = "true") - public BaseHtmlCrawlSource BiqutaCrawlSource() { + public BaseHtmlCrawlSource biqudaoCrawlSource() { return new BiquCrawlSource(); } diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBiqutaConfig.java b/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBiqutaConfig.java index 5e21c2f..d149268 100644 --- a/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBiqutaConfig.java +++ b/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBiqutaConfig.java @@ -5,7 +5,6 @@ import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.Primary; import xyz.zinglizingli.books.core.crawl.BaseHtmlCrawlSource; import xyz.zinglizingli.books.core.crawl.BiquCrawlSource; @@ -18,10 +17,9 @@ public class CrawlBiqutaConfig { @Bean - @Primary //必须加此注解,不然报错,下一个类则不需要添加 @ConfigurationProperties(prefix = "biquta.crawlsource") // prefix值必须是application.yml中对应属性的前缀 @ConditionalOnProperty(prefix = "biquta.crawlsource",name = "enabled",havingValue = "true") - public BaseHtmlCrawlSource BiqutaCrawlSource() { + public BaseHtmlCrawlSource biqutaCrawlSource() { return new BiquCrawlSource(); } diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlDingdianConfig.java b/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlDingdianConfig.java new file mode 100644 index 0000000..17fc2ce --- /dev/null +++ b/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlDingdianConfig.java @@ -0,0 +1,29 @@ +package xyz.zinglizingli.books.core.config; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Primary; +import xyz.zinglizingli.books.core.crawl.BaseHtmlCrawlSource; +import xyz.zinglizingli.books.core.crawl.BiquCrawlSource; + +/** + * @author 11797 + */ +@Slf4j +@Configuration +public class CrawlDingdianConfig { + + + @Bean + @Primary //必须加此注解,不然报错,下一个类则不需要添加 + @ConfigurationProperties(prefix = "dingdian.crawlsource") // prefix值必须是application.yml中对应属性的前缀 + @ConditionalOnProperty(prefix = "dingdian.crawlsource",name = "enabled",havingValue = "true") + public BaseHtmlCrawlSource dingdianCrawlSource() { + return new BiquCrawlSource(); + } + + +} diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java b/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java index 001f8af..ec567a2 100644 --- a/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java +++ b/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiquCrawlSource.java @@ -141,7 +141,7 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource { //查询章节内容 - String body3 = RestTemplateUtil.getBodyByUtf8(contentUrl.replace("//m.","//www.")); + String body3 = RestTemplateUtil.getBodyByUtf8(contentUrl.replace("//m.","//www.").replace("//wap.","//www.")); if (body3 != null) { String start = "id=\"content\">"; String end = "<script>"; diff --git a/novel-front/src/main/resources/application-crawl.yml b/novel-front/src/main/resources/application-crawl.yml index 438ef8b..a653725 100644 --- a/novel-front/src/main/resources/application-crawl.yml +++ b/novel-front/src/main/resources/application-crawl.yml @@ -1,4 +1,4 @@ -#爬取的网站名称类型 1:笔趣岛 ,2:笔趣塔 更多网站解析中,敬请期待 +#爬取的网站名称类型 1:笔趣岛 ,2:笔趣塔, 3:顶点 更多网站解析中,敬请期待 biquta: crawlsource: enabled: true #是否开启此爬虫源 @@ -30,4 +30,21 @@ biqudao: pic-pattern: <img src="([^>]+)"\s+onerror="this.src= intro-pattern: class="review">([^<]+)</p> catalog-url-pattern: <a\s+href="(/bqge\d+/all\.html)">查看完整目录</a> - catalog-pattern: <a[^/]+style[^/]+href="(/bqge\d+/\d+\.html)">([^/]+)</a> \ No newline at end of file + catalog-pattern: <a[^/]+style[^/]+href="(/bqge\d+/\d+\.html)">([^/]+)</a> + +dingdian: + crawlsource: + enabled: true #是否开启此爬虫源 + index-url: https://wap.dingdiann.com + list-page-url: https://wap.dingdiann.com/sort/{0}/{1}.html + book-url-pattern: href="/(ddk\d+)/" + score-pattern: <div\s+class="score">(\d+\.\d+)分</div> + book-name-pattern: <p class="title">([^/]+)</p> + author-pattern: 作者:([^/]+)< + status-pattern: 状态:([^/]+)</li> + cat-pattern: 类别:([^/]+)</li> + update-time-pattern: 更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a> + pic-pattern: <img src="([^>]+)"\s+onerror="this.src= + intro-pattern: class="review">([^<]+)</p> + catalog-url-pattern: <a\s+href="(/ddk\d+/all.html)">查看完整目录</a> + catalog-pattern: <a\s+style=""\s+href="(/ddk\d+/\d+\.html)">([^/]+)</a> \ No newline at end of file diff --git a/script/crawlbook/application.yml b/script/crawlbook/application.yml index efcb3cf..d1c1479 100644 --- a/script/crawlbook/application.yml +++ b/script/crawlbook/application.yml @@ -1,7 +1,7 @@ server: {port: 8083} spring: datasource: {url: 'jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai', - username: books, password: books} + username: root, password: test123456} mybatis: mapper-locations: classpath:mybatis/mapping/*.xml type-aliases-package: xyz.zinglizingli.books.po @@ -9,6 +9,6 @@ mybatis: mysql: {charset: utf8mb4} books: {lowestScore: '8.5'} crawl: - website: {type: '2'} + website: {type: '3'} soft-novel: '0' manhua: '0' diff --git a/script/crawlbook/crawl-book-1.0-SNAPSHOT.jar b/script/crawlbook/crawl-book-1.0-SNAPSHOT.jar index 82bb453..36251f6 100644 Binary files a/script/crawlbook/crawl-book-1.0-SNAPSHOT.jar and b/script/crawlbook/crawl-book-1.0-SNAPSHOT.jar differ