mirror of
https://github.com/201206030/novel.git
synced 2025-04-27 07:30:50 +00:00
新增顶点小说网站解析,设置默认爬虫源为顶点小说
This commit is contained in:
parent
c71a5ebd6f
commit
793d32b314
@ -19,7 +19,7 @@ public class CrawlBiqudaoConfig {
|
|||||||
@Bean
|
@Bean
|
||||||
@ConfigurationProperties(prefix = "biqudao.crawlsource") // prefix值必须是application.yml中对应属性的前缀
|
@ConfigurationProperties(prefix = "biqudao.crawlsource") // prefix值必须是application.yml中对应属性的前缀
|
||||||
@ConditionalOnProperty(prefix = "biqudao.crawlsource",name = "enabled",havingValue = "true")
|
@ConditionalOnProperty(prefix = "biqudao.crawlsource",name = "enabled",havingValue = "true")
|
||||||
public BaseHtmlCrawlSource BiqutaCrawlSource() {
|
public BaseHtmlCrawlSource biqudaoCrawlSource() {
|
||||||
return new BiquCrawlSource();
|
return new BiquCrawlSource();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,7 +5,6 @@ import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
|||||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
import org.springframework.context.annotation.Primary;
|
|
||||||
import xyz.zinglizingli.books.core.crawl.BaseHtmlCrawlSource;
|
import xyz.zinglizingli.books.core.crawl.BaseHtmlCrawlSource;
|
||||||
import xyz.zinglizingli.books.core.crawl.BiquCrawlSource;
|
import xyz.zinglizingli.books.core.crawl.BiquCrawlSource;
|
||||||
|
|
||||||
@ -18,10 +17,9 @@ public class CrawlBiqutaConfig {
|
|||||||
|
|
||||||
|
|
||||||
@Bean
|
@Bean
|
||||||
@Primary //必须加此注解,不然报错,下一个类则不需要添加
|
|
||||||
@ConfigurationProperties(prefix = "biquta.crawlsource") // prefix值必须是application.yml中对应属性的前缀
|
@ConfigurationProperties(prefix = "biquta.crawlsource") // prefix值必须是application.yml中对应属性的前缀
|
||||||
@ConditionalOnProperty(prefix = "biquta.crawlsource",name = "enabled",havingValue = "true")
|
@ConditionalOnProperty(prefix = "biquta.crawlsource",name = "enabled",havingValue = "true")
|
||||||
public BaseHtmlCrawlSource BiqutaCrawlSource() {
|
public BaseHtmlCrawlSource biqutaCrawlSource() {
|
||||||
return new BiquCrawlSource();
|
return new BiquCrawlSource();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,29 @@
|
|||||||
|
package xyz.zinglizingli.books.core.config;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.context.annotation.Primary;
|
||||||
|
import xyz.zinglizingli.books.core.crawl.BaseHtmlCrawlSource;
|
||||||
|
import xyz.zinglizingli.books.core.crawl.BiquCrawlSource;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author 11797
|
||||||
|
*/
|
||||||
|
@Slf4j
|
||||||
|
@Configuration
|
||||||
|
public class CrawlDingdianConfig {
|
||||||
|
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@Primary //必须加此注解,不然报错,下一个类则不需要添加
|
||||||
|
@ConfigurationProperties(prefix = "dingdian.crawlsource") // prefix值必须是application.yml中对应属性的前缀
|
||||||
|
@ConditionalOnProperty(prefix = "dingdian.crawlsource",name = "enabled",havingValue = "true")
|
||||||
|
public BaseHtmlCrawlSource dingdianCrawlSource() {
|
||||||
|
return new BiquCrawlSource();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@ -141,7 +141,7 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
|
|||||||
|
|
||||||
|
|
||||||
//查询章节内容
|
//查询章节内容
|
||||||
String body3 = RestTemplateUtil.getBodyByUtf8(contentUrl.replace("//m.","//www."));
|
String body3 = RestTemplateUtil.getBodyByUtf8(contentUrl.replace("//m.","//www.").replace("//wap.","//www."));
|
||||||
if (body3 != null) {
|
if (body3 != null) {
|
||||||
String start = "id=\"content\">";
|
String start = "id=\"content\">";
|
||||||
String end = "<script>";
|
String end = "<script>";
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#爬取的网站名称类型 1:笔趣岛 ,2:笔趣塔 更多网站解析中,敬请期待
|
#爬取的网站名称类型 1:笔趣岛 ,2:笔趣塔, 3:顶点 更多网站解析中,敬请期待
|
||||||
biquta:
|
biquta:
|
||||||
crawlsource:
|
crawlsource:
|
||||||
enabled: true #是否开启此爬虫源
|
enabled: true #是否开启此爬虫源
|
||||||
@ -31,3 +31,20 @@ biqudao:
|
|||||||
intro-pattern: class="review">([^<]+)</p>
|
intro-pattern: class="review">([^<]+)</p>
|
||||||
catalog-url-pattern: <a\s+href="(/bqge\d+/all\.html)">查看完整目录</a>
|
catalog-url-pattern: <a\s+href="(/bqge\d+/all\.html)">查看完整目录</a>
|
||||||
catalog-pattern: <a[^/]+style[^/]+href="(/bqge\d+/\d+\.html)">([^/]+)</a>
|
catalog-pattern: <a[^/]+style[^/]+href="(/bqge\d+/\d+\.html)">([^/]+)</a>
|
||||||
|
|
||||||
|
dingdian:
|
||||||
|
crawlsource:
|
||||||
|
enabled: true #是否开启此爬虫源
|
||||||
|
index-url: https://wap.dingdiann.com
|
||||||
|
list-page-url: https://wap.dingdiann.com/sort/{0}/{1}.html
|
||||||
|
book-url-pattern: href="/(ddk\d+)/"
|
||||||
|
score-pattern: <div\s+class="score">(\d+\.\d+)分</div>
|
||||||
|
book-name-pattern: <p class="title">([^/]+)</p>
|
||||||
|
author-pattern: 作者:([^/]+)<
|
||||||
|
status-pattern: 状态:([^/]+)</li>
|
||||||
|
cat-pattern: 类别:([^/]+)</li>
|
||||||
|
update-time-pattern: 更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a>
|
||||||
|
pic-pattern: <img src="([^>]+)"\s+onerror="this.src=
|
||||||
|
intro-pattern: class="review">([^<]+)</p>
|
||||||
|
catalog-url-pattern: <a\s+href="(/ddk\d+/all.html)">查看完整目录</a>
|
||||||
|
catalog-pattern: <a\s+style=""\s+href="(/ddk\d+/\d+\.html)">([^/]+)</a>
|
@ -1,7 +1,7 @@
|
|||||||
server: {port: 8083}
|
server: {port: 8083}
|
||||||
spring:
|
spring:
|
||||||
datasource: {url: 'jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai',
|
datasource: {url: 'jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai',
|
||||||
username: books, password: books}
|
username: root, password: test123456}
|
||||||
mybatis:
|
mybatis:
|
||||||
mapper-locations: classpath:mybatis/mapping/*.xml
|
mapper-locations: classpath:mybatis/mapping/*.xml
|
||||||
type-aliases-package: xyz.zinglizingli.books.po
|
type-aliases-package: xyz.zinglizingli.books.po
|
||||||
@ -9,6 +9,6 @@ mybatis:
|
|||||||
mysql: {charset: utf8mb4}
|
mysql: {charset: utf8mb4}
|
||||||
books: {lowestScore: '8.5'}
|
books: {lowestScore: '8.5'}
|
||||||
crawl:
|
crawl:
|
||||||
website: {type: '2'}
|
website: {type: '3'}
|
||||||
soft-novel: '0'
|
soft-novel: '0'
|
||||||
manhua: '0'
|
manhua: '0'
|
||||||
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user