diff --git a/.gitignore b/.gitignore index d779df6..69de160 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ /novel-front/novel-front.iml /.idea +/novel-front/src/main/java/xyz/zinglizingli/books/core/crawl/BiqugeCrawlSource.java +/novel-front/src/main/java/xyz/zinglizingli/books/core/config/CrawlBiqugeConfig.java +/novel-front/src/main/java/xyz/zinglizingli/books/core/schedule/CrawlBookSchedule.java diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/core/listener/StartListener.java b/novel-front/src/main/java/xyz/zinglizingli/books/core/listener/StartListener.java index a8e644b..fc0d867 100644 --- a/novel-front/src/main/java/xyz/zinglizingli/books/core/listener/StartListener.java +++ b/novel-front/src/main/java/xyz/zinglizingli/books/core/listener/StartListener.java @@ -2,10 +2,12 @@ package xyz.zinglizingli.books.core.listener; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; import org.springframework.context.ApplicationListener; import org.springframework.context.event.ContextRefreshedEvent; import org.springframework.stereotype.Component; import xyz.zinglizingli.books.core.crawl.BaseCrawlSource; +import xyz.zinglizingli.books.core.utils.Constants; /** * @author 11797 @@ -17,22 +19,28 @@ public class StartListener implements ApplicationListener private final BaseCrawlSource crawlSource; + @Value("${crawl.book.new.enabled}") + private String crawlEnable; + @Override public void onApplicationEvent(ContextRefreshedEvent event) { - log.info("程序启动"); - new Thread(()->{ - while (true) { - try { - log.info("crawlBooks执行中。。。。。。。。。。。。"); - crawlSource.parse(); - Thread.sleep(1000 * 60 * 5); - } catch (Exception e) { - log.error(e.getMessage(), e); - } + if (!Constants.ENABLE_NEW_BOOK.equals(crawlEnable.trim())) { + log.info("程序启动"); + new Thread(() -> { + while (true) { + try { - } - }).start(); + log.info("crawlBooks执行中。。。。。。。。。。。。"); + crawlSource.parse(); + + Thread.sleep(1000 * 60 * 5); + } catch (Exception e) { + log.error(e.getMessage(), e); + } + + } + }).start(); + } } - } diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/core/utils/Constants.java b/novel-front/src/main/java/xyz/zinglizingli/books/core/utils/Constants.java index b347cdf..5069686 100644 --- a/novel-front/src/main/java/xyz/zinglizingli/books/core/utils/Constants.java +++ b/novel-front/src/main/java/xyz/zinglizingli/books/core/utils/Constants.java @@ -95,4 +95,9 @@ public class Constants { * 书籍内容页的广告pattern * */ public static final String CONTENT_AD_PATTERN = "]+app\\.html[^>]+>\\s*]+>\\s*]+>[^<]+\\s*]+>[^<]+]+>>>[^<]+<<\\s*\\s*\\s*"; + + /** + * 是否开启抓取新书 + * */ + public static final String ENABLE_NEW_BOOK = "true"; } diff --git a/novel-front/src/main/resources/application-crawl.yml b/novel-front/src/main/resources/application-crawl.yml index 039e0a3..7df6f42 100644 --- a/novel-front/src/main/resources/application-crawl.yml +++ b/novel-front/src/main/resources/application-crawl.yml @@ -1,7 +1,15 @@ +#是否抓取新书,true:抓 +crawl: + book: + new: + enabled: false + #抓取频率 + period: 2000 + #爬取的网站名称类型 1:笔趣岛 ,2:笔趣塔, 3:顶点 更多网站解析中,敬请期待 biquta: crawlsource: - enabled: true #是否开启此爬虫源 + enabled: false #是否开启此爬虫源 index-url: https://m.biquta.la list-page-url: https://m.biquta.la/class/{0}/{1}.html book-url-pattern: href="/(\d+_\d+)/" @@ -17,7 +25,7 @@ biquta: catalog-pattern: ([^/]+) biqudao: crawlsource: - enabled: true #是否开启此爬虫源 + enabled: false #是否开启此爬虫源 index-url: https://m.biqudao.com list-page-url: https://m.biqudao.com/bqgeclass/{0}/{1}.html book-url-pattern: href="/(bqge\d+)/" @@ -34,7 +42,7 @@ biqudao: dingdian: crawlsource: - enabled: true #是否开启此爬虫源 + enabled: false #是否开启此爬虫源 index-url: https://wap.dingdiann.com list-page-url: https://wap.dingdiann.com/sort/{0}/{1}.html book-url-pattern: href="/(ddk\d+)/" @@ -47,4 +55,21 @@ dingdian: pic-pattern: ([^/]+)

catalog-url-pattern: 查看完整目录 - catalog-pattern: ([^/]+) \ No newline at end of file + catalog-pattern: ([^/]+) + +biquge: + crawlsource: + enabled: true #是否开启此爬虫源 + index-url: http://m.biquge.info + list-page-url: http://m.biquge.info/paihangbang_lastupdate/{0}.html + book-url-pattern: href="/(\d+_\d+)/" + score-pattern: (\d+) + book-name-pattern: ([^<]+) + author-pattern: 作者:([^<]+)< + status-pattern:

状态:([^<]+)

+ cat-pattern: ([^<]+) + update-time-pattern:

更新:(\d+-\d+-\d+T\d+:\d+:\d+)

+ pic-pattern: \s*([^/]+)

+ catalog-url-pattern: 查看完整目录 + catalog-pattern:
\s*([^<]+)\s*
\ No newline at end of file diff --git a/novel-front/src/main/resources/application.yml b/novel-front/src/main/resources/application.yml index 3298845..5273f57 100644 --- a/novel-front/src/main/resources/application.yml +++ b/novel-front/src/main/resources/application.yml @@ -3,9 +3,9 @@ server: spring: datasource: - url: jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai - username: root - password: test123456 + url: jdbc:mysql://35.236.132.9:3306/books?useUnicode=true&characterEncoding=utf-8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai + username: books + password: 123 # url: jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf8&useSSL=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai # username: root # password: test123456