更新爬虫可插拔

This commit is contained in:
xiongxiaoyang
2019-12-23 13:48:46 +08:00
parent 82618f354e
commit 58eb59735e
5 changed files with 61 additions and 20 deletions

View File

@ -2,10 +2,12 @@ package xyz.zinglizingli.books.core.listener;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.ApplicationListener;
import org.springframework.context.event.ContextRefreshedEvent;
import org.springframework.stereotype.Component;
import xyz.zinglizingli.books.core.crawl.BaseCrawlSource;
import xyz.zinglizingli.books.core.utils.Constants;
/**
* @author 11797
@ -17,22 +19,28 @@ public class StartListener implements ApplicationListener<ContextRefreshedEvent>
private final BaseCrawlSource crawlSource;
@Value("${crawl.book.new.enabled}")
private String crawlEnable;
/**
 * Handles the {@code ContextRefreshedEvent}: logs a startup message and starts a
 * thread whose endless loop logs, calls {@code crawlSource.parse()} and then
 * sleeps before the next pass.
 *
 * NOTE(review): the statements below look like two versions of this body
 * interleaved (the text reads like a diff whose +/- markers were stripped):
 * the log/thread/loop sequence appears twice and the braces do not pair up.
 * Confirm against the repository before relying on this text.
 *
 * @param event the context-refreshed event; not read by the visible statements
 */
@Override
public void onApplicationEvent(ContextRefreshedEvent event) {
// First copy of the body: the thread is started without any condition.
log.info("程序启动");
new Thread(()->{
while (true) {
try {
log.info("crawlBooks执行中。。。。。。。。。。。。");
crawlSource.parse();
// 1000 * 60 * 5 ms = 5 minutes between passes.
Thread.sleep(1000 * 60 * 5);
} catch (Exception e) {
// Any exception is only logged here.
log.error(e.getMessage(), e);
}
// Second copy of the body: the same startup sequence, wrapped in a check that the
// trimmed crawlEnable value does NOT equal Constants.ENABLE_NEW_BOOK.
// NOTE(review): confirm the negation is intended.
if (!Constants.ENABLE_NEW_BOOK.equals(crawlEnable.trim())) {
log.info("程序启动");
new Thread(() -> {
while (true) {
try {
}
}).start();
log.info("crawlBooks执行中。。。。。。。。。。。。");
crawlSource.parse();
// 1000 * 60 * 5 ms = 5 minutes between passes.
Thread.sleep(1000 * 60 * 5);
} catch (Exception e) {
// Any exception is only logged here.
log.error(e.getMessage(), e);
}
}
}).start();
}
}
}

View File

@ -95,4 +95,9 @@ public class Constants {
* 书籍内容页的广告pattern
* */
public static final String CONTENT_AD_PATTERN = "<div[^>]+app\\.html[^>]+>\\s*<div[^>]+>\\s*<div[^>]+>[^<]+</div>\\s*<div[^>]+>[^<]+<span[^>]+>>>[^<]+<<</span>\\s*</div>\\s*</div>\\s*</div>";
/**
 * Whether crawling of new books is enabled.
 * NOTE(review): presumably the literal that the "crawl.book.new.enabled" setting is
 * compared against — confirm against the callers.
 * */
public static final String ENABLE_NEW_BOOK = "true";
}