增加百书斋源

This commit is contained in:
xiongxiaoyang
2020-04-14 18:42:09 +08:00
parent f648a8e79e
commit 56645720b3
7 changed files with 68 additions and 9 deletions

View File

@ -0,0 +1,29 @@
package xyz.zinglizingli.books.core.config;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Primary;
import xyz.zinglizingli.books.core.crawl.BaseHtmlCrawlSource;
import xyz.zinglizingli.books.core.crawl.BiquCrawlSource;
/**
 * Spring configuration that registers the HTML crawl source used for the
 * Baishuzhai site.
 *
 * @author 11797
 */
@Slf4j
@Configuration
public class CrawlBaishuzhaiConfig {

    /**
     * Builds the crawl source bean for this site.
     *
     * <p>The returned object is bound to the {@code baishuzhai.crawlsource}
     * configuration properties, and the bean is only registered when
     * {@code crawl.website.type} has the value {@code 4}.
     *
     * <p>NOTE(review): the method name looks carried over from another site's
     * configuration. Spring uses the method name as the default bean name, so
     * confirm nothing looks the bean up by name before renaming it.
     *
     * @return a new {@link BiquCrawlSource} exposed as a {@link BaseHtmlCrawlSource}
     */
    @Bean
    // This annotation is required, otherwise an error is reported; the next class does not need it.
    @Primary
    // The prefix value must be the prefix of the corresponding properties in application.yml.
    @ConfigurationProperties(prefix = "baishuzhai.crawlsource")
    @ConditionalOnProperty(prefix = "crawl.website", name = "type", havingValue = "4")
    public BaseHtmlCrawlSource dingdianCrawlSource() {
        BaseHtmlCrawlSource crawlSource = new BiquCrawlSource();
        return crawlSource;
    }
}

View File

@ -91,7 +91,13 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1);
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
SimpleDateFormat format ;
if(updateTimeStr.length()>10){
format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
}else{
format = new SimpleDateFormat("yy-MM-dd");
}
updateTime = format.parse(updateTimeStr);
if(!newCat2Date.containsKey(i)) {
newCat2Date.put(i, updateTime);
@ -159,7 +165,13 @@ public class BiquCrawlSource extends BaseHtmlCrawlSource {
Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1);
SimpleDateFormat format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
SimpleDateFormat format ;
if(updateTimeStr.length()>10){
format = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
}else{
format = new SimpleDateFormat("yy-MM-dd");
}
Date updateTime = format.parse(updateTimeStr);
Pattern picPatten = compile(getPicPattern());
Matcher picMather = picPatten.matcher(body);