From 2726917e3797bf8faa0ab0503556b00d7f35e255 Mon Sep 17 00:00:00 2001
From: xiongxiaoyang <773861846@qq.com>
Date: Fri, 6 Dec 2019 11:33:48 +0800
Subject: [PATCH] =?UTF-8?q?=E7=AC=94=E8=B6=A3=E5=A1=94=E5=9F=9F=E5=90=8D?=
=?UTF-8?q?=E6=9B=B4=E6=8D=A2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../service/impl/BookCrawlServiceImpl.java | 2 +-
.../books/service/BookService.java | 195 ++++--------------
.../common/schedule/CrawlBooksSchedule.java | 54 +----
3 files changed, 51 insertions(+), 200 deletions(-)
diff --git a/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java b/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java
index e3e0225..099b9f8 100644
--- a/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java
+++ b/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java
@@ -407,7 +407,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
}
private void crawBiquTaBooks(int i) {
- String baseUrl = "https://m.biquta.com";
+ String baseUrl = "https://m.biquta.la";
String catBookListUrlBase = baseUrl + "/class/";
if (crawlConfig.getPriority() == 1) {
catBookListUrlBase = baseUrl + "/lhb/";
diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java b/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java
index 4de3098..2a10195 100644
--- a/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java
+++ b/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java
@@ -2,13 +2,13 @@ package xyz.zinglizingli.books.service;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.pagehelper.PageHelper;
+import org.apache.http.client.utils.DateUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.http.HttpEntity;
-import org.springframework.http.HttpHeaders;
-import org.springframework.http.MediaType;
-import org.springframework.http.ResponseEntity;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.core.io.Resource;
+import org.springframework.http.*;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.LinkedMultiValueMap;
@@ -19,10 +19,11 @@ import tk.mybatis.orderbyhelper.OrderByHelper;
import xyz.zinglizingli.books.constant.CacheKeyConstans;
import xyz.zinglizingli.books.mapper.*;
import xyz.zinglizingli.books.po.*;
+import xyz.zinglizingli.books.util.UUIDUtils;
import xyz.zinglizingli.common.cache.CommonCacheUtil;
import xyz.zinglizingli.common.utils.RestTemplateUtil;
-import java.io.IOException;
+import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -48,12 +49,20 @@ public class BookService {
@Autowired
private CommonCacheUtil cacheUtil;
- RestTemplate restTemplate = RestTemplateUtil.getInstance("utf-8");
+ RestTemplate isoRestTemplate = RestTemplateUtil.getInstance("iso-8859-1");
+
+ @Value("${pic.save.type}")
+ private Byte picSaveType;
+
+ @Value("${pic.save.path}")
+ private String picSavePath;
+
private Logger log = LoggerFactory.getLogger(BookService.class);
- public void saveBookAndIndexAndContent(Book book, List bookIndex, List bookContent) {
+
+ public void saveBookAndIndexAndContent(Book book, List bookIndex, List bookContent){
//一次最多只允许插入20条记录,否则影响服务器响应,如果没有插入所有更新,则更新时间设为昨天
/*if(bookIndex.size()>100){
book.setUpdateTime(new Date(book.getUpdateTime().getTime()-1000*60*60*24));
@@ -61,7 +70,7 @@ public class BookService {
*/
boolean isUpdate = false;
- Long bookId = -1l;
+ Long bookId = -1L;
book.setBookName(book.getBookName().trim());
book.setAuthor(book.getAuthor().trim());
BookExample example = new BookExample();
@@ -71,6 +80,34 @@ public class BookService {
//更新
bookId = books.get(0).getId();
book.setId(bookId);
+ String picSrc = book.getPicUrl();
+ if(picSaveType == 2 && org.apache.commons.lang3.StringUtils.isNotBlank(picSrc)){
+ try {
+ HttpHeaders headers = new HttpHeaders();
+ HttpEntity requestEntity = new HttpEntity<>(null, headers);
+ ResponseEntity resEntity = isoRestTemplate.exchange(picSrc, HttpMethod.GET, requestEntity, Resource.class);
+ InputStream input = resEntity.getBody().getInputStream();
+ Date currentDate = new Date();
+ picSrc = "/localPic/" + DateUtils.formatDate(currentDate, "yyyy") + "/" + DateUtils.formatDate(currentDate, "MM") + "/" + DateUtils.formatDate(currentDate, "dd")
+ + UUIDUtils.getUUID32()
+ + picSrc.substring(picSrc.lastIndexOf("."));
+ File picFile = new File(picSavePath + picSrc);
+ File parentFile = picFile.getParentFile();
+ if (!parentFile.exists()) {
+ parentFile.mkdirs();
+ }
+ OutputStream out = new FileOutputStream(picFile);
+ byte[] b = new byte[4096];
+ for (int n; (n = input.read(b)) != -1; ) {
+ out.write(b, 0, n);
+ }
+ out.close();
+ input.close();
+ }catch (Exception e){
+ log.error(e.getMessage(),e);
+ }
+
+ }
bookMapper.updateByPrimaryKeySelective(book);
isUpdate = true;
@@ -128,11 +165,6 @@ public class BookService {
insertIndexListAndContentList(newBookIndexList, newContentList);
}
- if (isUpdate) {
- sendNewstIndex(lastIndex);
- } else {
- sendNewstBook(bookId);
- }
cacheUtil.del(CacheKeyConstans.NEWST_BOOK_LIST_KEY);
@@ -263,72 +295,6 @@ public class BookService {
return content;
}
- private String chargeBookContent(String content) throws IOException {
- StringBuilder contentBuilder = new StringBuilder(content);
- int length = content.length();
- if (length > 100) {
- String jsonResult = cacheUtil.get(CacheKeyConstans.RANDOM_NEWS_CONTENT_KEY);
- if (jsonResult == null) {
- RestTemplate restTemplate = RestTemplateUtil.getInstance("utf-8");
- MultiValueMap mmap = new LinkedMultiValueMap<>();
- HttpHeaders headers = new HttpHeaders();
- headers.add("Host", "channel.chinanews.com");
- headers.add("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36");
- HttpEntity> request = new HttpEntity<>(mmap, headers);
- String body = restTemplate.postForEntity("http://channel.chinanews.com/cns/cjs/sh.shtml", request, String.class).getBody();
- Pattern pattern = Pattern.compile("specialcnsdata\\s*=\\s*\\{\"docs\":(.+)};\\s+newslist\\s*=\\s*specialcnsdata;");
- Matcher matcher = pattern.matcher(body);
- if (matcher.find()) {
- jsonResult = matcher.group(1);
- cacheUtil.set(CacheKeyConstans.RANDOM_NEWS_CONTENT_KEY, jsonResult, 60 * 60 * 1);
- }
- }
-
- if (jsonResult.length() > 5) {
- List
");
Matcher descMatch = descPatten.matcher(body);
@@ -343,15 +322,15 @@ public class CrawlBooksSchedule {
//①爬分类列表的书籍url和总页数
// https:
-////m.biquta.com/class/1/1.html
+////m.biquta.la/class/1/1.html
// https:
-////m.biquta.com/class/2/1.html
+////m.biquta.la/class/2/1.html
// https:
-////m.biquta.com/class/2/2.html
+////m.biquta.la/class/2/2.html
//
//
// https:
-////m.biquta.com/class/2/2.html
+////m.biquta.la/class/2/2.html
//
//
//
@@ -528,29 +507,6 @@ public class CrawlBooksSchedule {
if (picMather.find()) {
String picSrc = picMather.group(1);
- if(picSaveType == 2 && StringUtils.isNotBlank(picSrc)){
- HttpHeaders headers = new HttpHeaders();
- headers.add("Referer","https://www.biqudao.com");
- HttpEntity requestEntity = new HttpEntity<>(null, headers);
- ResponseEntity resEntity = isoRestTemplate.exchange(picSrc, HttpMethod.GET, requestEntity, Resource.class);
- InputStream input = resEntity.getBody().getInputStream();
- picSrc = "/localPic/" + updateTimeStr.substring(0,2)+"/"+updateTimeStr.substring(3,5)+"/"+updateTimeStr.substring(6,8)
- + UUIDUtils.getUUID32()
- + picSrc.substring(picSrc.lastIndexOf("."));
- File picFile = new File(picSavePath+picSrc);
- File parentFile = picFile.getParentFile();
- if(!parentFile.exists()){
- parentFile.mkdirs();
- }
- OutputStream out = new FileOutputStream(picFile);
- byte[] b = new byte[4096];
- for (int n; (n = input.read(b)) != -1;) {
- out.write(b, 0, n);
- }
- out.close();
- input.close();
-
- }
Pattern descPatten = Pattern.compile("class=\"review\">([^<]+)");
Matcher descMatch = descPatten.matcher(body);