From 2726917e3797bf8faa0ab0503556b00d7f35e255 Mon Sep 17 00:00:00 2001 From: xiongxiaoyang <773861846@qq.com> Date: Fri, 6 Dec 2019 11:33:48 +0800 Subject: [PATCH] =?UTF-8?q?=E7=AC=94=E8=B6=A3=E5=A1=94=E5=9F=9F=E5=90=8D?= =?UTF-8?q?=E6=9B=B4=E6=8D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/impl/BookCrawlServiceImpl.java | 2 +- .../books/service/BookService.java | 195 ++++-------------- .../common/schedule/CrawlBooksSchedule.java | 54 +---- 3 files changed, 51 insertions(+), 200 deletions(-) diff --git a/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java b/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java index e3e0225..099b9f8 100644 --- a/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java +++ b/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java @@ -407,7 +407,7 @@ public class BookCrawlServiceImpl implements BookCrawlService { } private void crawBiquTaBooks(int i) { - String baseUrl = "https://m.biquta.com"; + String baseUrl = "https://m.biquta.la"; String catBookListUrlBase = baseUrl + "/class/"; if (crawlConfig.getPriority() == 1) { catBookListUrlBase = baseUrl + "/lhb/"; diff --git a/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java b/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java index 4de3098..2a10195 100644 --- a/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java +++ b/novel-front/src/main/java/xyz/zinglizingli/books/service/BookService.java @@ -2,13 +2,13 @@ package xyz.zinglizingli.books.service; import com.fasterxml.jackson.databind.ObjectMapper; import com.github.pagehelper.PageHelper; +import org.apache.http.client.utils.DateUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.http.HttpEntity; -import org.springframework.http.HttpHeaders; -import org.springframework.http.MediaType; -import org.springframework.http.ResponseEntity; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.io.Resource; +import org.springframework.http.*; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.springframework.util.LinkedMultiValueMap; @@ -19,10 +19,11 @@ import tk.mybatis.orderbyhelper.OrderByHelper; import xyz.zinglizingli.books.constant.CacheKeyConstans; import xyz.zinglizingli.books.mapper.*; import xyz.zinglizingli.books.po.*; +import xyz.zinglizingli.books.util.UUIDUtils; import xyz.zinglizingli.common.cache.CommonCacheUtil; import xyz.zinglizingli.common.utils.RestTemplateUtil; -import java.io.IOException; +import java.io.*; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -48,12 +49,20 @@ public class BookService { @Autowired private CommonCacheUtil cacheUtil; - RestTemplate restTemplate = RestTemplateUtil.getInstance("utf-8"); + RestTemplate isoRestTemplate = RestTemplateUtil.getInstance("iso-8859-1"); + + @Value("${pic.save.type}") + private Byte picSaveType; + + @Value("${pic.save.path}") + private String picSavePath; + private Logger log = LoggerFactory.getLogger(BookService.class); - public void saveBookAndIndexAndContent(Book book, List bookIndex, List bookContent) { + + public void saveBookAndIndexAndContent(Book book, List bookIndex, List bookContent){ //一次最多只允许插入20条记录,否则影响服务器响应,如果没有插入所有更新,则更新时间设为昨天 /*if(bookIndex.size()>100){ book.setUpdateTime(new Date(book.getUpdateTime().getTime()-1000*60*60*24)); @@ -61,7 +70,7 @@ public class BookService { */ boolean isUpdate = false; - Long bookId = -1l; + Long bookId = -1L; book.setBookName(book.getBookName().trim()); book.setAuthor(book.getAuthor().trim()); BookExample example = new BookExample(); @@ -71,6 +80,34 @@ public class BookService { //更新 bookId = books.get(0).getId(); book.setId(bookId); + String picSrc = book.getPicUrl(); + if(picSaveType == 2 && org.apache.commons.lang3.StringUtils.isNotBlank(picSrc)){ + try { + HttpHeaders headers = new HttpHeaders(); + HttpEntity requestEntity = new HttpEntity<>(null, headers); + ResponseEntity resEntity = isoRestTemplate.exchange(picSrc, HttpMethod.GET, requestEntity, Resource.class); + InputStream input = resEntity.getBody().getInputStream(); + Date currentDate = new Date(); + picSrc = "/localPic/" + DateUtils.formatDate(currentDate, "yyyy") + "/" + DateUtils.formatDate(currentDate, "MM") + "/" + DateUtils.formatDate(currentDate, "dd") + + UUIDUtils.getUUID32() + + picSrc.substring(picSrc.lastIndexOf(".")); + File picFile = new File(picSavePath + picSrc); + File parentFile = picFile.getParentFile(); + if (!parentFile.exists()) { + parentFile.mkdirs(); + } + OutputStream out = new FileOutputStream(picFile); + byte[] b = new byte[4096]; + for (int n; (n = input.read(b)) != -1; ) { + out.write(b, 0, n); + } + out.close(); + input.close(); + }catch (Exception e){ + log.error(e.getMessage(),e); + } + + } bookMapper.updateByPrimaryKeySelective(book); isUpdate = true; @@ -128,11 +165,6 @@ public class BookService { insertIndexListAndContentList(newBookIndexList, newContentList); } - if (isUpdate) { - sendNewstIndex(lastIndex); - } else { - sendNewstBook(bookId); - } cacheUtil.del(CacheKeyConstans.NEWST_BOOK_LIST_KEY); @@ -263,72 +295,6 @@ public class BookService { return content; } - private String chargeBookContent(String content) throws IOException { - StringBuilder contentBuilder = new StringBuilder(content); - int length = content.length(); - if (length > 100) { - String jsonResult = cacheUtil.get(CacheKeyConstans.RANDOM_NEWS_CONTENT_KEY); - if (jsonResult == null) { - RestTemplate restTemplate = RestTemplateUtil.getInstance("utf-8"); - MultiValueMap mmap = new LinkedMultiValueMap<>(); - HttpHeaders headers = new HttpHeaders(); - headers.add("Host", "channel.chinanews.com"); - headers.add("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"); - HttpEntity> request = new HttpEntity<>(mmap, headers); - String body = restTemplate.postForEntity("http://channel.chinanews.com/cns/cjs/sh.shtml", request, String.class).getBody(); - Pattern pattern = Pattern.compile("specialcnsdata\\s*=\\s*\\{\"docs\":(.+)};\\s+newslist\\s*=\\s*specialcnsdata;"); - Matcher matcher = pattern.matcher(body); - if (matcher.find()) { - jsonResult = matcher.group(1); - cacheUtil.set(CacheKeyConstans.RANDOM_NEWS_CONTENT_KEY, jsonResult, 60 * 60 * 1); - } - } - - if (jsonResult.length() > 5) { - List> list = new ObjectMapper().readValue(jsonResult, List.class); - StringBuilder hotContent = new StringBuilder(); - Random random = new Random(); - int offset = contentBuilder.indexOf(",", 100); - for (Map map : list) { - if (offset >= 100) { - hotContent.append("

"); - hotContent.append(map.get("pubtime")); - hotContent.append("

"); - contentBuilder.insert(offset + 1, hotContent.toString()); - offset = contentBuilder.indexOf(",", offset + 1 + hotContent.length()); - if (offset > 100) { - hotContent.delete(0, hotContent.length()); - hotContent.append("

"); - hotContent.append(map.get("title")); - hotContent.append("

"); - contentBuilder.insert(offset + 1, hotContent.toString()); - offset = contentBuilder.indexOf(",", offset + 1 + hotContent.length()); - if (offset >= 100) { - hotContent.delete(0, hotContent.length()); - hotContent.append("

"); - hotContent.append(map.get("content")); - hotContent.append("

"); - contentBuilder.insert(offset + 1, hotContent.toString()); - offset = contentBuilder.indexOf(",", offset + 1 + hotContent.length()); - if (offset >= 100) { - hotContent.delete(0, hotContent.length()); - hotContent.append("

"); - hotContent.append(""); - hotContent.append("

"); - contentBuilder.insert(offset + 1, hotContent.toString()); - offset = contentBuilder.indexOf(",", offset + 1 + hotContent.length()); - hotContent.delete(0, hotContent.length()); - } - } - } - } - - } - - } - } - return contentBuilder.toString(); - } public void addVisitCount(Long bookId, String userId, Integer indexNum) { @@ -527,85 +493,14 @@ public class BookService { return bookIndexMapper.countByExample(example); } - public List queryNewstBookIdList() { - return bookMapper.queryNewstBookIdList(); - } public List queryEndBookIdList() { return bookMapper.queryEndBookIdList(); } - private void sendNewstBook(Long bookId) { - try { - if (bookId >= 0) { - - //List idList = queryEndBookIdList(); - MultiValueMap map = new LinkedMultiValueMap<>(); - HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.TEXT_PLAIN); - //headers.add("User-Agent","curl/7.12.1"); - headers.add("Host", "data.zz.baidu.com"); - - String reqBody = ""; - reqBody += ("https://www.zinglizingli.xyz/book/" + bookId + ".html" + "\n"); - //reqBody += ("http://www.zinglizingli.xyz/book/" + bookId + ".html" + "\n"); - headers.setContentLength(reqBody.length()); - HttpEntity request = new HttpEntity<>(reqBody, headers); - System.out.println("推送数据:" + reqBody); - ResponseEntity stringResponseEntity = restTemplate.postForEntity("http://data.zz.baidu.com/urls?site=www.zinglizingli.xyz&token=IuK7oVrPKe3U606x", request, String.class); - System.out.println("推送URL结果:code:" + stringResponseEntity.getStatusCode().value() + ",body:" + stringResponseEntity.getBody()); - Thread.sleep(1000 * 3); - - //reqBody += ("http://www.zinglizingli.xyz/book/" + bookId + ".html" + "\n"); - System.out.println("推送数据:" + reqBody); - stringResponseEntity = restTemplate.postForEntity("http://data.zz.baidu.com/urls?appid=1643715155923937&token=fkEcTlId6Cf21Sz3&type=batch", request, String.class); - System.out.println("推送URL结果:code:" + stringResponseEntity.getStatusCode().value() + ",body:" + stringResponseEntity.getBody()); - } - } catch (InterruptedException e) { - log.info(e.getMessage(), e); - } - } - - - private void sendNewstIndex(BookIndex bookIndex) { - try { - if (bookIndex != null) { - MultiValueMap map = new LinkedMultiValueMap<>(); - HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.TEXT_PLAIN); - headers.add("Host", "data.zz.baidu.com"); - String reqBody = ""; - //目录只推送最新一条 - reqBody += ("https://www.zinglizingli.xyz/book/" + - bookIndex.getBookId() + "/" + - bookIndex.getIndexNum() + ".html" + "\n"); - headers.setContentLength(reqBody.length()); - HttpEntity request = new HttpEntity<>(reqBody, headers); - System.out.println("推送数据:" + reqBody); - ResponseEntity stringResponseEntity = restTemplate. - postForEntity("http://data.zz.baidu.com/urls?" + - "site=www.zinglizingli.xyz&token=IuK7oVrPKe3U606x" - , request, String.class); - - System.out.println("推送URL结果:code:" + stringResponseEntity.getStatusCode().value() + ",body:" + stringResponseEntity.getBody()); - - - Thread.sleep(1000 * 3); - //reqBody += ("http://www.zinglizingli.xyz/book/" + index.getBookId() + "/" + index.getIndexNum() + ".html" + "\n"); - System.out.println("推送数据:" + reqBody); - stringResponseEntity = restTemplate.postForEntity("http://data.zz.baidu.com/urls?appid=1643715155923937&token=fkEcTlId6Cf21Sz3&type=batch", request, String.class); - System.out.println("推送URL结果:code:" + stringResponseEntity.getStatusCode().value() + ",body:" + stringResponseEntity.getBody()); - - } - } catch (InterruptedException e) { - log.info(e.getMessage(), e); - } - - - } public List queryPreAndNextIndexNum(Long bookId, Integer indexNum) { List result = new ArrayList<>(); diff --git a/novel-front/src/main/java/xyz/zinglizingli/common/schedule/CrawlBooksSchedule.java b/novel-front/src/main/java/xyz/zinglizingli/common/schedule/CrawlBooksSchedule.java index ef07c51..1361334 100644 --- a/novel-front/src/main/java/xyz/zinglizingli/common/schedule/CrawlBooksSchedule.java +++ b/novel-front/src/main/java/xyz/zinglizingli/common/schedule/CrawlBooksSchedule.java @@ -91,7 +91,7 @@ public class CrawlBooksSchedule { } private void updateBiquTaBooks(int finalI) { - String baseUrl = "https://m.biquta.com"; + String baseUrl = "https://m.biquta.la"; String catBookListUrlBase = baseUrl + "/class/"; int page = 1;//起始页码 @@ -167,28 +167,7 @@ public class CrawlBooksSchedule { if (picMather.find()) { String picSrc = picMather.group(1); - if(picSaveType == 2 && StringUtils.isNotBlank(picSrc)){ - HttpHeaders headers = new HttpHeaders(); - HttpEntity requestEntity = new HttpEntity<>(null, headers); - ResponseEntity resEntity = isoRestTemplate.exchange(picSrc, HttpMethod.GET, requestEntity, Resource.class); - InputStream input = resEntity.getBody().getInputStream(); - picSrc = "/localPic/" + updateTimeStr.substring(0,4)+"/"+updateTimeStr.substring(5,7)+"/"+updateTimeStr.substring(8,10) - + UUIDUtils.getUUID32() - + picSrc.substring(picSrc.lastIndexOf(".")); - File picFile = new File(picSavePath+picSrc); - File parentFile = picFile.getParentFile(); - if(!parentFile.exists()){ - parentFile.mkdirs(); - } - OutputStream out = new FileOutputStream(picFile); - byte[] b = new byte[4096]; - for (int n; (n = input.read(b)) != -1;) { - out.write(b, 0, n); - } - out.close(); - input.close(); - } Pattern descPatten = Pattern.compile("class=\"review\">([^<]+)

"); Matcher descMatch = descPatten.matcher(body); @@ -343,15 +322,15 @@ public class CrawlBooksSchedule { //①爬分类列表的书籍url和总页数 // https: -////m.biquta.com/class/1/1.html +////m.biquta.la/class/1/1.html // https: -////m.biquta.com/class/2/1.html +////m.biquta.la/class/2/1.html // https: -////m.biquta.com/class/2/2.html +////m.biquta.la/class/2/2.html // // // https: -////m.biquta.com/class/2/2.html +////m.biquta.la/class/2/2.html // // // @@ -528,29 +507,6 @@ public class CrawlBooksSchedule { if (picMather.find()) { String picSrc = picMather.group(1); - if(picSaveType == 2 && StringUtils.isNotBlank(picSrc)){ - HttpHeaders headers = new HttpHeaders(); - headers.add("Referer","https://www.biqudao.com"); - HttpEntity requestEntity = new HttpEntity<>(null, headers); - ResponseEntity resEntity = isoRestTemplate.exchange(picSrc, HttpMethod.GET, requestEntity, Resource.class); - InputStream input = resEntity.getBody().getInputStream(); - picSrc = "/localPic/" + updateTimeStr.substring(0,2)+"/"+updateTimeStr.substring(3,5)+"/"+updateTimeStr.substring(6,8) - + UUIDUtils.getUUID32() - + picSrc.substring(picSrc.lastIndexOf(".")); - File picFile = new File(picSavePath+picSrc); - File parentFile = picFile.getParentFile(); - if(!parentFile.exists()){ - parentFile.mkdirs(); - } - OutputStream out = new FileOutputStream(picFile); - byte[] b = new byte[4096]; - for (int n; (n = input.read(b)) != -1;) { - out.write(b, 0, n); - } - out.close(); - input.close(); - - } Pattern descPatten = Pattern.compile("class=\"review\">([^<]+)

"); Matcher descMatch = descPatten.matcher(body);