From 0a105044616347145a3300b914b33b4d0f97896e Mon Sep 17 00:00:00 2001
From: xiongxiaoyang <773861846@qq.com>
Date: Wed, 23 Dec 2020 23:48:34 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E7=AB=A0=E8=8A=82=E5=AD=97?=
 =?UTF-8?q?=E6=95=B0=E7=AE=97=E6=B3=95=EF=BC=8C=E4=BC=98=E5=8C=96=E7=88=AC?=
 =?UTF-8?q?=E8=99=AB=E4=BB=A3=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../java2nb/novel/core/crawl/CrawlParser.java | 118 +++++++++---------
 .../java2nb/novel/service/BookService.java    |   5 +-
 .../novel/service/impl/BookServiceImpl.java   |  29 +----
 .../resources/mybatis/mapping/BookMapper.xml  |   3 +-
 4 files changed, 66 insertions(+), 89 deletions(-)

diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java
index 734e4e0..1cd1c95 100644
--- a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java
+++ b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java
@@ -1,9 +1,6 @@
 package com.java2nb.novel.core.crawl;
 
-import com.java2nb.novel.core.utils.HttpUtil;
-import com.java2nb.novel.core.utils.IdWorker;
-import com.java2nb.novel.core.utils.RandomBookInfoUtil;
-import com.java2nb.novel.core.utils.RestTemplateUtil;
+import com.java2nb.novel.core.utils.*;
 import com.java2nb.novel.entity.Book;
 import com.java2nb.novel.entity.BookContent;
 import com.java2nb.novel.entity.BookIndex;
@@ -37,7 +34,7 @@ public class CrawlParser {
 
     private static RestTemplate restTemplate = RestTemplateUtil.getInstance("utf-8");
 
-    private static ThreadLocal <Integer> retryCount = new ThreadLocal<>();
+    private static ThreadLocal<Integer> retryCount = new ThreadLocal<>();
 
     @SneakyThrows
     public static Book parseBook(RuleBean ruleBean, String bookId) {
@@ -65,7 +62,7 @@ public class CrawlParser {
                         boolean isFindPicUrl = picUrlMatch.find();
                         if (isFindPicUrl) {
                             String picUrl = picUrlMatch.group(1);
-                            if(StringUtils.isNotBlank(picUrl) && StringUtils.isNotBlank(ruleBean.getPicUrlPrefix())) {
+                            if (StringUtils.isNotBlank(picUrl) && StringUtils.isNotBlank(ruleBean.getPicUrlPrefix())) {
                                 picUrl = ruleBean.getPicUrlPrefix() + picUrl;
                             }
                             //设置封面图片路径
@@ -96,11 +93,11 @@ public class CrawlParser {
                     String desc = bookDetailHtml.substring(bookDetailHtml.indexOf(ruleBean.getDescStart()) + ruleBean.getDescStart().length());
                     desc = desc.substring(0, desc.indexOf(ruleBean.getDescEnd()));
                     //过滤掉简介中的特殊标签
-                    desc = desc.replaceAll("<a[^<]+</a>","")
-                            .replaceAll("<font[^<]+</font>","")
-                            .replaceAll("<p>\\s*</p>","")
-                            .replaceAll("<p>","")
-                            .replaceAll("</p>","<br/>");
+                    desc = desc.replaceAll("<a[^<]+</a>", "")
+                            .replaceAll("<font[^<]+</font>", "")
+                            .replaceAll("<p>\\s*</p>", "")
+                            .replaceAll("<p>", "")
+                            .replaceAll("</p>", "<br/>");
                     //设置书籍简介
                     book.setBookDesc(desc);
                     if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) {
@@ -146,9 +143,9 @@ public class CrawlParser {
     }
 
     public static Map<Integer, List> parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean, Map<Integer, BookIndex> hasIndexs) {
-        Map<Integer,List> result = new HashMap<>(2);
-        result.put(BOOK_INDEX_LIST_KEY,new ArrayList(0));
-        result.put(BOOK_CONTENT_LIST_KEY,new ArrayList(0));
+        Map<Integer, List> result = new HashMap<>(2);
+        result.put(BOOK_INDEX_LIST_KEY, new ArrayList(0));
+        result.put(BOOK_CONTENT_LIST_KEY, new ArrayList(0));
 
         Date currentDate = new Date();
 
@@ -159,7 +156,7 @@ public class CrawlParser {
         String indexListHtml = getByHttpClientWithChrome(indexListUrl);
 
         if (indexListHtml != null) {
-            if(StringUtils.isNotBlank(ruleBean.getBookIndexStart())){
+            if (StringUtils.isNotBlank(ruleBean.getBookIndexStart())) {
                 indexListHtml = indexListHtml.substring(indexListHtml.indexOf(ruleBean.getBookIndexStart()) + ruleBean.getBookIndexStart().length());
             }
 
@@ -189,12 +186,12 @@ public class CrawlParser {
                     String sourceIndexId = indexIdMatch.group(1);
                     String bookContentUrl = ruleBean.getBookContentUrl();
                     int calStart = bookContentUrl.indexOf("{cal_");
-                    if(calStart != -1){
+                    if (calStart != -1) {
                         //内容页URL需要进行计算才能得到
-                        String calStr = bookContentUrl.substring(calStart,calStart+bookContentUrl.substring(calStart).indexOf("}"));
+                        String calStr = bookContentUrl.substring(calStart, calStart + bookContentUrl.substring(calStart).indexOf("}"));
                         String[] calArr = calStr.split("_");
                         int calType = Integer.parseInt(calArr[1]);
-                        if(calType == 1) {
+                        if (calType == 1) {
                             ///{cal_1_1_3}_{bookId}/{indexId}.html
                             //第一种计算规则,去除第x个参数的最后y个字母
                             int x = Integer.parseInt(calArr[2]);
@@ -206,12 +203,12 @@ public class CrawlParser {
                                 calResult = sourceIndexId.substring(0, sourceBookId.length() - y);
                             }
 
-                            if(calResult.length() == 0){
+                            if (calResult.length() == 0) {
                                 calResult = "0";
 
                             }
 
-                            bookContentUrl = bookContentUrl.replace(calStr+"}", calResult);
+                            bookContentUrl = bookContentUrl.replace(calStr + "}", calResult);
                         }
 
                     }
@@ -223,52 +220,40 @@ public class CrawlParser {
                     if (contentHtml != null && !contentHtml.contains("正在手打中")) {
                         String content = contentHtml.substring(contentHtml.indexOf(ruleBean.getContentStart()) + ruleBean.getContentStart().length());
                         content = content.substring(0, content.indexOf(ruleBean.getContentEnd()));
-                        //TODO插入章节目录和章节内容
+                        //插入章节目录和章节内容
                         BookIndex bookIndex = new BookIndex();
-
                         bookIndex.setIndexName(indexName);
                         bookIndex.setIndexNum(indexNum);
+                        Integer wordCount = StringUtil.getStrValidWordCount(content);
+                        bookIndex.setWordCount(wordCount);
                         indexList.add(bookIndex);
-                        BookContent bookContent = new BookContent();
 
+                        BookContent bookContent = new BookContent();
                         bookContent.setContent(content);
                         contentList.add(bookContent);
 
-                        //判断是新增还是更新
-                        if(hasIndexs.size() == 0){
-                            //新书入库
+
+                        if (hasIndex != null) {
+                            //章节更新
+                            bookIndex.setId(hasIndex.getId());
+                            bookContent.setIndexId(hasIndex.getId());
+                        } else {
+                            //章节插入
                             //设置目录和章节内容
                             Long indexId = idWorker.nextId();
                             lastIndexId = indexId;
                             lastIndexName = indexName;
                             bookIndex.setId(indexId);
                             bookIndex.setBookId(book.getId());
-                            Integer wordCount = bookContent.getContent().length();
-                            totalWordCount += wordCount;
-                            bookIndex.setWordCount(wordCount);
+
                             bookIndex.setCreateTime(currentDate);
-                            bookIndex.setUpdateTime(currentDate);
 
                             bookContent.setIndexId(indexId);
-
-                            //设置小说基础信息
-                            book.setWordCount(totalWordCount);
-                            book.setLastIndexId(lastIndexId);
-                            book.setLastIndexName(lastIndexName);
-                            book.setLastIndexUpdateTime(currentDate);
-                            book.setCreateTime(currentDate);
-                            book.setUpdateTime(currentDate);
-
-                        }else{
-                            //老书更新
                         }
+                        bookIndex.setUpdateTime(currentDate);
 
-
-
-                        if(hasIndex != null){
-                            bookIndex.setId(hasIndex.getId());
-                            bookContent.setIndexId(hasIndex.getId());
-                        }
+                        //计算总字数
+                        totalWordCount += wordCount;
 
 
                     }
@@ -279,15 +264,30 @@ public class CrawlParser {
                 isFindIndex = indexIdMatch.find() & indexNameMatch.find();
             }
 
+            //判断是新书入库还是老书更新
+            if (hasIndexs.size() == 0) {
+                //新书入库
+
+                //设置小说基础信息
+                book.setWordCount(totalWordCount);
+                book.setLastIndexId(lastIndexId);
+                book.setLastIndexName(lastIndexName);
+                book.setLastIndexUpdateTime(currentDate);
+                book.setCreateTime(currentDate);
+
+            }
+            book.setUpdateTime(currentDate);
+
             if (indexList.size() == contentList.size() && indexList.size() > 0) {
 
-                result.put(BOOK_INDEX_LIST_KEY,indexList);
-                result.put(BOOK_CONTENT_LIST_KEY,contentList);
+                result.put(BOOK_INDEX_LIST_KEY, indexList);
+                result.put(BOOK_CONTENT_LIST_KEY, contentList);
 
             }
 
         }
 
+
         return result;
     }
 
@@ -297,7 +297,7 @@ public class CrawlParser {
             ResponseEntity<String> forEntity = restTemplate.getForEntity(url, String.class);
             if (forEntity.getStatusCode() == HttpStatus.OK) {
                 String body = forEntity.getBody();
-                if(body.length() < Constants.INVALID_HTML_LENGTH){
+                if (body.length() < Constants.INVALID_HTML_LENGTH) {
                     return processErrorHttpResult(url);
                 }
                 //成功获得html内容
@@ -314,11 +314,11 @@ public class CrawlParser {
         try {
 
             String body = HttpUtil.getByHttpClientWithChrome(url);
-                if(body != null && body.length() < Constants.INVALID_HTML_LENGTH){
-                    return processErrorHttpResult(url);
-                }
-                //成功获得html内容
-                return body;
+            if (body != null && body.length() < Constants.INVALID_HTML_LENGTH) {
+                return processErrorHttpResult(url);
+            }
+            //成功获得html内容
+            return body;
         } catch (Exception e) {
             e.printStackTrace();
         }
@@ -327,13 +327,13 @@ public class CrawlParser {
     }
 
     @SneakyThrows
-    private static String processErrorHttpResult(String url){
+    private static String processErrorHttpResult(String url) {
         Integer count = retryCount.get();
-        if(count == null){
+        if (count == null) {
             count = 0;
         }
-        if(count < Constants.HTTP_FAIL_RETRY_COUNT){
-            Thread.sleep(  new Random().nextInt(10*1000));
+        if (count < Constants.HTTP_FAIL_RETRY_COUNT) {
+            Thread.sleep(new Random().nextInt(10 * 1000));
             retryCount.set(++count);
             return getByHttpClient(url);
         }
diff --git a/novel-crawl/src/main/java/com/java2nb/novel/service/BookService.java b/novel-crawl/src/main/java/com/java2nb/novel/service/BookService.java
index 8585fd1..1b343e9 100644
--- a/novel-crawl/src/main/java/com/java2nb/novel/service/BookService.java
+++ b/novel-crawl/src/main/java/com/java2nb/novel/service/BookService.java
@@ -65,9 +65,8 @@ public interface BookService {
      * @param book 小说数据
      * @param bookIndexList 目录集合
      * @param bookContentList 内容集合
-     * @param existBookIndexMap  已存在的章节Map
-     * */
-    void updateBookAndIndexAndContent(Book book, List<BookIndex> bookIndexList, List<BookContent> bookContentList, Map<Integer, BookIndex> existBookIndexMap);
+     * @param existBookIndexMap  已存在的章节Map   */
+    void updateBookAndIndexAndContent(Book book,  List<BookIndex> bookIndexList, List<BookContent> bookContentList, Map<Integer, BookIndex> existBookIndexMap);
 
     /**
      * 更新一下最后一次的抓取时间
diff --git a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/BookServiceImpl.java b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/BookServiceImpl.java
index 02a627a..cc0f776 100644
--- a/novel-crawl/src/main/java/com/java2nb/novel/service/impl/BookServiceImpl.java
+++ b/novel-crawl/src/main/java/com/java2nb/novel/service/impl/BookServiceImpl.java
@@ -1,6 +1,5 @@
 package com.java2nb.novel.service.impl;
 
-import com.java2nb.novel.core.utils.IdWorker;
 import com.java2nb.novel.entity.Book;
 import com.java2nb.novel.entity.BookContent;
 import com.java2nb.novel.entity.BookIndex;
@@ -79,10 +78,6 @@ public class BookServiceImpl implements BookService {
 
             if(bookIndexList.size()>0) {
 
-                if (book.getId() == null) {
-                    book.setId(new IdWorker().nextId());
-                }
-
                 //保存小说主表
 
                 bookMapper.insertSelective(book);
@@ -122,36 +117,20 @@ public class BookServiceImpl implements BookService {
 
     @Transactional(rollbackFor = Exception.class)
     @Override
-    public void updateBookAndIndexAndContent(Book book, List<BookIndex> bookIndexList, List<BookContent> bookContentList, Map<Integer, BookIndex> existBookIndexMap) {
+    public void updateBookAndIndexAndContent(Book book,  List<BookIndex> bookIndexList, List<BookContent> bookContentList, Map<Integer, BookIndex> existBookIndexMap) {
         Date currentDate = new Date();
         for (int i = 0; i < bookIndexList.size(); i++) {
             BookIndex bookIndex = bookIndexList.get(i);
             BookContent bookContent = bookContentList.get(i);
 
-            //插入或更新目录
-            Integer wordCount = bookContent.getContent().length();
-            bookIndex.setWordCount(wordCount);
-            bookIndex.setUpdateTime(currentDate);
 
-            if(bookIndex.getId() == null) {
+            if(!existBookIndexMap.containsKey(bookIndex.getIndexNum())) {
                 //插入
-                bookIndex.setBookId(book.getId());
-                Long indexId = new IdWorker().nextId();
-                bookIndex.setId(indexId);
-                bookIndex.setCreateTime(currentDate);
                 bookIndexMapper.insertSelective(bookIndex);
-            }else{
-                //更新
-                bookIndexMapper.updateByPrimaryKeySelective(bookIndex);
-            }
-
-            if(bookContent.getIndexId() == null) {
-                //插入
-                bookContent.setIndexId(bookIndex.getId());
                 bookContentMapper.insertSelective(bookContent);
             }else{
                 //更新
-
+                bookIndexMapper.updateByPrimaryKeySelective(bookIndex);
                 bookContentMapper.update(update(BookContentDynamicSqlSupport.bookContent)
                         .set(BookContentDynamicSqlSupport.content)
                         .equalTo(bookContent.getContent())
@@ -160,6 +139,7 @@ public class BookServiceImpl implements BookService {
                         .render(RenderingStrategies.MYBATIS3));
             }
 
+
         }
 
         //更新小说主表
@@ -174,7 +154,6 @@ public class BookServiceImpl implements BookService {
                 book.setLastIndexUpdateTime(currentDate);
             }
         }
-        book.setUpdateTime(currentDate);
         book.setBookName(null);
         book.setAuthorName(null);
         if(Constants.VISIT_COUNT_DEFAULT.equals(book.getVisitCount())) {
diff --git a/novel-crawl/src/main/resources/mybatis/mapping/BookMapper.xml b/novel-crawl/src/main/resources/mybatis/mapping/BookMapper.xml
index 1ef6d90..ea9f378 100644
--- a/novel-crawl/src/main/resources/mybatis/mapping/BookMapper.xml
+++ b/novel-crawl/src/main/resources/mybatis/mapping/BookMapper.xml
@@ -15,8 +15,7 @@
 
     <select id="queryTotalWordCount" parameterType="long" resultType="int">
 
-        select sum(t2.word_count) from book t1 inner join book_index t2
-        on t1.id = t2.book_id and t1.id = #{bookId}
+        select coalesce(sum(word_count), 0) from book_index where book_id = #{bookId}
     </select>
 
     <update id="updateCrawlLastTime">