mirror of
https://github.com/201206030/novel-plus.git
synced 2025-06-13 08:58:30 +00:00
优化章节字数算法,优化爬虫代码
This commit is contained in:
parent
1046a7ffc1
commit
0a10504461
@ -1,9 +1,6 @@
|
||||
package com.java2nb.novel.core.crawl;
|
||||
|
||||
import com.java2nb.novel.core.utils.HttpUtil;
|
||||
import com.java2nb.novel.core.utils.IdWorker;
|
||||
import com.java2nb.novel.core.utils.RandomBookInfoUtil;
|
||||
import com.java2nb.novel.core.utils.RestTemplateUtil;
|
||||
import com.java2nb.novel.core.utils.*;
|
||||
import com.java2nb.novel.entity.Book;
|
||||
import com.java2nb.novel.entity.BookContent;
|
||||
import com.java2nb.novel.entity.BookIndex;
|
||||
@ -223,52 +220,40 @@ public class CrawlParser {
|
||||
if (contentHtml != null && !contentHtml.contains("正在手打中")) {
|
||||
String content = contentHtml.substring(contentHtml.indexOf(ruleBean.getContentStart()) + ruleBean.getContentStart().length());
|
||||
content = content.substring(0, content.indexOf(ruleBean.getContentEnd()));
|
||||
//TODO插入章节目录和章节内容
|
||||
//插入章节目录和章节内容
|
||||
BookIndex bookIndex = new BookIndex();
|
||||
|
||||
bookIndex.setIndexName(indexName);
|
||||
bookIndex.setIndexNum(indexNum);
|
||||
Integer wordCount = StringUtil.getStrValidWordCount(content);
|
||||
bookIndex.setWordCount(wordCount);
|
||||
indexList.add(bookIndex);
|
||||
BookContent bookContent = new BookContent();
|
||||
|
||||
BookContent bookContent = new BookContent();
|
||||
bookContent.setContent(content);
|
||||
contentList.add(bookContent);
|
||||
|
||||
//判断是新增还是更新
|
||||
if(hasIndexs.size() == 0){
|
||||
//新书入库
|
||||
|
||||
if (hasIndex != null) {
|
||||
//章节更新
|
||||
bookIndex.setId(hasIndex.getId());
|
||||
bookContent.setIndexId(hasIndex.getId());
|
||||
} else {
|
||||
//章节插入
|
||||
//设置目录和章节内容
|
||||
Long indexId = idWorker.nextId();
|
||||
lastIndexId = indexId;
|
||||
lastIndexName = indexName;
|
||||
bookIndex.setId(indexId);
|
||||
bookIndex.setBookId(book.getId());
|
||||
Integer wordCount = bookContent.getContent().length();
|
||||
totalWordCount += wordCount;
|
||||
bookIndex.setWordCount(wordCount);
|
||||
|
||||
bookIndex.setCreateTime(currentDate);
|
||||
bookIndex.setUpdateTime(currentDate);
|
||||
|
||||
bookContent.setIndexId(indexId);
|
||||
|
||||
//设置小说基础信息
|
||||
book.setWordCount(totalWordCount);
|
||||
book.setLastIndexId(lastIndexId);
|
||||
book.setLastIndexName(lastIndexName);
|
||||
book.setLastIndexUpdateTime(currentDate);
|
||||
book.setCreateTime(currentDate);
|
||||
book.setUpdateTime(currentDate);
|
||||
|
||||
}else{
|
||||
//老书更新
|
||||
}
|
||||
bookIndex.setUpdateTime(currentDate);
|
||||
|
||||
|
||||
|
||||
if(hasIndex != null){
|
||||
bookIndex.setId(hasIndex.getId());
|
||||
bookContent.setIndexId(hasIndex.getId());
|
||||
}
|
||||
//计算总字数
|
||||
totalWordCount += wordCount;
|
||||
|
||||
|
||||
}
|
||||
@ -279,6 +264,20 @@ public class CrawlParser {
|
||||
isFindIndex = indexIdMatch.find() & indexNameMatch.find();
|
||||
}
|
||||
|
||||
//判断是新书入库还是老书更新
|
||||
if (hasIndexs.size() == 0) {
|
||||
//新书入库
|
||||
|
||||
//设置小说基础信息
|
||||
book.setWordCount(totalWordCount);
|
||||
book.setLastIndexId(lastIndexId);
|
||||
book.setLastIndexName(lastIndexName);
|
||||
book.setLastIndexUpdateTime(currentDate);
|
||||
book.setCreateTime(currentDate);
|
||||
|
||||
}
|
||||
book.setUpdateTime(currentDate);
|
||||
|
||||
if (indexList.size() == contentList.size() && indexList.size() > 0) {
|
||||
|
||||
result.put(BOOK_INDEX_LIST_KEY, indexList);
|
||||
@ -288,6 +287,7 @@ public class CrawlParser {
|
||||
|
||||
}
|
||||
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -65,8 +65,7 @@ public interface BookService {
|
||||
* @param book 小说数据
|
||||
* @param bookIndexList 目录集合
|
||||
* @param bookContentList 内容集合
|
||||
* @param existBookIndexMap 已存在的章节Map
|
||||
* */
|
||||
* @param existBookIndexMap 已存在的章节Map */
|
||||
void updateBookAndIndexAndContent(Book book, List<BookIndex> bookIndexList, List<BookContent> bookContentList, Map<Integer, BookIndex> existBookIndexMap);
|
||||
|
||||
/**
|
||||
|
@ -1,6 +1,5 @@
|
||||
package com.java2nb.novel.service.impl;
|
||||
|
||||
import com.java2nb.novel.core.utils.IdWorker;
|
||||
import com.java2nb.novel.entity.Book;
|
||||
import com.java2nb.novel.entity.BookContent;
|
||||
import com.java2nb.novel.entity.BookIndex;
|
||||
@ -79,10 +78,6 @@ public class BookServiceImpl implements BookService {
|
||||
|
||||
if(bookIndexList.size()>0) {
|
||||
|
||||
if (book.getId() == null) {
|
||||
book.setId(new IdWorker().nextId());
|
||||
}
|
||||
|
||||
//保存小说主表
|
||||
|
||||
bookMapper.insertSelective(book);
|
||||
@ -128,30 +123,14 @@ public class BookServiceImpl implements BookService {
|
||||
BookIndex bookIndex = bookIndexList.get(i);
|
||||
BookContent bookContent = bookContentList.get(i);
|
||||
|
||||
//插入或更新目录
|
||||
Integer wordCount = bookContent.getContent().length();
|
||||
bookIndex.setWordCount(wordCount);
|
||||
bookIndex.setUpdateTime(currentDate);
|
||||
|
||||
if(bookIndex.getId() == null) {
|
||||
if(!existBookIndexMap.containsKey(bookIndex.getIndexNum())) {
|
||||
//插入
|
||||
bookIndex.setBookId(book.getId());
|
||||
Long indexId = new IdWorker().nextId();
|
||||
bookIndex.setId(indexId);
|
||||
bookIndex.setCreateTime(currentDate);
|
||||
bookIndexMapper.insertSelective(bookIndex);
|
||||
}else{
|
||||
//更新
|
||||
bookIndexMapper.updateByPrimaryKeySelective(bookIndex);
|
||||
}
|
||||
|
||||
if(bookContent.getIndexId() == null) {
|
||||
//插入
|
||||
bookContent.setIndexId(bookIndex.getId());
|
||||
bookContentMapper.insertSelective(bookContent);
|
||||
}else{
|
||||
//更新
|
||||
|
||||
bookIndexMapper.updateByPrimaryKeySelective(bookIndex);
|
||||
bookContentMapper.update(update(BookContentDynamicSqlSupport.bookContent)
|
||||
.set(BookContentDynamicSqlSupport.content)
|
||||
.equalTo(bookContent.getContent())
|
||||
@ -160,6 +139,7 @@ public class BookServiceImpl implements BookService {
|
||||
.render(RenderingStrategies.MYBATIS3));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
//更新小说主表
|
||||
@ -174,7 +154,6 @@ public class BookServiceImpl implements BookService {
|
||||
book.setLastIndexUpdateTime(currentDate);
|
||||
}
|
||||
}
|
||||
book.setUpdateTime(currentDate);
|
||||
book.setBookName(null);
|
||||
book.setAuthorName(null);
|
||||
if(Constants.VISIT_COUNT_DEFAULT.equals(book.getVisitCount())) {
|
||||
|
@ -15,8 +15,7 @@
|
||||
|
||||
<select id="queryTotalWordCount" parameterType="long" resultType="int">
|
||||
|
||||
select sum(t2.word_count) from book t1 inner join book_index t2
|
||||
on t1.id = t2.book_id and t1.id = #{bookId}
|
||||
select sum(word_count) from book_index where book_id = #{bookId}
|
||||
</select>
|
||||
|
||||
<update id="updateCrawlLastTime">
|
||||
|
Loading…
x
Reference in New Issue
Block a user