Compare commits

..

6 Commits

9 changed files with 46 additions and 6 deletions

View File

@ -5,7 +5,7 @@
<groupId>com.java2nb</groupId>
<artifactId>novel-admin</artifactId>
<version>5.1.4</version>
<version>5.1.5</version>
<packaging>jar</packaging>
<name>novel-admin</name>

View File

@ -5,7 +5,7 @@
<parent>
<artifactId>novel</artifactId>
<groupId>com.java2nb</groupId>
<version>5.1.4</version>
<version>5.1.5</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -5,7 +5,7 @@
<parent>
<artifactId>novel</artifactId>
<groupId>com.java2nb</groupId>
<version>5.1.4</version>
<version>5.1.5</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -100,6 +100,22 @@ public class CrawlParser {
.replaceAll("<p>\\s*</p>", "")
.replaceAll("<p>", "")
.replaceAll("</p>", "<br/>");
// 小说简介过滤
String filterDesc = ruleBean.getFilterDesc();
if (StringUtils.isNotBlank(filterDesc)) {
String[] filterRules = filterDesc.replace("\r\n", "\n").split("\n");
for (String filterRule : filterRules) {
if (StringUtils.isNotBlank(filterRule)) {
desc = desc.replaceAll(filterRule, "");
}
}
}
// 去除小说简介前后空格
desc = desc.trim();
// 去除小说简介末尾冗余的小说名
if (desc.endsWith(bookName)) {
desc = desc.substring(0, desc.length() - bookName.length());
}
//设置书籍简介
book.setBookDesc(desc);
if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) {
@ -151,7 +167,7 @@ public class CrawlParser {
}
public boolean parseBookIndexAndContent(String sourceBookId, Book book, RuleBean ruleBean,
Map<Integer, BookIndex> existBookIndexMap, CrawlBookChapterHandler handler) throws InterruptedException{
Map<Integer, BookIndex> existBookIndexMap, CrawlBookChapterHandler handler) throws InterruptedException {
Date currentDate = new Date();
@ -238,6 +254,8 @@ public class CrawlParser {
}
}
}
// 去除小说内容末尾的所有换行
content = removeTrailingBrTags(content);
//插入章节目录和章节内容
BookIndex bookIndex = new BookIndex();
bookIndex.setIndexName(indexName);
@ -314,4 +332,12 @@ public class CrawlParser {
return false;
}
/**
 * Pre-compiled pattern for a run of {@code <br>}-style tags anchored at the end of the input.
 * Compiled once because the method below is invoked for every crawled chapter.
 * Whitespace around the tags may be ASCII whitespace or the ideographic space (U+3000);
 * the possessive quantifier keeps a failed attempt on a non-trailing run from backtracking.
 */
private static final java.util.regex.Pattern TRAILING_BR_TAGS =
    java.util.regex.Pattern.compile("(?i)(?:[\\s\\u3000]*<\\s*br\\s*/?\\s*>)++[\\s\\u3000]*$");

/**
 * Removes every {@code <br>}-like tag found at the end of the given string, together with the
 * whitespace (ASCII or ideographic U+3000) that surrounds those tags.
 *
 * <p>Tags are matched case-insensitively and may contain inner spaces or a self-closing slash
 * ({@code <br>}, {@code <BR/>}, {@code < br / >}). Tags that are followed by other text are
 * left untouched, and a string that does not end with such a tag is returned unchanged.
 *
 * @param str the text to clean; may be {@code null}
 * @return {@code str} without its trailing line-break tags, or {@code str} itself when it is
 *         {@code null} or empty
 */
public static String removeTrailingBrTags(String str) {
    // Nothing to strip; also avoids a NullPointerException on missing content.
    if (str == null || str.isEmpty()) {
        return str;
    }
    return TRAILING_BR_TAGS.matcher(str).replaceAll("");
}
}

View File

@ -45,6 +45,7 @@ public class RuleBean {
private String visitCountPatten;
private String descStart;
private String descEnd;
private String filterDesc;
private String upadateTimePatten;
private String upadateTimeFormatPatten;
private String bookIndexUrl;

View File

@ -118,6 +118,9 @@
示例:<b>&lt;/p&gt;</b>
<li><input type="text" id="descEnd" class="s_input icon_key" placeholder="小说简介结束截取字符串">
</li>
示例:<b>&lt;span\s+class="allshow"&gt;([^/]+)&lt;/span&gt;</b>
<li><textarea id="filterDesc"
placeholder="过滤简介多个内容换行" rows="5" cols="52"></textarea></li>
示例:<b>更新:(\d+-\d+-\d+\s\d+:\d+:\d+)&lt;/a&gt;</b>
<li><input type="text" id="upadateTimePatten" class="s_input icon_key"
placeholder="小说更新时间的正则表达式"></li>
@ -338,6 +341,9 @@
crawlRule.descEnd = descEnd;
var filterDesc = $("#filterDesc").val();
crawlRule.filterDesc = filterDesc;
var upadateTimePatten = $("#upadateTimePatten").val();
if (upadateTimePatten.length > 0) {

View File

@ -119,6 +119,9 @@
示例:<b>&lt;/p&gt;</b>
<li><input type="text" id="descEnd" class="s_input icon_key" placeholder="小说简介结束截取字符串">
</li>
示例:<b>&lt;span\s+class="allshow"&gt;([^/]+)&lt;/span&gt;</b>
<li><textarea id="filterDesc"
placeholder="过滤简介多个内容换行" rows="5" cols="52"></textarea></li>
示例:<b>更新:(\d+-\d+-\d+\s\d+:\d+:\d+)&lt;/a&gt;</b>
<li><input type="text" id="upadateTimePatten" class="s_input icon_key"
placeholder="小说更新时间的正则表达式"></li>
@ -266,6 +269,7 @@
$("#visitCountPatten").val(crawlRule.visitCountPatten);
$("#descStart").val(crawlRule.descStart);
$("#descEnd").val(crawlRule.descEnd);
$("#filterDesc").val(crawlRule.filterDesc);
$("#upadateTimePatten").val(crawlRule.upadateTimePatten);
$("#upadateTimeFormatPatten").val(crawlRule.upadateTimeFormatPatten);
$("#bookIndexUrl").val(crawlRule.bookIndexUrl);
@ -424,6 +428,9 @@
crawlRule.descEnd = descEnd;
var filterDesc = $("#filterDesc").val();
crawlRule.filterDesc = filterDesc;
var upadateTimePatten = $("#upadateTimePatten").val();
if (upadateTimePatten.length > 0) {

View File

@ -5,7 +5,7 @@
<parent>
<artifactId>novel</artifactId>
<groupId>com.java2nb</groupId>
<version>5.1.4</version>
<version>5.1.5</version>
</parent>
<modelVersion>4.0.0</modelVersion>

View File

@ -5,7 +5,7 @@
<groupId>com.java2nb</groupId>
<artifactId>novel</artifactId>
<version>5.1.4</version>
<version>5.1.5</version>
<modules>
<module>novel-common</module>
<module>novel-front</module>