mirror of
https://github.com/201206030/novel-plus.git
synced 2025-06-23 04:18:30 +00:00
perf(novel-crawl): 增加小说简介过滤规则
This commit is contained in:
parent
0830f6ffeb
commit
1534220f0c
@ -100,6 +100,16 @@ public class CrawlParser {
|
||||
.replaceAll("<p>\\s*</p>", "")
|
||||
.replaceAll("<p>", "")
|
||||
.replaceAll("</p>", "<br/>");
|
||||
// 小说简介过滤
|
||||
String filterDesc = ruleBean.getFilterDesc();
|
||||
if (StringUtils.isNotBlank(filterDesc)) {
|
||||
String[] filterRules = filterDesc.replace("\r\n", "\n").split("\n");
|
||||
for (String filterRule : filterRules) {
|
||||
if (StringUtils.isNotBlank(filterRule)) {
|
||||
desc = desc.replaceAll(filterRule, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
//设置书籍简介
|
||||
book.setBookDesc(desc);
|
||||
if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) {
|
||||
|
@ -45,6 +45,7 @@ public class RuleBean {
|
||||
private String visitCountPatten;
|
||||
private String descStart;
|
||||
private String descEnd;
|
||||
private String filterDesc;
|
||||
private String upadateTimePatten;
|
||||
private String upadateTimeFormatPatten;
|
||||
private String bookIndexUrl;
|
||||
|
@ -118,6 +118,9 @@
|
||||
示例:<b></p></b>
|
||||
<li><input type="text" id="descEnd" class="s_input icon_key" placeholder="小说简介结束截取字符串:">
|
||||
</li>
|
||||
示例:<b><span\s+class="allshow">([^/]+)</span></b>
|
||||
<li><textarea id="filterDesc"
|
||||
placeholder="过滤简介(多个内容换行)" rows="5" cols="52"></textarea></li>
|
||||
示例:<b>更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a></b>
|
||||
<li><input type="text" id="upadateTimePatten" class="s_input icon_key"
|
||||
placeholder="小说更新时间的正则表达式:"></li>
|
||||
@ -338,6 +341,9 @@
|
||||
|
||||
crawlRule.descEnd = descEnd;
|
||||
|
||||
var filterDesc = $("#filterDesc").val();
|
||||
crawlRule.filterDesc = filterDesc;
|
||||
|
||||
var upadateTimePatten = $("#upadateTimePatten").val();
|
||||
|
||||
if (upadateTimePatten.length > 0) {
|
||||
|
@ -119,6 +119,9 @@
|
||||
示例:<b></p></b>
|
||||
<li><input type="text" id="descEnd" class="s_input icon_key" placeholder="小说简介结束截取字符串:">
|
||||
</li>
|
||||
示例:<b><span\s+class="allshow">([^/]+)</span></b>
|
||||
<li><textarea id="filterDesc"
|
||||
placeholder="过滤简介(多个内容换行)" rows="5" cols="52"></textarea></li>
|
||||
示例:<b>更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a></b>
|
||||
<li><input type="text" id="upadateTimePatten" class="s_input icon_key"
|
||||
placeholder="小说更新时间的正则表达式:"></li>
|
||||
@ -266,6 +269,7 @@
|
||||
$("#visitCountPatten").val(crawlRule.visitCountPatten);
|
||||
$("#descStart").val(crawlRule.descStart);
|
||||
$("#descEnd").val(crawlRule.descEnd);
|
||||
$("#filterDesc").val(crawlRule.filterDesc);
|
||||
$("#upadateTimePatten").val(crawlRule.upadateTimePatten);
|
||||
$("#upadateTimeFormatPatten").val(crawlRule.upadateTimeFormatPatten);
|
||||
$("#bookIndexUrl").val(crawlRule.bookIndexUrl);
|
||||
@ -424,6 +428,9 @@
|
||||
|
||||
crawlRule.descEnd = descEnd;
|
||||
|
||||
var filterDesc = $("#filterDesc").val();
|
||||
crawlRule.filterDesc = filterDesc;
|
||||
|
||||
var upadateTimePatten = $("#upadateTimePatten").val();
|
||||
|
||||
if (upadateTimePatten.length > 0) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user