perf(novel-crawl): 增加小说简介过滤规则

This commit is contained in:
xiongxiaoyang 2025-06-21 17:54:59 +08:00
parent 0830f6ffeb
commit 1534220f0c
4 changed files with 24 additions and 0 deletions

View File

@@ -100,6 +100,16 @@ public class CrawlParser {
.replaceAll("<p>\\s*</p>", "")
.replaceAll("<p>", "")
.replaceAll("</p>", "<br/>");
// 小说简介过滤
String filterDesc = ruleBean.getFilterDesc();
if (StringUtils.isNotBlank(filterDesc)) {
String[] filterRules = filterDesc.replace("\r\n", "\n").split("\n");
for (String filterRule : filterRules) {
if (StringUtils.isNotBlank(filterRule)) {
desc = desc.replaceAll(filterRule, "");
}
}
}
//设置书籍简介
book.setBookDesc(desc);
if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) {

View File

@@ -45,6 +45,7 @@ public class RuleBean {
private String visitCountPatten;
private String descStart;
private String descEnd;
private String filterDesc;
private String upadateTimePatten;
private String upadateTimeFormatPatten;
private String bookIndexUrl;

View File

@@ -118,6 +118,9 @@
示例<b>&lt;/p&gt;</b>
<li><input type="text" id="descEnd" class="s_input icon_key" placeholder="小说简介结束截取字符串">
</li>
示例<b>&lt;span\s+class="allshow"&gt;([^/]+)&lt;/span&gt;</b>
<li><textarea id="filterDesc"
placeholder="过滤简介(多个内容换行)" rows="5" cols="52"></textarea></li>
示例<b>更新(\d+-\d+-\d+\s\d+:\d+:\d+)&lt;/a&gt;</b>
<li><input type="text" id="upadateTimePatten" class="s_input icon_key"
placeholder="小说更新时间的正则表达式:"></li>
@@ -338,6 +341,9 @@
crawlRule.descEnd = descEnd;
var filterDesc = $("#filterDesc").val();
crawlRule.filterDesc = filterDesc;
var upadateTimePatten = $("#upadateTimePatten").val();
if (upadateTimePatten.length > 0) {

View File

@@ -119,6 +119,9 @@
示例<b>&lt;/p&gt;</b>
<li><input type="text" id="descEnd" class="s_input icon_key" placeholder="小说简介结束截取字符串">
</li>
示例<b>&lt;span\s+class="allshow"&gt;([^/]+)&lt;/span&gt;</b>
<li><textarea id="filterDesc"
placeholder="过滤简介(多个内容换行)" rows="5" cols="52"></textarea></li>
示例<b>更新(\d+-\d+-\d+\s\d+:\d+:\d+)&lt;/a&gt;</b>
<li><input type="text" id="upadateTimePatten" class="s_input icon_key"
placeholder="小说更新时间的正则表达式:"></li>
@@ -266,6 +269,7 @@
$("#visitCountPatten").val(crawlRule.visitCountPatten);
$("#descStart").val(crawlRule.descStart);
$("#descEnd").val(crawlRule.descEnd);
$("#filterDesc").val(crawlRule.filterDesc);
$("#upadateTimePatten").val(crawlRule.upadateTimePatten);
$("#upadateTimeFormatPatten").val(crawlRule.upadateTimeFormatPatten);
$("#bookIndexUrl").val(crawlRule.bookIndexUrl);
@@ -424,6 +428,9 @@
crawlRule.descEnd = descEnd;
var filterDesc = $("#filterDesc").val();
crawlRule.filterDesc = filterDesc;
var upadateTimePatten = $("#upadateTimePatten").val();
if (upadateTimePatten.length > 0) {