mirror of
https://github.com/201206030/novel-plus.git
synced 2025-06-23 04:18:30 +00:00
perf(novel-crawl): 增加小说简介过滤规则
This commit is contained in:
parent
0830f6ffeb
commit
1534220f0c
@ -100,6 +100,16 @@ public class CrawlParser {
|
|||||||
.replaceAll("<p>\\s*</p>", "")
|
.replaceAll("<p>\\s*</p>", "")
|
||||||
.replaceAll("<p>", "")
|
.replaceAll("<p>", "")
|
||||||
.replaceAll("</p>", "<br/>");
|
.replaceAll("</p>", "<br/>");
|
||||||
|
// 小说简介过滤
|
||||||
|
String filterDesc = ruleBean.getFilterDesc();
|
||||||
|
if (StringUtils.isNotBlank(filterDesc)) {
|
||||||
|
String[] filterRules = filterDesc.replace("\r\n", "\n").split("\n");
|
||||||
|
for (String filterRule : filterRules) {
|
||||||
|
if (StringUtils.isNotBlank(filterRule)) {
|
||||||
|
desc = desc.replaceAll(filterRule, "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
//设置书籍简介
|
//设置书籍简介
|
||||||
book.setBookDesc(desc);
|
book.setBookDesc(desc);
|
||||||
if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) {
|
if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) {
|
||||||
|
@ -45,6 +45,7 @@ public class RuleBean {
|
|||||||
private String visitCountPatten;
|
private String visitCountPatten;
|
||||||
private String descStart;
|
private String descStart;
|
||||||
private String descEnd;
|
private String descEnd;
|
||||||
|
private String filterDesc;
|
||||||
private String upadateTimePatten;
|
private String upadateTimePatten;
|
||||||
private String upadateTimeFormatPatten;
|
private String upadateTimeFormatPatten;
|
||||||
private String bookIndexUrl;
|
private String bookIndexUrl;
|
||||||
|
@ -118,6 +118,9 @@
|
|||||||
示例:<b></p></b>
|
示例:<b></p></b>
|
||||||
<li><input type="text" id="descEnd" class="s_input icon_key" placeholder="小说简介结束截取字符串:">
|
<li><input type="text" id="descEnd" class="s_input icon_key" placeholder="小说简介结束截取字符串:">
|
||||||
</li>
|
</li>
|
||||||
|
示例:<b><span\s+class="allshow">([^/]+)</span></b>
|
||||||
|
<li><textarea id="filterDesc"
|
||||||
|
placeholder="过滤简介(多个内容换行)" rows="5" cols="52"></textarea></li>
|
||||||
示例:<b>更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a></b>
|
示例:<b>更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a></b>
|
||||||
<li><input type="text" id="upadateTimePatten" class="s_input icon_key"
|
<li><input type="text" id="upadateTimePatten" class="s_input icon_key"
|
||||||
placeholder="小说更新时间的正则表达式:"></li>
|
placeholder="小说更新时间的正则表达式:"></li>
|
||||||
@ -338,6 +341,9 @@
|
|||||||
|
|
||||||
crawlRule.descEnd = descEnd;
|
crawlRule.descEnd = descEnd;
|
||||||
|
|
||||||
|
var filterDesc = $("#filterDesc").val();
|
||||||
|
crawlRule.filterDesc = filterDesc;
|
||||||
|
|
||||||
var upadateTimePatten = $("#upadateTimePatten").val();
|
var upadateTimePatten = $("#upadateTimePatten").val();
|
||||||
|
|
||||||
if (upadateTimePatten.length > 0) {
|
if (upadateTimePatten.length > 0) {
|
||||||
|
@ -119,6 +119,9 @@
|
|||||||
示例:<b></p></b>
|
示例:<b></p></b>
|
||||||
<li><input type="text" id="descEnd" class="s_input icon_key" placeholder="小说简介结束截取字符串:">
|
<li><input type="text" id="descEnd" class="s_input icon_key" placeholder="小说简介结束截取字符串:">
|
||||||
</li>
|
</li>
|
||||||
|
示例:<b><span\s+class="allshow">([^/]+)</span></b>
|
||||||
|
<li><textarea id="filterDesc"
|
||||||
|
placeholder="过滤简介(多个内容换行)" rows="5" cols="52"></textarea></li>
|
||||||
示例:<b>更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a></b>
|
示例:<b>更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a></b>
|
||||||
<li><input type="text" id="upadateTimePatten" class="s_input icon_key"
|
<li><input type="text" id="upadateTimePatten" class="s_input icon_key"
|
||||||
placeholder="小说更新时间的正则表达式:"></li>
|
placeholder="小说更新时间的正则表达式:"></li>
|
||||||
@ -266,6 +269,7 @@
|
|||||||
$("#visitCountPatten").val(crawlRule.visitCountPatten);
|
$("#visitCountPatten").val(crawlRule.visitCountPatten);
|
||||||
$("#descStart").val(crawlRule.descStart);
|
$("#descStart").val(crawlRule.descStart);
|
||||||
$("#descEnd").val(crawlRule.descEnd);
|
$("#descEnd").val(crawlRule.descEnd);
|
||||||
|
$("#filterDesc").val(crawlRule.filterDesc);
|
||||||
$("#upadateTimePatten").val(crawlRule.upadateTimePatten);
|
$("#upadateTimePatten").val(crawlRule.upadateTimePatten);
|
||||||
$("#upadateTimeFormatPatten").val(crawlRule.upadateTimeFormatPatten);
|
$("#upadateTimeFormatPatten").val(crawlRule.upadateTimeFormatPatten);
|
||||||
$("#bookIndexUrl").val(crawlRule.bookIndexUrl);
|
$("#bookIndexUrl").val(crawlRule.bookIndexUrl);
|
||||||
@ -424,6 +428,9 @@
|
|||||||
|
|
||||||
crawlRule.descEnd = descEnd;
|
crawlRule.descEnd = descEnd;
|
||||||
|
|
||||||
|
var filterDesc = $("#filterDesc").val();
|
||||||
|
crawlRule.filterDesc = filterDesc;
|
||||||
|
|
||||||
var upadateTimePatten = $("#upadateTimePatten").val();
|
var upadateTimePatten = $("#upadateTimePatten").val();
|
||||||
|
|
||||||
if (upadateTimePatten.length > 0) {
|
if (upadateTimePatten.length > 0) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user