diff --git a/novel-common/pom.xml b/novel-common/pom.xml index a42e663..a761e8c 100644 --- a/novel-common/pom.xml +++ b/novel-common/pom.xml @@ -5,7 +5,7 @@ novel com.java2nb - 2.5.0 + 2.5.1 4.0.0 diff --git a/novel-crawl/pom.xml b/novel-crawl/pom.xml index 759e827..f5fdc87 100644 --- a/novel-crawl/pom.xml +++ b/novel-crawl/pom.xml @@ -5,7 +5,7 @@ novel com.java2nb - 2.5.0 + 2.5.1 4.0.0 diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java index a8fbe52..66e0cd1 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java @@ -96,8 +96,12 @@ public class CrawlParser { String desc = bookDetailHtml.substring(bookDetailHtml.indexOf(ruleBean.getDescStart()) + ruleBean.getDescStart().length()); desc = desc.substring(0, desc.indexOf(ruleBean.getDescEnd())); - //过滤掉简介中的a标签 - desc = desc.replaceAll("",""); + //过滤掉简介中的特殊标签 + desc = desc.replaceAll("","") + .replaceAll("","") + .replaceAll("

\\s*

","") + .replaceAll("

","") + .replaceAll("

","
"); //设置书籍简介 book.setBookDesc(desc); if (StringUtils.isNotBlank(ruleBean.getStatusPatten())) { diff --git a/novel-front/pom.xml b/novel-front/pom.xml index e477ad0..0227160 100644 --- a/novel-front/pom.xml +++ b/novel-front/pom.xml @@ -5,7 +5,7 @@ novel com.java2nb - 2.5.0 + 2.5.1 4.0.0 diff --git a/pom.xml b/pom.xml index 9612117..1b20eb9 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ com.java2nb novel - 2.5.0 + 2.5.1 novel-common novel-front diff --git a/sql/20200608.sql b/sql/20200608.sql new file mode 100644 index 0000000..350ab86 --- /dev/null +++ b/sql/20200608.sql @@ -0,0 +1,2 @@ +INSERT INTO `crawl_source` (`id`, `source_name`, `crawl_rule`, `source_status`, `create_time`, `update_time`) VALUES +(6, '新笔趣阁', '{\n \"bookListUrl\": \"http://www.xbiquge.la/fenlei/{catId}_{page}.html\",\n \"catIdRule\": {\n \"catId1\": \"1\",\n \"catId2\": \"2\",\n \"catId3\": \"3\",\n \"catId4\": \"4\",\n \"catId5\": \"6\",\n \"catId6\": \"5\"\n },\n \"bookIdPatten\": \"\",\n \"pagePatten\": \"(\\\\d+)/\\\\d+\",\n \"totalPagePatten\": \"\\\\d+/(\\\\d+)\",\n \"bookDetailUrl\": \"http://www.xbiquge.la/{bookId}/\",\n \"bookNamePatten\": \"

([^/]+)

\",\n \"authorNamePatten\": \"者:([^/]+)

\",\n \"picUrlPatten\": \"src=\\\"(http://www.xbiquge.la/files/article/image/\\\\d+/\\\\d+/\\\\d+s\\\\.jpg)\\\"\",\n \"bookStatusRule\": {},\n \"descStart\": \"
\",\n \"descEnd\": \"
\",\n \"upadateTimePatten\": \"

最后更新:(\\\\d+-\\\\d+-\\\\d+\\\\s\\\\d+:\\\\d+:\\\\d+)

\",\n \"upadateTimeFormatPatten\": \"yyyy-MM-dd HH:mm:ss\",\n \"bookIndexUrl\": \"http://www.xbiquge.la/{bookId}/\",\n \"indexIdPatten\": \"[^/]+\",\n \"indexNamePatten\": \"([^/]+)\",\n \"bookContentUrl\": \"http://www.xbiquge.la/{bookId}/{indexId}.html\",\n \"contentStart\": \"
\",\n \"contentEnd\": \"

\"\n}', 0, '2020-05-23 22:46:58', '2020-05-23 22:46:58');