diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2b10a2b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/novel-front/novel-front.iml
diff --git a/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java b/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java
index 23f9e49..b128d7f 100644
--- a/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java
+++ b/novel-admin/src/main/java/com/java2nb/books/service/impl/BookCrawlServiceImpl.java
@@ -22,8 +22,11 @@ import java.util.regex.Pattern;
import com.java2nb.books.dao.BookCrawlDao;
import com.java2nb.books.domain.BookCrawlDO;
import com.java2nb.books.service.BookCrawlService;
+import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.client.RestTemplate;
+import static java.util.regex.Pattern.*;
+
@Service
public class BookCrawlServiceImpl implements BookCrawlService {
@@ -181,7 +184,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
String bookListUrl = "http://book.sfacg.com/List/default.aspx?&tid=" + catId + "&if=1&PageIndex=" + page;
- String forObject = getByHttpClient(bookListUrl);
+ String forObject = getByTemplate(bookListUrl);
if (forObject != null) {
Pattern bookPatten = Pattern.compile("href=\"/Novel/(\\d+)/\"");
@@ -195,7 +198,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
}
long bookNum = Long.parseLong(bookMatcher.group(1));
String bookUrl = "http://book.sfacg.com/Novel/" + bookNum;
- String forObject1 = getByHttpClient(bookUrl);
+ String forObject1 = getByTemplate(bookUrl);
if (forObject1 != null) {
Pattern updateTimePatten = Pattern.compile("更新:(\\d+/\\d+/\\d+ \\d+:\\d+:\\d+)");
Matcher updateTimeMatch = updateTimePatten.matcher(forObject1);
@@ -329,7 +332,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
//读取目录
String indexUrl = "http://book.sfacg.com/Novel/" + bookNum + "/MainIndex/";
- String forObject2 = getByHttpClient(indexUrl);
+ String forObject2 = getByTemplate(indexUrl);
if (forObject2 != null) {
Pattern indexListPatten = Pattern.compile("href=\"(/Novel/\\d+/\\d+/\\d+/)\"\\s+title=\"([^\"]+)\\s*");
Matcher indexListMatch = indexListPatten.matcher(forObject2);
@@ -352,7 +355,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
//查询章节内容
- String forObject3 = getByHttpClient(contentUrl);
+ String forObject3 = getByTemplate(contentUrl);
if (forObject3 != null && !forObject3.contains("内容整改中,请等待")) {
String content = forObject3.substring(forObject3.indexOf("
") + 6);
@@ -413,13 +416,13 @@ public class BookCrawlServiceImpl implements BookCrawlService {
catBookListUrlBase = baseUrl + "/lhb/";
}
//拼接分类URL
- int page = 1;//起始页码
+ int page = 1;
int totalPage = page;
String catBookListUrl = catBookListUrlBase + i + "/" + page + ".html";
- String forObject = getByHttpClient(catBookListUrl);
+ String forObject = getByTemplate(catBookListUrl);
if (forObject != null) {
//匹配分页数
- Pattern pattern = Pattern.compile("value=\"(\\d+)/(\\d+)\"");
+ Pattern pattern = compile("value=\"(\\d+)/(\\d+)\"");
Matcher matcher = pattern.matcher(forObject);
boolean isFind = matcher.find();
System.out.println("匹配分页数" + isFind);
@@ -427,7 +430,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
int currentPage = Integer.parseInt(matcher.group(1));
totalPage = Integer.parseInt(matcher.group(2));
//解析第一页书籍的数据
- Pattern bookPatten = Pattern.compile("href=\"/(\\d+_\\d+)/\"");
+ Pattern bookPatten = compile("href=\"/(\\d+_\\d+)/\"");
parseBiquTaBook(bookPatten, forObject, i, baseUrl);
while (currentPage < totalPage) {
if (isInteruptBiquTaCrawl) {
@@ -435,7 +438,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
}
catBookListUrl = catBookListUrlBase + i + "/" + (currentPage + 1) + ".html";
- forObject = getByHttpClient(catBookListUrl);
+ forObject = getByTemplate(catBookListUrl);
if (forObject != null) {
//匹配分页数
matcher = pattern.matcher(forObject);
@@ -457,15 +460,15 @@ public class BookCrawlServiceImpl implements BookCrawlService {
private void parseBiquTaBook(Pattern bookPatten, String forObject, int catNum, String baseUrl) {
Matcher matcher2 = bookPatten.matcher(forObject);
boolean isFind = matcher2.find();
- Pattern scorePatten = Pattern.compile("
(\\d+\\.\\d+)分
");
+ Pattern scorePatten = compile("
(\\d+\\.\\d+)分
");
Matcher scoreMatch = scorePatten.matcher(forObject);
boolean scoreFind = scoreMatch.find();
- Pattern bookNamePatten = Pattern.compile("
([^/]+)
");
+ Pattern bookNamePatten = compile("
([^/]+)
");
Matcher bookNameMatch = bookNamePatten.matcher(forObject);
boolean isBookNameMatch = bookNameMatch.find();
- Pattern authorPatten = Pattern.compile(">作者:([^/]+)<");
+ Pattern authorPatten = compile(">作者:([^/]+)<");
Matcher authoreMatch = authorPatten.matcher(forObject);
boolean isFindAuthor = authoreMatch.find();
@@ -498,13 +501,13 @@ public class BookCrawlServiceImpl implements BookCrawlService {
String bokNum = matcher2.group(1);
String bookUrl = baseUrl + "/" + bokNum + "/";
- String body = getByHttpClient(bookUrl);
+ String body = getByTemplate(bookUrl);
if (body != null) {
- Pattern statusPatten = Pattern.compile("状态:([^/]+)");
+ Pattern statusPatten = compile("状态:([^/]+)");
Matcher statusMatch = statusPatten.matcher(body);
if (statusMatch.find()) {
String status = statusMatch.group(1);
- Pattern updateTimePatten = Pattern.compile("更新:(\\d+-\\d+-\\d+\\s\\d+:\\d+:\\d+)");
+ Pattern updateTimePatten = compile("更新:(\\d+-\\d+-\\d+\\s\\d+:\\d+:\\d+)");
Matcher updateTimeMatch = updateTimePatten.matcher(body);
if (updateTimeMatch.find()) {
String updateTimeStr = updateTimeMatch.group(1);
@@ -513,12 +516,12 @@ public class BookCrawlServiceImpl implements BookCrawlService {
if (updateTime.getTime() < new SimpleDateFormat("yyyy-MM-dd").parse(crawlConfig.getMinUptTime()).getTime()) {
continue;
}
- Pattern picPatten = Pattern.compile("

]+)\"\\s+onerror=\"this.src=");
+ Pattern picPatten = compile("

]+)\"\\s+onerror=\"this.src=");
Matcher picMather = picPatten.matcher(body);
if (picMather.find()) {
String picSrc = picMather.group(1);
- Pattern descPatten = Pattern.compile("class=\"review\">([^<]+)");
+ Pattern descPatten = compile("class=\"review\">([^<]+)");
Matcher descMatch = descPatten.matcher(body);
if (descMatch.find()) {
String desc = descMatch.group(1);
@@ -538,13 +541,13 @@ public class BookCrawlServiceImpl implements BookCrawlService {
List
contentList = new ArrayList<>();
//读取目录
- Pattern indexPatten = Pattern.compile("查看完整目录");
+ Pattern indexPatten = compile("查看完整目录");
Matcher indexMatch = indexPatten.matcher(body);
if (indexMatch.find()) {
String indexUrl = baseUrl + indexMatch.group(1);
- String body2 = getByHttpClient(indexUrl);
+ String body2 = getByTemplate(indexUrl);
if (body2 != null) {
- Pattern indexListPatten = Pattern.compile("([^/]+)");
+ Pattern indexListPatten = compile("([^/]+)");
Matcher indexListMatch = indexListPatten.matcher(body2);
boolean isFindIndex = indexListMatch.find();
@@ -565,7 +568,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
//查询章节内容
- String body3 = getByHttpClient(contentUrl.replace("//m.","//www."));
+ String body3 = getByTemplate(contentUrl.replace("//m.","//www."));
if (body3 != null) {
String start = "id=\"content\">";
String end = "