mirror of
https://github.com/201206030/novel.git
synced 2025-04-27 07:30:50 +00:00
优化WEB端,可以使用回车直接发送弹幕;爬虫优化
This commit is contained in:
parent
c30fb7965a
commit
95149646f6
@ -95,14 +95,14 @@ public class BookCrawlServiceImpl implements BookCrawlService {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
crawlBook(bookCrawl.getCrawlWebCode());
|
||||
crawlBook(bookCrawl);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
private void crawlBook(int status){
|
||||
private void crawlBook(BookCrawlDO bookCrawl) {
|
||||
for (int i = 1; i <= 7; i++) {
|
||||
|
||||
int finalI = i;
|
||||
@ -110,24 +110,279 @@ public class BookCrawlServiceImpl implements BookCrawlService {
|
||||
() -> {
|
||||
|
||||
try {
|
||||
switch (status) {
|
||||
|
||||
switch (bookCrawl.getCrawlWebCode()) {
|
||||
case 1: {
|
||||
while (true) {
|
||||
if (isInteruptBiquDaoCrawl) {
|
||||
return;
|
||||
}
|
||||
crawBiqudaoBooks(finalI);
|
||||
break;
|
||||
Thread.sleep(1000 * 60 * 60 * 24);
|
||||
}
|
||||
}
|
||||
case 2: {
|
||||
crawBiquTaBooks(finalI);
|
||||
break;
|
||||
while (true) {
|
||||
if (isInteruptBiquTaCrawl) {
|
||||
return;
|
||||
}
|
||||
crawBiquTaBooks(finalI);
|
||||
Thread.sleep(1000 * 60 * 60 * 24);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
bookCrawl.setStatus(0);
|
||||
bookCrawlDao.update(bookCrawl);
|
||||
}
|
||||
|
||||
}
|
||||
).start();
|
||||
}
|
||||
|
||||
|
||||
|
||||
for (int j = 21; j <= 29; j++) {
|
||||
int finalJ = j;
|
||||
new Thread(() -> {
|
||||
|
||||
for (int i = 1; i <= 499; i++) {
|
||||
if(isInteruptBiquTaCrawl || isInteruptBiquDaoCrawl){
|
||||
return;
|
||||
}
|
||||
System.out.println("==============分类============:" + finalJ);
|
||||
System.out.println("==============页码============:" + i);
|
||||
int catId = finalJ;
|
||||
int page = i;
|
||||
|
||||
String bookListUrl = "http://book.sfacg.com/List/default.aspx?&tid=" + catId + "&if=1&PageIndex=" + page;
|
||||
|
||||
String forObject = getByHttpClient(bookListUrl);
|
||||
|
||||
if (forObject != null) {
|
||||
Pattern bookPatten = Pattern.compile("href=\"/Novel/(\\d+)/\"");
|
||||
Matcher bookMatcher = bookPatten.matcher(forObject);
|
||||
boolean isFindBook = bookMatcher.find();
|
||||
|
||||
while (isFindBook) {
|
||||
try {
|
||||
if(isInteruptBiquTaCrawl || isInteruptBiquDaoCrawl){
|
||||
return;
|
||||
}
|
||||
long bookNum = Long.parseLong(bookMatcher.group(1));
|
||||
String bookUrl = "http://book.sfacg.com/Novel/" + bookNum;
|
||||
String forObject1 = getByHttpClient(bookUrl);
|
||||
if (forObject1 != null) {
|
||||
Pattern updateTimePatten = Pattern.compile("更新:(\\d+/\\d+/\\d+ \\d+:\\d+:\\d+)");
|
||||
Matcher updateTimeMatch = updateTimePatten.matcher(forObject1);
|
||||
boolean isFindUpdateTime = updateTimeMatch.find();
|
||||
if (isFindUpdateTime) {
|
||||
String updateTimeStr = updateTimeMatch.group(1);
|
||||
String dateStr = updateTimeStr;
|
||||
int firstPos = dateStr.indexOf("/");
|
||||
String year = dateStr.substring(0, firstPos);
|
||||
dateStr = dateStr.substring(firstPos + 1);
|
||||
firstPos = dateStr.indexOf("/");
|
||||
String month = dateStr.substring(0, firstPos);
|
||||
dateStr = dateStr.substring(firstPos + 1);
|
||||
firstPos = dateStr.indexOf(" ");
|
||||
String day = dateStr.substring(0, firstPos);
|
||||
dateStr = dateStr.substring(firstPos + 1);
|
||||
firstPos = dateStr.indexOf(":");
|
||||
String hour = dateStr.substring(0, firstPos);
|
||||
dateStr = dateStr.substring(firstPos + 1);
|
||||
firstPos = dateStr.indexOf(":");
|
||||
String minus = dateStr.substring(0, firstPos);
|
||||
String second = dateStr.substring(firstPos + 1);
|
||||
if (month.length() == 1) {
|
||||
month = "0" + month;
|
||||
}
|
||||
if (day.length() == 1) {
|
||||
day = "0" + day;
|
||||
}
|
||||
if (hour.length() == 1) {
|
||||
hour = "0" + hour;
|
||||
}
|
||||
if (minus.length() == 1) {
|
||||
minus = "0" + minus;
|
||||
}
|
||||
if (second.length() == 1) {
|
||||
second = "0" + second;
|
||||
}
|
||||
|
||||
|
||||
Date updateTime = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(updateTimeStr);
|
||||
|
||||
|
||||
//Date updateTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(year+"-"+month+"-"+hour+" "+minus+" "+minus);
|
||||
Pattern bookNamePatten = Pattern.compile("<h1 class=\"title\">\\s*" +
|
||||
"<span class=\"text\">([^<]+)</span>\\s*" +
|
||||
"<span");
|
||||
Matcher bookNameMatcher = bookNamePatten.matcher(forObject1);
|
||||
boolean isFindBookName = bookNameMatcher.find();
|
||||
if (isFindBookName) {
|
||||
String bookName = bookNameMatcher.group(1);
|
||||
System.out.println(bookName);
|
||||
Pattern authorPatten = Pattern.compile("<div class=\"author-name\">\\s*" +
|
||||
"<span>([^<]+)</span>\\s*" +
|
||||
"</div>");
|
||||
Matcher authorMatcher = authorPatten.matcher(forObject1);
|
||||
boolean isFindAuthor = authorMatcher.find();
|
||||
if (isFindAuthor) {
|
||||
String author = authorMatcher.group(1);
|
||||
|
||||
Pattern picPtten = Pattern.compile("src=\"(http://rs.sfacg.com/web/novel/images/NovelCover/Big/[^\"]+)\"");
|
||||
Matcher picMatcher = picPtten.matcher(forObject1);
|
||||
if (picMatcher.find()) {
|
||||
String pic = picMatcher.group(1);
|
||||
|
||||
Pattern visitPatten = Pattern.compile(">点击:(\\d+)<");
|
||||
Matcher visitMatcher = visitPatten.matcher(forObject1);
|
||||
boolean isFindVisit = visitMatcher.find();
|
||||
if (isFindVisit) {
|
||||
String visit = visitMatcher.group(1);
|
||||
|
||||
Pattern statusPatten = Pattern.compile(">字数:\\d+字\\[([^<]+)\\]<");
|
||||
Matcher statusMatcher = statusPatten.matcher(forObject1);
|
||||
boolean isFindStatus = statusMatcher.find();
|
||||
if (isFindStatus) {
|
||||
String status = statusMatcher.group(1);
|
||||
|
||||
if ("已完结".equals(status)) {//先爬已完结的
|
||||
|
||||
status = "已完成";
|
||||
}
|
||||
|
||||
|
||||
Pattern scorePatten = Pattern.compile("<div class=\"num\">\\s*" +
|
||||
"<span>(\\d+\\.\\d+)</span>\\s*" +
|
||||
"</div>");
|
||||
Matcher scoreMather = scorePatten.matcher(forObject1);
|
||||
boolean isFindScore = scoreMather.find();
|
||||
if (isFindScore) {
|
||||
|
||||
float score = Float.parseFloat(scoreMather.group(1));
|
||||
//if (score >= 7.0) {
|
||||
|
||||
Pattern descPatten = Pattern.compile("<p class=\"introduce\">\\s*" +
|
||||
"([^<]+)\\s*</p>");
|
||||
Matcher descMatcher = descPatten.matcher(forObject1);
|
||||
boolean isFindDesc = descMatcher.find();
|
||||
if (isFindDesc) {
|
||||
String desc = descMatcher.group(1);
|
||||
|
||||
Pattern tagPatten = Pattern.compile("<li class=\"tag\">\\s*" +
|
||||
"<a href=\"/stag/\\d+/\" class=\"highlight\"><span class=\"icn\">[^<]+</span><span class=\"text\">([^<]+)</span></a>\\s*" +
|
||||
"</li>");
|
||||
Matcher tagMatch = tagPatten.matcher(forObject1);
|
||||
String tag = "";
|
||||
boolean isFindTag = tagMatch.find();
|
||||
while (isFindTag) {
|
||||
tag += ("," + tagMatch.group(1));
|
||||
isFindTag = tagMatch.find();
|
||||
}
|
||||
|
||||
if (tag.length() > 0) {
|
||||
tag = tag.substring(1);
|
||||
}
|
||||
|
||||
|
||||
BookDO book = new BookDO();
|
||||
book.setAuthor(author);
|
||||
book.setCatid(8);
|
||||
book.setBookDesc(desc);
|
||||
book.setBookName(bookName);
|
||||
book.setSoftTag(tag);
|
||||
book.setSoftCat(catId);
|
||||
book.setScore(score > 10 ? 8.0f : score);
|
||||
book.setVisitCount(Long.parseLong(visit));
|
||||
book.setPicUrl(pic);
|
||||
book.setBookStatus(status);
|
||||
book.setUpdateTime(updateTime);
|
||||
|
||||
List<BookIndexDO> indexList = new ArrayList<>();
|
||||
List<BookContentDO> contentList = new ArrayList<>();
|
||||
|
||||
//读取目录
|
||||
String indexUrl = "http://book.sfacg.com/Novel/" + bookNum + "/MainIndex/";
|
||||
String forObject2 = getByHttpClient(indexUrl);
|
||||
if (forObject2 != null) {
|
||||
Pattern indexListPatten = Pattern.compile("href=\"(/Novel/\\d+/\\d+/\\d+/)\"\\s+title=\"([^\"]+)\\s*");
|
||||
Matcher indexListMatch = indexListPatten.matcher(forObject2);
|
||||
|
||||
boolean isFindIndex = indexListMatch.find();
|
||||
|
||||
int indexNum = 0;
|
||||
|
||||
//查询该书籍已存在目录号
|
||||
List<Integer> hasIndexNum = queryIndexCountByBookNameAndBAuthor(bookName, author);
|
||||
|
||||
while (isFindIndex) {
|
||||
if(isInteruptBiquTaCrawl || isInteruptBiquDaoCrawl){
|
||||
return;
|
||||
}
|
||||
if (!hasIndexNum.contains(indexNum)) {
|
||||
|
||||
String contentUrl = "http://book.sfacg.com" + indexListMatch.group(1);
|
||||
String indexName = indexListMatch.group(2);
|
||||
|
||||
|
||||
//查询章节内容
|
||||
String forObject3 = getByHttpClient(contentUrl);
|
||||
if (forObject3 != null && !forObject3.contains("内容整改中,请等待")) {
|
||||
String content = forObject3.substring(forObject3.indexOf("<div class=\"article-content"));
|
||||
content = content.substring(0, content.indexOf("</div>") + 6);
|
||||
//TODO插入章节目录和章节内容
|
||||
BookIndexDO bookIndex = new BookIndexDO();
|
||||
bookIndex.setIndexName(indexName);
|
||||
bookIndex.setIndexNum(indexNum);
|
||||
indexList.add(bookIndex);
|
||||
BookContentDO bookContent = new BookContentDO();
|
||||
bookContent.setContent(content);
|
||||
bookContent.setIndexNum(indexNum);
|
||||
contentList.add(bookContent);
|
||||
|
||||
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
indexNum++;
|
||||
isFindIndex = indexListMatch.find();
|
||||
}
|
||||
if (indexList.size() == contentList.size() && indexList.size() > 0) {
|
||||
saveBookAndIndexAndContent(book, indexList, contentList);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
isFindBook = bookMatcher.find();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}).start();
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void crawBiquTaBooks(int i) {
|
||||
@ -630,8 +885,6 @@ public class BookCrawlServiceImpl implements BookCrawlService {
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -35,8 +35,8 @@ spring:
|
||||
type: com.alibaba.druid.pool.DruidDataSource
|
||||
driverClassName: com.mysql.jdbc.Driver
|
||||
url: jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf8&serverTimezone=Asia/Shanghai
|
||||
username: root
|
||||
password: test123456
|
||||
username: books
|
||||
password: books
|
||||
#password:
|
||||
initialSize: 1
|
||||
minIdle: 3
|
||||
|
10
novel-admin/src/main/resources/crawl.properties
Normal file
10
novel-admin/src/main/resources/crawl.properties
Normal file
@ -0,0 +1,10 @@
|
||||
#爬虫线程数
|
||||
threadCount=1
|
||||
#爬取优先级 1:评分优先 2:更新时间优先
|
||||
priority=1
|
||||
#小说最低评分,0表示不限制
|
||||
lowestScore=0
|
||||
#小说最小更新时间
|
||||
minUptTime=2000-01-01 00:00:00
|
||||
#爬取最大条数,0表示不限制
|
||||
maxNumber=0
|
@ -159,14 +159,15 @@ function detail(id) {
|
||||
});
|
||||
}
|
||||
|
||||
function edit(id) {
|
||||
function edit(){
|
||||
console.log('打开配置页面');
|
||||
layer.open({
|
||||
type : 2,
|
||||
title: '编辑',
|
||||
title : '增加',
|
||||
maxmin : true,
|
||||
shadeClose: false, // 点击遮罩关闭层
|
||||
shadeClose : false,
|
||||
area : [ '800px', '520px' ],
|
||||
content: prefix + '/edit/' + id // iframe的url
|
||||
content : prefix + '/edit'
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -9,14 +9,9 @@
|
||||
<div class="ibox-body">
|
||||
<div class="fixed-table-toolbar">
|
||||
<div class="columns pull-left">
|
||||
<button shiro:hasPermission="books:bookCrawl:add" type="button"
|
||||
class="btn btn-primary" onclick="add()">
|
||||
<i class="fa fa-plus" aria-hidden="true"></i>添加
|
||||
</button>
|
||||
<button shiro:hasPermission="books:bookCrawl:batchRemove" type="button"
|
||||
class="btn btn-danger"
|
||||
onclick="batchRemove()">
|
||||
<i class="fa fa-trash" aria-hidden="true"></i>删除
|
||||
<button type="button" class="btn btn-info"
|
||||
onclick="edit()">
|
||||
<i class="" aria-hidden="true"></i>爬虫配置
|
||||
</button>
|
||||
</div>
|
||||
<div class="columns pull-right">
|
||||
|
@ -1,5 +1,5 @@
|
||||
server:
|
||||
port: 80
|
||||
port: 8080
|
||||
|
||||
spring:
|
||||
datasource:
|
||||
|
@ -309,35 +309,16 @@
|
||||
// 弹幕定时器
|
||||
var timers = [];
|
||||
// 控制弹幕显隐变量
|
||||
// 监听发送按钮
|
||||
$(".send").on("click", function () {
|
||||
var bullet = $("#screenBulletText").val();
|
||||
var contentId = $("#contentIdHidden").val();
|
||||
if (bullet && contentId) {
|
||||
$.ajax({
|
||||
type: "POST",
|
||||
url: "/book/sendBullet",
|
||||
contentType: 'application/x-www-form-urlencoded;charset=utf-8',
|
||||
data: {contentId: contentId, bullet: bullet},
|
||||
dataType: "json",
|
||||
success: function (data) {
|
||||
console.log(data);
|
||||
var bullet = $("#screenBulletText").val("")
|
||||
},
|
||||
error: function (e) {
|
||||
console.log(e);
|
||||
layer.alert("发送失败");
|
||||
return;
|
||||
//监听键盘事件
|
||||
// Pressing Enter inside the bullet text box sends the bullet, same as the send button.
$("#screenBulletText").on("keypress", function (event) {
    // Key code reported in event.which for the Enter key.
    var ENTER_KEY = 13;
    if (event.which != ENTER_KEY) {
        return;
    }
    sendBullet();
});
|
||||
} else {
|
||||
layer.alert("发送内容不能为空");
|
||||
return;
|
||||
}
|
||||
// 创建弹幕
|
||||
var jqueryDom = createScreenbullet(bullet);
|
||||
// 添加定时任务
|
||||
addInterval(jqueryDom);
|
||||
// 监听发送按钮
|
||||
$(".send").on("click", function () {
|
||||
sendBullet();
|
||||
});
|
||||
// 监听关闭弹幕按钮
|
||||
$("[lay-skin='_switch']").click(function () {
|
||||
@ -488,6 +469,36 @@
|
||||
while(true) if(new Date().getTime()-start > n) break;
|
||||
|
||||
}
|
||||
//发送弹幕
|
||||
/**
 * Sends the text currently typed in #screenBulletText as a bullet comment
 * for the chapter whose id is held in #contentIdHidden.
 *
 * The text is POSTed to /book/sendBullet as form data. Independently of the
 * server response (the request is asynchronous), a bullet element is created
 * for the text and handed to addInterval. When the content id is missing, or
 * the text is empty or whitespace-only, an alert is shown and nothing is sent.
 */
function sendBullet() {
    var bullet = $("#screenBulletText").val();
    var contentId = $("#contentIdHidden").val();

    // Guard: refuse to send without a content id, and treat whitespace-only
    // text as empty so that invisible bullets are never posted or displayed.
    if (!contentId || !bullet || !String(bullet).trim()) {
        layer.alert("发送内容不能为空");
        return;
    }

    $.ajax({
        type: "POST",
        url: "/book/sendBullet",
        contentType: 'application/x-www-form-urlencoded;charset=utf-8',
        data: {contentId: contentId, bullet: bullet},
        dataType: "json",
        success: function (data) {
            console.log(data);
            // Clear the input box once the request has succeeded.
            $("#screenBulletText").val("");
        },
        error: function (e) {
            console.log(e);
            layer.alert("发送失败");
        }
    });

    // Create the bullet element for the text that was just submitted.
    var jqueryDom = createScreenbullet(bullet);
    // Register the timer task for the new bullet element.
    addInterval(jqueryDom);
}
|
||||
</script>
|
||||
|
||||
</html>
|
Loading…
x
Reference in New Issue
Block a user