优化WEB端,可以使用回车直接发送弹幕;爬虫优化

This commit is contained in:
xiongxiaoyang 2019-11-15 14:44:20 +08:00
parent c30fb7965a
commit 95149646f6
7 changed files with 823 additions and 553 deletions

View File

@ -45,37 +45,37 @@ public class BookCrawlServiceImpl implements BookCrawlService {
private BookContentDao bookContentDao;
/**
 * Fetches one crawl record by id; delegates directly to {@code bookCrawlDao.get}.
 *
 * @param id identifier passed through to the DAO
 * @return the {@code BookCrawlDO} returned by the DAO for that id
 */
@Override
public BookCrawlDO get(Long id){
public BookCrawlDO get(Long id) {
return bookCrawlDao.get(id);
}
/**
 * Lists crawl records; delegates directly to {@code bookCrawlDao.list}.
 *
 * @param map query parameters passed through unchanged to the DAO
 * @return the list returned by the DAO
 */
@Override
public List<BookCrawlDO> list(Map<String, Object> map){
public List<BookCrawlDO> list(Map<String, Object> map) {
return bookCrawlDao.list(map);
}
/**
 * Counts crawl records; delegates directly to {@code bookCrawlDao.count}.
 *
 * @param map query parameters passed through unchanged to the DAO
 * @return the count returned by the DAO
 */
@Override
public int count(Map<String, Object> map){
public int count(Map<String, Object> map) {
return bookCrawlDao.count(map);
}
/**
 * Saves a crawl record; delegates directly to {@code bookCrawlDao.save}.
 *
 * @param bookCrawl record passed through to the DAO
 * @return the int returned by the DAO (presumably the affected row count - confirm against the mapper)
 */
@Override
public int save(BookCrawlDO bookCrawl){
public int save(BookCrawlDO bookCrawl) {
return bookCrawlDao.save(bookCrawl);
}
/**
 * Updates a crawl record; delegates directly to {@code bookCrawlDao.update}.
 *
 * @param bookCrawl record passed through to the DAO
 * @return the int returned by the DAO (presumably the affected row count - confirm against the mapper)
 */
@Override
public int update(BookCrawlDO bookCrawl){
public int update(BookCrawlDO bookCrawl) {
return bookCrawlDao.update(bookCrawl);
}
/**
 * Removes one crawl record by id; delegates directly to {@code bookCrawlDao.remove}.
 *
 * @param id identifier passed through to the DAO
 * @return the int returned by the DAO (presumably the affected row count - confirm against the mapper)
 */
@Override
public int remove(Long id){
public int remove(Long id) {
return bookCrawlDao.remove(id);
}
/**
 * Removes several crawl records at once; delegates directly to {@code bookCrawlDao.batchRemove}.
 *
 * @param ids identifiers passed through to the DAO
 * @return the int returned by the DAO (presumably the affected row count - confirm against the mapper)
 */
@Override
public int batchRemove(Long[] ids){
public int batchRemove(Long[] ids) {
return bookCrawlDao.batchRemove(ids);
}
@ -83,7 +83,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
public void updateStatus(BookCrawlDO bookCrawl) {
bookCrawlDao.update(bookCrawl);
if(bookCrawl.getStatus() == 0){
if (bookCrawl.getStatus() == 0) {
switch (bookCrawl.getCrawlWebCode()) {
case 1: {
isInteruptBiquDaoCrawl = true;
@ -94,15 +94,15 @@ public class BookCrawlServiceImpl implements BookCrawlService {
break;
}
}
}else{
crawlBook(bookCrawl.getCrawlWebCode());
} else {
crawlBook(bookCrawl);
}
}
private void crawlBook(int status){
private void crawlBook(BookCrawlDO bookCrawl) {
for (int i = 1; i <= 7; i++) {
int finalI = i;
@ -110,24 +110,279 @@ public class BookCrawlServiceImpl implements BookCrawlService {
() -> {
try {
switch (status) {
switch (bookCrawl.getCrawlWebCode()) {
case 1: {
while (true) {
if (isInteruptBiquDaoCrawl) {
return;
}
crawBiqudaoBooks(finalI);
break;
Thread.sleep(1000 * 60 * 60 * 24);
}
}
case 2: {
crawBiquTaBooks(finalI);
break;
while (true) {
if (isInteruptBiquTaCrawl) {
return;
}
crawBiquTaBooks(finalI);
Thread.sleep(1000 * 60 * 60 * 24);
}
}
}
} catch (Exception e) {
e.printStackTrace();
bookCrawl.setStatus(0);
bookCrawlDao.update(bookCrawl);
}
}
).start();
}
for (int j = 21; j <= 29; j++) {
int finalJ = j;
new Thread(() -> {
for (int i = 1; i <= 499; i++) {
if(isInteruptBiquTaCrawl || isInteruptBiquDaoCrawl){
return;
}
System.out.println("==============分类============" + finalJ);
System.out.println("==============页码============" + i);
int catId = finalJ;
int page = i;
String bookListUrl = "http://book.sfacg.com/List/default.aspx?&tid=" + catId + "&if=1&PageIndex=" + page;
String forObject = getByHttpClient(bookListUrl);
if (forObject != null) {
Pattern bookPatten = Pattern.compile("href=\"/Novel/(\\d+)/\"");
Matcher bookMatcher = bookPatten.matcher(forObject);
boolean isFindBook = bookMatcher.find();
while (isFindBook) {
try {
if(isInteruptBiquTaCrawl || isInteruptBiquDaoCrawl){
return;
}
long bookNum = Long.parseLong(bookMatcher.group(1));
String bookUrl = "http://book.sfacg.com/Novel/" + bookNum;
String forObject1 = getByHttpClient(bookUrl);
if (forObject1 != null) {
Pattern updateTimePatten = Pattern.compile("更新:(\\d+/\\d+/\\d+ \\d+:\\d+:\\d+)");
Matcher updateTimeMatch = updateTimePatten.matcher(forObject1);
boolean isFindUpdateTime = updateTimeMatch.find();
if (isFindUpdateTime) {
String updateTimeStr = updateTimeMatch.group(1);
String dateStr = updateTimeStr;
int firstPos = dateStr.indexOf("/");
String year = dateStr.substring(0, firstPos);
dateStr = dateStr.substring(firstPos + 1);
firstPos = dateStr.indexOf("/");
String month = dateStr.substring(0, firstPos);
dateStr = dateStr.substring(firstPos + 1);
firstPos = dateStr.indexOf(" ");
String day = dateStr.substring(0, firstPos);
dateStr = dateStr.substring(firstPos + 1);
firstPos = dateStr.indexOf(":");
String hour = dateStr.substring(0, firstPos);
dateStr = dateStr.substring(firstPos + 1);
firstPos = dateStr.indexOf(":");
String minus = dateStr.substring(0, firstPos);
String second = dateStr.substring(firstPos + 1);
if (month.length() == 1) {
month = "0" + month;
}
if (day.length() == 1) {
day = "0" + day;
}
if (hour.length() == 1) {
hour = "0" + hour;
}
if (minus.length() == 1) {
minus = "0" + minus;
}
if (second.length() == 1) {
second = "0" + second;
}
Date updateTime = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss").parse(updateTimeStr);
//Date updateTime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(year+"-"+month+"-"+hour+" "+minus+" "+minus);
Pattern bookNamePatten = Pattern.compile("<h1 class=\"title\">\\s*" +
"<span class=\"text\">([^<]+)</span>\\s*" +
"<span");
Matcher bookNameMatcher = bookNamePatten.matcher(forObject1);
boolean isFindBookName = bookNameMatcher.find();
if (isFindBookName) {
String bookName = bookNameMatcher.group(1);
System.out.println(bookName);
Pattern authorPatten = Pattern.compile("<div class=\"author-name\">\\s*" +
"<span>([^<]+)</span>\\s*" +
"</div>");
Matcher authorMatcher = authorPatten.matcher(forObject1);
boolean isFindAuthor = authorMatcher.find();
if (isFindAuthor) {
String author = authorMatcher.group(1);
Pattern picPtten = Pattern.compile("src=\"(http://rs.sfacg.com/web/novel/images/NovelCover/Big/[^\"]+)\"");
Matcher picMatcher = picPtten.matcher(forObject1);
if (picMatcher.find()) {
String pic = picMatcher.group(1);
Pattern visitPatten = Pattern.compile(">点击:(\\d+)<");
Matcher visitMatcher = visitPatten.matcher(forObject1);
boolean isFindVisit = visitMatcher.find();
if (isFindVisit) {
String visit = visitMatcher.group(1);
Pattern statusPatten = Pattern.compile(">字数:\\d+字\\[([^<]+)\\]<");
Matcher statusMatcher = statusPatten.matcher(forObject1);
boolean isFindStatus = statusMatcher.find();
if (isFindStatus) {
String status = statusMatcher.group(1);
if ("已完结".equals(status)) {//先爬已完结的
status = "已完成";
}
Pattern scorePatten = Pattern.compile("<div class=\"num\">\\s*" +
"<span>(\\d+\\.\\d+)</span>\\s*" +
"</div>");
Matcher scoreMather = scorePatten.matcher(forObject1);
boolean isFindScore = scoreMather.find();
if (isFindScore) {
float score = Float.parseFloat(scoreMather.group(1));
//if (score >= 7.0) {
Pattern descPatten = Pattern.compile("<p class=\"introduce\">\\s*" +
"([^<]+)\\s*</p>");
Matcher descMatcher = descPatten.matcher(forObject1);
boolean isFindDesc = descMatcher.find();
if (isFindDesc) {
String desc = descMatcher.group(1);
Pattern tagPatten = Pattern.compile("<li class=\"tag\">\\s*" +
"<a href=\"/stag/\\d+/\" class=\"highlight\"><span class=\"icn\">[^<]+</span><span class=\"text\">([^<]+)</span></a>\\s*" +
"</li>");
Matcher tagMatch = tagPatten.matcher(forObject1);
String tag = "";
boolean isFindTag = tagMatch.find();
while (isFindTag) {
tag += ("," + tagMatch.group(1));
isFindTag = tagMatch.find();
}
if (tag.length() > 0) {
tag = tag.substring(1);
}
BookDO book = new BookDO();
book.setAuthor(author);
book.setCatid(8);
book.setBookDesc(desc);
book.setBookName(bookName);
book.setSoftTag(tag);
book.setSoftCat(catId);
book.setScore(score > 10 ? 8.0f : score);
book.setVisitCount(Long.parseLong(visit));
book.setPicUrl(pic);
book.setBookStatus(status);
book.setUpdateTime(updateTime);
List<BookIndexDO> indexList = new ArrayList<>();
List<BookContentDO> contentList = new ArrayList<>();
//读取目录
String indexUrl = "http://book.sfacg.com/Novel/" + bookNum + "/MainIndex/";
String forObject2 = getByHttpClient(indexUrl);
if (forObject2 != null) {
Pattern indexListPatten = Pattern.compile("href=\"(/Novel/\\d+/\\d+/\\d+/)\"\\s+title=\"([^\"]+)\\s*");
Matcher indexListMatch = indexListPatten.matcher(forObject2);
boolean isFindIndex = indexListMatch.find();
int indexNum = 0;
//查询该书籍已存在目录号
List<Integer> hasIndexNum = queryIndexCountByBookNameAndBAuthor(bookName, author);
while (isFindIndex) {
if(isInteruptBiquTaCrawl || isInteruptBiquDaoCrawl){
return;
}
if (!hasIndexNum.contains(indexNum)) {
String contentUrl = "http://book.sfacg.com" + indexListMatch.group(1);
String indexName = indexListMatch.group(2);
//查询章节内容
String forObject3 = getByHttpClient(contentUrl);
if (forObject3 != null && !forObject3.contains("内容整改中,请等待")) {
String content = forObject3.substring(forObject3.indexOf("<div class=\"article-content"));
content = content.substring(0, content.indexOf("</div>") + 6);
//TODO插入章节目录和章节内容
BookIndexDO bookIndex = new BookIndexDO();
bookIndex.setIndexName(indexName);
bookIndex.setIndexNum(indexNum);
indexList.add(bookIndex);
BookContentDO bookContent = new BookContentDO();
bookContent.setContent(content);
bookContent.setIndexNum(indexNum);
contentList.add(bookContent);
} else {
break;
}
}
indexNum++;
isFindIndex = indexListMatch.find();
}
if (indexList.size() == contentList.size() && indexList.size() > 0) {
saveBookAndIndexAndContent(book, indexList, contentList);
}
}
}
}
}
}
}
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
} finally {
isFindBook = bookMatcher.find();
}
}
}
}
}).start();
}
}
private void crawBiquTaBooks(int i) {
@ -151,7 +406,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
Pattern bookPatten = Pattern.compile("href=\"/(\\d+_\\d+)/\"");
parseBiquTaBook(bookPatten, forObject, i, baseUrl);
while (currentPage < totalPage) {
if(isInteruptBiquTaCrawl){
if (isInteruptBiquTaCrawl) {
return;
}
@ -195,7 +450,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
System.out.println("匹配分数" + scoreFind);
while (isFind && scoreFind && isBookNameMatch && isFindAuthor) {
if(isInteruptBiquTaCrawl){
if (isInteruptBiquTaCrawl) {
return;
}
@ -273,7 +528,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
List<Integer> hasIndexNum = queryIndexCountByBookNameAndBAuthor(bookName, author);
while (isFindIndex) {
if(isInteruptBiquTaCrawl){
if (isInteruptBiquTaCrawl) {
return;
}
@ -370,7 +625,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
parseBiqudaoBook(bookPatten, forObject, i, baseUrl);
while (currentPage < totalPage) {
if(isInteruptBiquDaoCrawl){
if (isInteruptBiquDaoCrawl) {
return;
}
@ -418,7 +673,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
while (isFind && scoreFind && isBookNameMatch && isFindAuthor) {
try {
if(isInteruptBiquDaoCrawl){
if (isInteruptBiquDaoCrawl) {
return;
}
@ -496,7 +751,7 @@ public class BookCrawlServiceImpl implements BookCrawlService {
List<Integer> hasIndexNum = queryIndexCountByBookNameAndBAuthor(bookName, author);
while (isFindIndex) {
if(isInteruptBiquDaoCrawl){
if (isInteruptBiquDaoCrawl) {
return;
}
if (!hasIndexNum.contains(indexNum)) {
@ -573,9 +828,9 @@ public class BookCrawlServiceImpl implements BookCrawlService {
Long bookId = -1l;
book.setBookName(book.getBookName().trim());
book.setAuthor(book.getAuthor().trim());
Map<String,Object> bookExample = new HashMap<>();
bookExample.put("bookName",book.getBookName());
bookExample.put("author",book.getAuthor());
Map<String, Object> bookExample = new HashMap<>();
bookExample.put("bookName", book.getBookName());
bookExample.put("author", book.getAuthor());
List<BookDO> books = bookDao.list(bookExample);
if (books.size() > 0) {
//更新
@ -630,8 +885,6 @@ public class BookCrawlServiceImpl implements BookCrawlService {
}
}
@ -677,15 +930,15 @@ public class BookCrawlServiceImpl implements BookCrawlService {
*/
private List<Integer> queryIndexCountByBookNameAndBAuthor(String bookName, String author) {
List<Integer> result = new ArrayList<>();
Map<String,Object> bookExample = new HashMap<>();
bookExample.put("bookName",bookName);
bookExample.put("author",author);
Map<String, Object> bookExample = new HashMap<>();
bookExample.put("bookName", bookName);
bookExample.put("author", author);
List<BookDO> books = bookDao.list(bookExample);
if (books.size() > 0) {
Long bookId = books.get(0).getId();
Map<String,Object> bookIndexExample = new HashMap<>();
bookExample.put("bookId",bookId);
Map<String, Object> bookIndexExample = new HashMap<>();
// The bookId filter must go into the map that is actually handed to bookIndexDao.list.
// Putting it into bookExample (already consumed by the book query above) would leave
// this index query without any bookId criterion.
bookIndexExample.put("bookId", bookId);
List<BookIndexDO> bookIndices = bookIndexDao.list(bookIndexExample);
if (bookIndices != null && bookIndices.size() > 0) {
for (BookIndexDO bookIndex : bookIndices) {

View File

@ -35,8 +35,8 @@ spring:
type: com.alibaba.druid.pool.DruidDataSource
driverClassName: com.mysql.jdbc.Driver
url: jdbc:mysql://127.0.0.1:3306/books?useUnicode=true&characterEncoding=utf8&serverTimezone=Asia/Shanghai
username: root
password: test123456
username: books
password: books
#password:
initialSize: 1
minIdle: 3

View File

@ -0,0 +1,10 @@
# Number of crawler threads
threadCount=1
# Crawl priority: 1 = score first, 2 = update time first
priority=1
# Minimum novel score; 0 means no restriction
lowestScore=0
# Minimum (earliest) novel update time
minUptTime=2000-01-01 00:00:00
# Maximum number of items to crawl; 0 means no restriction
maxNumber=0

View File

@ -159,14 +159,15 @@ function detail(id) {
});
}
function edit(id) {
function edit(){
console.log('打开配置页面');
layer.open({
type: 2,
title: '编辑',
maxmin: true,
shadeClose: false, // 点击遮罩关闭层
area: ['800px', '520px'],
content: prefix + '/edit/' + id // iframe的url
type : 2,
title : '增加',
maxmin : true,
shadeClose : false,
area : [ '800px', '520px' ],
content : prefix + '/edit'
});
}

View File

@ -9,14 +9,9 @@
<div class="ibox-body">
<div class="fixed-table-toolbar">
<div class="columns pull-left">
<button shiro:hasPermission="books:bookCrawl:add" type="button"
class="btn btn-primary" onclick="add()">
<i class="fa fa-plus" aria-hidden="true"></i>添加
</button>
<button shiro:hasPermission="books:bookCrawl:batchRemove" type="button"
class="btn btn-danger"
onclick="batchRemove()">
<i class="fa fa-trash" aria-hidden="true"></i>删除
<button type="button" class="btn btn-info"
onclick="edit()">
<i class="" aria-hidden="true"></i>爬虫配置
</button>
</div>
<div class="columns pull-right">

View File

@ -1,5 +1,5 @@
server:
port: 80
port: 8080
spring:
datasource:

View File

@ -309,35 +309,16 @@
// 弹幕定时器
var timers = [];
// 控制弹幕显隐变量
// 监听发送按钮
$(".send").on("click", function () {
var bullet = $("#screenBulletText").val();
var contentId = $("#contentIdHidden").val();
if (bullet && contentId) {
$.ajax({
type: "POST",
url: "/book/sendBullet",
contentType: 'application/x-www-form-urlencoded;charset=utf-8',
data: {contentId: contentId, bullet: bullet},
dataType: "json",
success: function (data) {
console.log(data);
var bullet = $("#screenBulletText").val("")
},
error: function (e) {
console.log(e);
layer.alert("发送失败");
return;
// Listen for key presses in the bullet input box
$("#screenBulletText").keypress(function (event) {
    var ENTER_KEY = 13;
    if (event.which != ENTER_KEY) {
        return;
    }
    // Enter was pressed: send the bullet
    sendBullet();
});
} else {
layer.alert("发送内容不能为空");
return;
}
// 创建弹幕
var jqueryDom = createScreenbullet(bullet);
// 添加定时任务
addInterval(jqueryDom);
// Listen for clicks on the send button and send the bullet
$(".send").on("click", function () {
sendBullet();
});
// 监听关闭弹幕按钮
$("[lay-skin='_switch']").click(function () {
@ -488,6 +469,36 @@
while(true) if(new Date().getTime()-start > n) break;
}
// Send a screen bullet (danmaku).
// Reads the text from #screenBulletText and the content id from #contentIdHidden,
// POSTs both to /book/sendBullet, and renders the bullet locally right away
// without waiting for the server response.
function sendBullet() {
    var bullet = $("#screenBulletText").val();
    var contentId = $("#contentIdHidden").val();
    // Both values are required; bail out before touching the network or the DOM.
    if (!bullet || !contentId) {
        layer.alert("发送内容不能为空");
        return;
    }
    $.ajax({
        type: "POST",
        url: "/book/sendBullet",
        contentType: 'application/x-www-form-urlencoded;charset=utf-8',
        data: {contentId: contentId, bullet: bullet},
        dataType: "json",
        success: function (data) {
            console.log(data);
            // Clear the input box once the request has succeeded.
            $("#screenBulletText").val("");
        },
        error: function (e) {
            console.log(e);
            layer.alert("发送失败");
        }
    });
    // Create the bullet element
    var jqueryDom = createScreenbullet(bullet);
    // Register the timed task for it
    addInterval(jqueryDom);
}
</script>
</html>