mirror of https://github.com/201206030/novel-plus.git, synced 2025-07-05 08:46:38 +00:00
Commit: Upload code
@@ -0,0 +1,25 @@
package com.java2nb.novel;

import org.mybatis.spring.annotation.MapperScan;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.web.servlet.ServletComponentScan;
import org.springframework.cache.annotation.EnableCaching;
import org.springframework.scheduling.annotation.EnableScheduling;

/**
 * @author Administrator
 */
@SpringBootApplication
@EnableCaching
@EnableScheduling
@ServletComponentScan
@MapperScan(basePackages = {"com.java2nb.novel.mapper"})
public class CrawlNovelApplication {

    public static void main(String[] args) {
        SpringApplication.run(CrawlNovelApplication.class, args);
    }

}
@@ -0,0 +1,62 @@
package com.java2nb.novel.controller;

import com.github.pagehelper.PageInfo;
import com.java2nb.novel.core.bean.ResultBean;
import com.java2nb.novel.core.utils.BeanUtil;
import com.java2nb.novel.entity.CrawlSource;
import com.java2nb.novel.service.CrawlService;
import com.java2nb.novel.vo.CrawlSourceVO;
import lombok.RequiredArgsConstructor;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;

/**
 * @author Administrator
 */
@RestController
@RequestMapping("crawl")
@RequiredArgsConstructor
public class CrawlController {

    private final CrawlService crawlService;

    /**
     * Add a crawl source
     */
    @PostMapping("addCrawlSource")
    public ResultBean addCrawlSource(CrawlSource source) {
        crawlService.addCrawlSource(source);
        return ResultBean.ok();
    }

    /**
     * Paged list of crawl sources
     */
    @PostMapping("listCrawlByPage")
    public ResultBean listCrawlByPage(@RequestParam(value = "curr", defaultValue = "1") int page,
                                      @RequestParam(value = "limit", defaultValue = "10") int pageSize) {
        return ResultBean.ok(new PageInfo<>(
                BeanUtil.copyList(crawlService.listCrawlByPage(page, pageSize), CrawlSourceVO.class)));
    }

    /**
     * Start or stop a crawler
     */
    @PostMapping("openOrCloseCrawl")
    public ResultBean openOrCloseCrawl(Integer sourceId, Byte sourceStatus) {
        crawlService.openOrCloseCrawl(sourceId, sourceStatus);
        return ResultBean.ok();
    }

}
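ResultBean is used by every handler above but is defined in the shared novel-common module, not in this commit. For orientation only, a minimal sketch of the envelope these handlers assume; the field names, codes, and factory-method bodies are guesses inferred from the ok()/ok(data) calls and may differ from the real class.

package com.java2nb.novel.core.bean;

import lombok.Data;

/**
 * Hypothetical sketch of the shared JSON response envelope (not part of this commit).
 */
@Data
public class ResultBean {

    private int code;
    private String msg;
    private Object data;

    public static ResultBean ok() {
        return ok(null);
    }

    public static ResultBean ok(Object data) {
        ResultBean bean = new ResultBean();
        bean.setCode(200);
        bean.setMsg("OK");
        bean.setData(data);
        return bean;
    }
}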
@@ -0,0 +1,50 @@
package com.java2nb.novel.controller;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;

/**
 * @author 11797
 */
@Slf4j
@RequiredArgsConstructor
@Controller
public class PageController {

    @RequestMapping("{url}.html")
    public String module(@PathVariable("url") String url) {
        return url;
    }

    @RequestMapping("{module}/{url}.html")
    public String module2(@PathVariable("module") String module, @PathVariable("url") String url) {
        return module + "/" + url;
    }

    @RequestMapping("{module}/{classify}/{url}.html")
    public String module3(@PathVariable("module") String module, @PathVariable("classify") String classify, @PathVariable("url") String url) {
        return module + "/" + classify + "/" + url;
    }

    /**
     * Home page
     */
    @RequestMapping(path = {"/", "/index", "/index.html"})
    public String index() {
        return "crawl/crawlSource_list";
    }

}
@@ -0,0 +1,64 @@
package com.java2nb.novel.core.config;

import lombok.RequiredArgsConstructor;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.security.config.annotation.authentication.builders.AuthenticationManagerBuilder;
import org.springframework.security.config.annotation.web.builders.HttpSecurity;
import org.springframework.security.config.annotation.web.builders.WebSecurity;
import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity;
import org.springframework.security.config.annotation.web.configuration.WebSecurityConfigurerAdapter;
import org.springframework.security.core.userdetails.User;
import org.springframework.security.crypto.bcrypt.BCryptPasswordEncoder;
import org.springframework.security.crypto.password.PasswordEncoder;

/**
 * Spring Security configuration
 *
 * @author Administrator
 */
@Configuration
@EnableWebSecurity
@RequiredArgsConstructor
public class SecurityConfiguration extends WebSecurityConfigurerAdapter {

    @Value("${admin.username}")
    private String username;

    @Value("${admin.password}")
    private String password;

    @Bean
    public PasswordEncoder passwordEncoder() {
        return new BCryptPasswordEncoder();
    }

    @Override
    public void configure(WebSecurity web) throws Exception {
        super.configure(web);
    }

    @Override
    public void configure(AuthenticationManagerBuilder auth) throws Exception {
        User.UserBuilder builder = User.builder().passwordEncoder(passwordEncoder()::encode);
        auth.inMemoryAuthentication().withUser(builder.username(username).password(password).roles("ADMIN").build());
    }

    @Override
    protected void configure(HttpSecurity http) throws Exception {
        http.csrf().disable()               // disable CSRF protection
                .authorizeRequests()        // restrict the requests below
                .antMatchers("/**").hasRole("ADMIN")
                .anyRequest().permitAll()   // allow any request not restricted above
                .and().anonymous()          // allow anonymous access where no rule applies
                .and().formLogin()          // use the Spring Security default login page
                .and().httpBasic();         // enable HTTP Basic authentication
    }

}
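WebSecurityConfigurerAdapter was deprecated in Spring Security 5.7 and removed in 6.0. For reference only, the same in-memory-admin policy can be expressed as component beans; a sketch that assumes Spring Security 5.7/5.8 (the authorizeRequests/antMatchers style changes again in 6.0), not part of this commit.

package com.java2nb.novel.core.config;

import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.security.config.annotation.web.builders.HttpSecurity;
import org.springframework.security.config.annotation.web.configuration.EnableWebSecurity;
import org.springframework.security.core.userdetails.User;
import org.springframework.security.core.userdetails.UserDetails;
import org.springframework.security.crypto.bcrypt.BCryptPasswordEncoder;
import org.springframework.security.crypto.password.PasswordEncoder;
import org.springframework.security.provisioning.InMemoryUserDetailsManager;
import org.springframework.security.web.SecurityFilterChain;

@Configuration
@EnableWebSecurity
public class ModernSecurityConfiguration {

    @Bean
    public PasswordEncoder passwordEncoder() {
        return new BCryptPasswordEncoder();
    }

    @Bean
    public InMemoryUserDetailsManager adminUser(@Value("${admin.username}") String username,
                                                @Value("${admin.password}") String password) {
        // Same single in-memory ADMIN user as the adapter-based configuration.
        UserDetails admin = User.builder()
                .passwordEncoder(passwordEncoder()::encode)
                .username(username)
                .password(password)
                .roles("ADMIN")
                .build();
        return new InMemoryUserDetailsManager(admin);
    }

    @Bean
    public SecurityFilterChain filterChain(HttpSecurity http) throws Exception {
        // Same policy as the adapter-based configure(HttpSecurity) above.
        http.csrf().disable()
                .authorizeRequests()
                .antMatchers("/**").hasRole("ADMIN")
                .anyRequest().permitAll()
                .and().anonymous()
                .and().formLogin()
                .and().httpBasic();
        return http.build();
    }
}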
@@ -0,0 +1,77 @@
package com.java2nb.novel.core.listener;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.java2nb.novel.core.crawl.CrawlParser;
import com.java2nb.novel.core.crawl.RuleBean;
import com.java2nb.novel.entity.Book;
import com.java2nb.novel.entity.BookContent;
import com.java2nb.novel.entity.BookIndex;
import com.java2nb.novel.entity.CrawlSource;
import com.java2nb.novel.service.BookService;
import com.java2nb.novel.service.CrawlService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.time.DateUtils;

import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
import javax.servlet.annotation.WebListener;
import java.util.Date;
import java.util.List;
import java.util.Map;

/**
 * @author Administrator
 */
@WebListener
@Slf4j
@RequiredArgsConstructor
public class StarterListener implements ServletContextListener {

    private final BookService bookService;

    private final CrawlService crawlService;

    @Override
    public void contextInitialized(ServletContextEvent sce) {
        log.info("Application started, launching the automatic update thread...");
        new Thread(() -> {
            while (true) {
                try {
                    //1. Query the first 100 books that need updating whose latest index update falls within the last month
                    Date currentDate = new Date();
                    Date startDate = DateUtils.addDays(currentDate, -30);
                    List<Book> bookList = bookService.queryNeedUpdateBook(startDate, 100);
                    for (Book needUpdateBook : bookList) {
                        try {
                            //Query the crawl source rule
                            CrawlSource source = crawlService.queryCrawlSource(needUpdateBook.getCrawlSourceId());
                            RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class);
                            //Parse the basic book info
                            Book book = CrawlParser.parseBook(ruleBean, needUpdateBook.getCrawlBookId());
                            //Only existing books are updated here
                            book.setCrawlLastTime(currentDate);
                            book.setId(needUpdateBook.getId());
                            //Query the chapters that already exist
                            Map<Integer, BookIndex> existBookIndexMap = bookService.queryExistBookIndexMap(needUpdateBook.getId());
                            //Parse the chapter index
                            Map<Integer, List> indexAndContentList = CrawlParser.parseBookIndexAndContent(needUpdateBook.getCrawlBookId(), book, ruleBean, existBookIndexMap);
                            bookService.updateBookAndIndexAndContent(book, (List<BookIndex>) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List<BookContent>) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY));
                        } catch (Exception e) {
                            log.error(e.getMessage(), e);
                            //Parsing failed midway; still refresh the book's last-crawl time
                            bookService.updateCrawlLastTime(needUpdateBook.getId());
                        }
                    }

                    Thread.sleep(1000 * 60 * 10);
                } catch (Exception e) {
                    log.error(e.getMessage(), e);
                }
            }
        }).start();
    }
}
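StarterListener hand-rolls an endless polling thread that sleeps ten minutes between passes. Since the application class already declares @EnableScheduling, the same update pass could equally be written as a scheduled bean; a sketch of that alternative, with the per-book logic elided and the class name hypothetical.

package com.java2nb.novel.core.schedule;

import com.java2nb.novel.entity.Book;
import com.java2nb.novel.service.BookService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.time.DateUtils;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

import java.util.Date;

@Service
@RequiredArgsConstructor
@Slf4j
public class BookUpdateSchedule {

    private final BookService bookService;

    // fixedDelay mirrors the Thread.sleep(1000 * 60 * 10) in StarterListener:
    // the next run starts ten minutes after the previous one finishes.
    @Scheduled(fixedDelay = 1000 * 60 * 10)
    public void updateBooks() {
        Date startDate = DateUtils.addDays(new Date(), -30);
        for (Book needUpdateBook : bookService.queryNeedUpdateBook(startDate, 100)) {
            // ...same per-book parse-and-update logic as in StarterListener...
        }
    }
}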
@@ -0,0 +1,63 @@
package com.java2nb.novel.core.schedule;

import com.java2nb.novel.core.cache.CacheKey;
import com.java2nb.novel.core.cache.CacheService;
import com.java2nb.novel.core.utils.ThreadUtil;
import com.java2nb.novel.entity.CrawlSource;
import com.java2nb.novel.service.CrawlService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;

import java.util.List;
import java.util.Set;

/**
 * Crawl thread monitor: watches for crawl sources whose threads have finished and updates their status.
 *
 * @author Administrator
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class CrawlThreadMonitor {

    private final CacheService cacheService;

    private final CrawlService crawlService;

    @Scheduled(fixedRate = 1000 * 60 * 5)
    public void monitor() {

        //Query the running crawl sources that need monitoring
        List<CrawlSource> sources = crawlService.queryCrawlSourceByStatus((byte) 1);

        for (CrawlSource source : sources) {
            Set<Long> runningCrawlThreadIds = (Set<Long>) cacheService.getObject(CacheKey.RUNNING_CRAWL_THREAD_KEY_PREFIX + source.getId());
            boolean sourceStop = true;
            if (runningCrawlThreadIds != null) {
                for (Long threadId : runningCrawlThreadIds) {
                    Thread thread = ThreadUtil.findThread(threadId);
                    if (thread != null && thread.isAlive()) {
                        //A live thread means this crawl source is still running; the database status is correct and needs no change
                        sourceStop = false;
                    }
                }
            }

            if (sourceStop) {
                crawlService.updateCrawlSourceStatus(source.getId(), (byte) 0);
            }
        }
    }
}
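ThreadUtil.findThread ships in the shared novel-common module and is not part of this commit. A minimal sketch of what a lookup by thread id can look like on the JVM, assuming the real utility may differ: climb to the root ThreadGroup, enumerate the live threads, and match on Thread.getId().

package com.java2nb.novel.core.utils;

/**
 * Hypothetical sketch of a thread-lookup utility (the real ThreadUtil is not in this commit).
 */
public class ThreadUtil {

    public static Thread findThread(long threadId) {
        // Climb to the root thread group, which transitively contains every live thread.
        ThreadGroup group = Thread.currentThread().getThreadGroup();
        while (group.getParent() != null) {
            group = group.getParent();
        }
        // enumerate() fills the array with live threads; oversize it to be safe.
        Thread[] threads = new Thread[group.activeCount() * 2 + 1];
        int count = group.enumerate(threads, true);
        for (int i = 0; i < count; i++) {
            if (threads[i].getId() == threadId) {
                return threads[i];
            }
        }
        return null;
    }
}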
@@ -0,0 +1,16 @@
package com.java2nb.novel.mapper;

import com.java2nb.novel.entity.BookIndex;
import org.apache.ibatis.annotations.Param;

/**
 * @author Administrator
 */
public interface CrawlBookIndexMapper extends BookIndexMapper {

    /**
     * Query the latest chapter
     */
    BookIndex queryLastIndex(@Param("bookId") Long bookId);
}
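queryLastIndex carries no annotation, so its SQL presumably lives in an XML mapper file that is not part of this commit. For illustration only, an annotation-based mapping could look like the sketch below; the table and column names are guesses derived from the BookIndex entity and the usage in BookServiceImpl (latest chapter = highest index_num for the book).

package com.java2nb.novel.mapper;

import com.java2nb.novel.entity.BookIndex;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;

public interface CrawlBookIndexAnnotatedMapper {

    // Hypothetical mapping; the real SQL is defined elsewhere and may differ.
    @Select("select id, book_id, index_num, index_name, update_time from book_index "
            + "where book_id = #{bookId} order by index_num desc limit 1")
    BookIndex queryLastIndex(@Param("bookId") Long bookId);
}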
@@ -0,0 +1,28 @@
package com.java2nb.novel.mapper;

import com.java2nb.novel.entity.Book;
import org.apache.ibatis.annotations.Param;

import java.util.Date;
import java.util.List;

/**
 * @author Administrator
 */
public interface CrawlBookMapper extends BookMapper {

    /**
     * Query the books that need updating
     * @param startDate lower bound for the latest update time
     * @param limit number of rows to query
     * @return list of books
     */
    List<Book> queryNeedUpdateBook(@Param("startDate") Date startDate, @Param("limit") int limit);

    /**
     * Query a book's total word count
     * @param bookId book ID
     * @return total word count of the book
     */
    Integer queryTotalWordCount(@Param("bookId") Long bookId);
}
@@ -0,0 +1,77 @@
package com.java2nb.novel.service;

import com.java2nb.novel.entity.Book;
import com.java2nb.novel.entity.BookContent;
import com.java2nb.novel.entity.BookIndex;

import java.util.Date;
import java.util.List;
import java.util.Map;

/**
 * @author Administrator
 */
public interface BookService {

    /**
     * Check whether a book with the given name and author already exists
     * @param bookName book name
     * @param authorName author name
     * @return whether a book with this name and author exists
     */
    boolean queryIsExistByBookNameAndAuthorName(String bookName, String authorName);

    /**
     * Update a book's crawl-related properties
     * @param sourceId crawl source ID
     * @param bookId book ID on the source site
     */
    void updateCrawlProperties(Integer sourceId, String bookId);

    /**
     * Query a category name by category ID
     * @param catId category ID
     * @return category name
     */
    String queryCatNameByCatId(int catId);

    /**
     * Save book, index, and content table data
     * @param book book data
     * @param bookIndexList index list
     * @param bookContentList content list
     */
    void saveBookAndIndexAndContent(Book book, List<BookIndex> bookIndexList, List<BookContent> bookContentList);

    /**
     * Query the books that need updating
     *
     * @param startDate lower bound for the latest update time
     * @param limit number of rows to query
     * @return list of books
     */
    List<Book> queryNeedUpdateBook(Date startDate, int limit);

    /**
     * Query the chapters that already exist
     * @param bookId book ID
     * @return map of chapter number to chapter data
     */
    Map<Integer, BookIndex> queryExistBookIndexMap(Long bookId);

    /**
     * Update book, index, and content table data
     * @param book book data
     * @param bookIndexList index list
     * @param bookContentList content list
     */
    void updateBookAndIndexAndContent(Book book, List<BookIndex> bookIndexList, List<BookContent> bookContentList);

    /**
     * Refresh the time of the last crawl
     * @param bookId book ID
     */
    void updateCrawlLastTime(Long bookId);
}
@@ -0,0 +1,64 @@
package com.java2nb.novel.service;

import com.java2nb.novel.core.crawl.RuleBean;
import com.java2nb.novel.entity.CrawlSource;

import java.util.List;

/**
 * @author Administrator
 */
public interface CrawlService {

    /**
     * Add a crawl source
     * @param source crawl source data object from the submitted form
     */
    void addCrawlSource(CrawlSource source);

    /**
     * Paged list of crawl sources
     * @param page current page number
     * @param pageSize page size
     * @return list of crawl sources
     */
    List<CrawlSource> listCrawlByPage(int page, int pageSize);

    /**
     * Start or stop a crawler
     * @param sourceId crawl source ID
     * @param sourceStatus status, 0 = stopped, 1 = running
     */
    void openOrCloseCrawl(Integer sourceId, Byte sourceStatus);

    /**
     * Update a crawl source's status
     * @param sourceId crawl source ID
     * @param sourceStatus status, 0 = stopped, 1 = running
     */
    void updateCrawlSourceStatus(Integer sourceId, Byte sourceStatus);

    /**
     * Query crawl sources by status
     * @param sourceStatus status, 0 = stopped, 1 = running
     * @return list of crawl sources
     */
    List<CrawlSource> queryCrawlSourceByStatus(Byte sourceStatus);

    /**
     * Parse a category's book list by category ID and rule
     * @param catId category ID
     * @param ruleBean rule object
     * @param sourceId crawl source ID
     */
    void parseBookList(int catId, RuleBean ruleBean, Integer sourceId);

    /**
     * Query a crawl source
     * @param sourceId source ID
     * @return source data
     */
    CrawlSource queryCrawlSource(Integer sourceId);
}
@@ -0,0 +1,194 @@
package com.java2nb.novel.service.impl;

import com.java2nb.novel.core.utils.IdWorker;
import com.java2nb.novel.entity.Book;
import com.java2nb.novel.entity.BookContent;
import com.java2nb.novel.entity.BookIndex;
import com.java2nb.novel.mapper.*;
import com.java2nb.novel.service.BookService;
import lombok.RequiredArgsConstructor;
import org.mybatis.dynamic.sql.render.RenderingStrategies;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

import static com.java2nb.novel.mapper.BookDynamicSqlSupport.crawlBookId;
import static com.java2nb.novel.mapper.BookDynamicSqlSupport.crawlSourceId;
import static org.mybatis.dynamic.sql.SqlBuilder.*;
import static org.mybatis.dynamic.sql.select.SelectDSL.select;

/**
 * @author Administrator
 */
@Service
@RequiredArgsConstructor
public class BookServiceImpl implements BookService {

    private final CrawlBookMapper bookMapper;

    private final BookCategoryMapper bookCategoryMapper;

    private final CrawlBookIndexMapper bookIndexMapper;

    private final BookContentMapper bookContentMapper;

    @Override
    public boolean queryIsExistByBookNameAndAuthorName(String bookName, String authorName) {
        return bookMapper.count(countFrom(BookDynamicSqlSupport.book).where(BookDynamicSqlSupport.bookName, isEqualTo(bookName))
                .and(BookDynamicSqlSupport.authorName, isEqualTo(authorName))
                .build()
                .render(RenderingStrategies.MYBATIS3)) > 0;
    }

    @Override
    public void updateCrawlProperties(Integer sourceId, String bookId) {
        bookMapper.update(update(BookDynamicSqlSupport.book)
                .set(crawlSourceId)
                .equalTo(sourceId)
                .set(crawlBookId)
                .equalTo(bookId)
                .build()
                .render(RenderingStrategies.MYBATIS3));
    }

    @Override
    public String queryCatNameByCatId(int catId) {
        return bookCategoryMapper.selectMany(select(BookCategoryDynamicSqlSupport.name)
                .from(BookCategoryDynamicSqlSupport.bookCategory)
                .where(BookCategoryDynamicSqlSupport.id, isEqualTo(catId))
                .build()
                .render(RenderingStrategies.MYBATIS3)).get(0).getName();
    }

    @Transactional(rollbackFor = Exception.class)
    @Override
    public void saveBookAndIndexAndContent(Book book, List<BookIndex> bookIndexList, List<BookContent> bookContentList) {
        if (!queryIsExistByBookNameAndAuthorName(book.getBookName(), book.getAuthorName())) {
            if (bookIndexList.size() > 0) {
                if (book.getId() == null) {
                    book.setId(new IdWorker().nextId());
                }

                //Save the main book table
                bookMapper.insertSelective(book);

                //Batch-save the index and content
                bookIndexMapper.insertMultiple(bookIndexList);
                bookContentMapper.insertMultiple(bookContentList);
            }
        }
    }

    @Override
    public List<Book> queryNeedUpdateBook(Date startDate, int limit) {
        return bookMapper.queryNeedUpdateBook(startDate, limit);
    }

    @Override
    public Map<Integer, BookIndex> queryExistBookIndexMap(Long bookId) {
        List<BookIndex> bookIndexs = bookIndexMapper.selectMany(select(BookIndexDynamicSqlSupport.id, BookIndexDynamicSqlSupport.indexNum, BookIndexDynamicSqlSupport.indexName)
                .from(BookIndexDynamicSqlSupport.bookIndex)
                .where(BookIndexDynamicSqlSupport.bookId, isEqualTo(bookId))
                .build()
                .render(RenderingStrategies.MYBATIS3));
        if (bookIndexs.size() > 0) {
            return bookIndexs.stream().collect(Collectors.toMap(BookIndex::getIndexNum, Function.identity()));
        }
        return new HashMap<>(0);
    }

    @Transactional(rollbackFor = Exception.class)
    @Override
    public void updateBookAndIndexAndContent(Book book, List<BookIndex> bookIndexList, List<BookContent> bookContentList) {
        Date currentDate = new Date();
        for (int i = 0; i < bookIndexList.size(); i++) {
            BookIndex bookIndex = bookIndexList.get(i);
            BookContent bookContent = bookContentList.get(i);

            //Insert or update the chapter index
            Integer wordCount = bookContent.getContent().length();
            bookIndex.setWordCount(wordCount);
            bookIndex.setUpdateTime(currentDate);

            if (bookIndex.getId() == null) {
                //Insert
                bookIndex.setBookId(book.getId());
                Long indexId = new IdWorker().nextId();
                bookIndex.setId(indexId);
                bookIndex.setCreateTime(currentDate);
                bookIndexMapper.insertSelective(bookIndex);
            } else {
                //Update
                bookIndexMapper.updateByPrimaryKeySelective(bookIndex);
            }

            if (bookContent.getIndexId() == null) {
                //Insert
                bookContent.setIndexId(bookIndex.getId());
                bookContentMapper.insertSelective(bookContent);
            } else {
                //Update
                bookContentMapper.update(update(BookContentDynamicSqlSupport.bookContent)
                        .set(BookContentDynamicSqlSupport.content)
                        .equalTo(bookContent.getContent())
                        .where(BookContentDynamicSqlSupport.indexId, isEqualTo(bookContent.getIndexId()))
                        .build()
                        .render(RenderingStrategies.MYBATIS3));
            }
        }

        //Update the main book table
        book.setWordCount(queryTotalWordCount(book.getId()));
        BookIndex lastIndex = queryLastIndex(book.getId());
        book.setLastIndexId(lastIndex.getId());
        book.setLastIndexName(lastIndex.getIndexName());
        book.setLastIndexUpdateTime(lastIndex.getUpdateTime());
        book.setUpdateTime(currentDate);
        book.setBookName(null);
        book.setAuthorName(null);
        bookMapper.updateByPrimaryKeySelective(book);
    }

    @Override
    public void updateCrawlLastTime(Long bookId) {
        Book book = new Book();
        book.setId(bookId);
        book.setCrawlLastTime(new Date());
        bookMapper.updateByPrimaryKeySelective(book);
    }

    /**
     * Query the latest chapter
     */
    private BookIndex queryLastIndex(Long bookId) {
        return bookIndexMapper.queryLastIndex(bookId);
    }

    /**
     * Query a book's total word count
     */
    private Integer queryTotalWordCount(Long bookId) {
        return bookMapper.queryTotalWordCount(bookId);
    }
}
@@ -0,0 +1,245 @@
package com.java2nb.novel.service.impl;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.pagehelper.PageHelper;
import com.java2nb.novel.core.cache.CacheKey;
import com.java2nb.novel.core.cache.CacheService;
import com.java2nb.novel.core.crawl.CrawlParser;
import com.java2nb.novel.core.crawl.RuleBean;
import com.java2nb.novel.core.utils.IdWorker;
import com.java2nb.novel.core.utils.SpringUtil;
import com.java2nb.novel.core.utils.ThreadUtil;
import com.java2nb.novel.entity.Book;
import com.java2nb.novel.entity.BookContent;
import com.java2nb.novel.entity.BookIndex;
import com.java2nb.novel.entity.CrawlSource;
import com.java2nb.novel.mapper.*;
import com.java2nb.novel.service.BookService;
import com.java2nb.novel.service.CrawlService;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.mybatis.dynamic.sql.render.RenderingStrategies;
import org.mybatis.dynamic.sql.select.render.SelectStatementProvider;
import org.springframework.stereotype.Service;

import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static com.java2nb.novel.core.utils.HttpUtil.getByHttpClient;
import static com.java2nb.novel.mapper.CrawlSourceDynamicSqlSupport.*;
import static org.mybatis.dynamic.sql.SqlBuilder.isEqualTo;
import static org.mybatis.dynamic.sql.select.SelectDSL.select;

/**
 * @author Administrator
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class CrawlServiceImpl implements CrawlService {

    private final CrawlSourceMapper crawlSourceMapper;

    private final BookService bookService;

    private final CacheService cacheService;

    @Override
    public void addCrawlSource(CrawlSource source) {
        Date currentDate = new Date();
        source.setCreateTime(currentDate);
        source.setUpdateTime(currentDate);
        crawlSourceMapper.insertSelective(source);
    }

    @Override
    public List<CrawlSource> listCrawlByPage(int page, int pageSize) {
        PageHelper.startPage(page, pageSize);
        SelectStatementProvider render = select(id, sourceName, sourceStatus, createTime, updateTime)
                .from(crawlSource)
                .orderBy(updateTime)
                .build()
                .render(RenderingStrategies.MYBATIS3);
        return crawlSourceMapper.selectMany(render);
    }

    @SneakyThrows
    @Override
    public void openOrCloseCrawl(Integer sourceId, Byte sourceStatus) {

        //Decide whether this is a start or a stop. For a stop, update the database status,
        //then fetch the crawler's set of running threads and stop them all.
        //For a start, first check the status in the database: if the crawl source is still
        //running, ignore the request; otherwise update the database status, start the crawl
        //threads, and add them to runningCrawlThread.
        if (sourceStatus == (byte) 0) {
            //Stop: update the database status, then fetch the crawler's running thread set and stop every thread
            SpringUtil.getBean(CrawlService.class).updateCrawlSourceStatus(sourceId, sourceStatus);
            Set<Long> runningCrawlThreadId = (Set<Long>) cacheService.getObject(CacheKey.RUNNING_CRAWL_THREAD_KEY_PREFIX + sourceId);
            if (runningCrawlThreadId != null) {
                for (Long threadId : runningCrawlThreadId) {
                    Thread thread = ThreadUtil.findThread(threadId);
                    if (thread != null && thread.isAlive()) {
                        thread.interrupt();
                    }
                }
            }

        } else {
            //Start
            //Query the crawl source's status and rule
            CrawlSource source = queryCrawlSource(sourceId);
            Byte realSourceStatus = source.getSourceStatus();

            if (realSourceStatus == (byte) 0) {
                //The crawl source has stopped; update the database status, start the crawl threads, and add them to runningCrawlThread
                SpringUtil.getBean(CrawlService.class).updateCrawlSourceStatus(sourceId, sourceStatus);
                RuleBean ruleBean = new ObjectMapper().readValue(source.getCrawlRule(), RuleBean.class);

                Set<Long> threadIds = new HashSet<>();
                //Start one parse task per category
                for (int i = 1; i < 8; i++) {
                    final int catId = i;
                    Thread thread = new Thread(() -> parseBookList(catId, ruleBean, sourceId));
                    thread.start();
                    //Add the thread to the monitoring cache
                    threadIds.add(thread.getId());
                }
                cacheService.setObject(CacheKey.RUNNING_CRAWL_THREAD_KEY_PREFIX + sourceId, threadIds);
            }
        }
    }

    @Override
    public CrawlSource queryCrawlSource(Integer sourceId) {
        SelectStatementProvider render = select(CrawlSourceDynamicSqlSupport.sourceStatus, CrawlSourceDynamicSqlSupport.crawlRule)
                .from(crawlSource)
                .where(id, isEqualTo(sourceId))
                .build()
                .render(RenderingStrategies.MYBATIS3);
        return crawlSourceMapper.selectMany(render).get(0);
    }

    /**
     * Parse a category's book list
     */
    @Override
    public void parseBookList(int catId, RuleBean ruleBean, Integer sourceId) {

        //Current page number, starting at 1
        int page = 1;
        int totalPage = page;

        while (page <= totalPage) {

            try {
                //Build the category URL
                String catBookListUrl = ruleBean.getBookListUrl()
                        .replace("{catId}", ruleBean.getCatIdRule().get("catId" + catId))
                        .replace("{page}", page + "");

                String bookListHtml = getByHttpClient(catBookListUrl);
                if (bookListHtml != null) {
                    Pattern bookIdPatten = Pattern.compile(ruleBean.getBookIdPatten());
                    Matcher bookIdMatcher = bookIdPatten.matcher(bookListHtml);
                    boolean isFindBookId = bookIdMatcher.find();
                    while (isFindBookId) {
                        try {
                            String bookId = bookIdMatcher.group(1);
                            Book book = CrawlParser.parseBook(ruleBean, bookId);
                            //Only new books are saved here; check whether this book already exists
                            boolean isExist = bookService.queryIsExistByBookNameAndAuthorName(book.getBookName(), book.getAuthorName());
                            //If the book does not exist it can be parsed and saved, but it is marked as being saved so it cannot be saved again for 30 minutes
                            if (!isExist && StringUtils.isBlank(cacheService.get(CacheKey.NEW_BOOK_IN_SAVE + book.getBookName() + "-" + book.getAuthorName()))) {
                                //The book is new and can be saved
                                cacheService.set(CacheKey.NEW_BOOK_IN_SAVE + book.getBookName() + "-" + book.getAuthorName(), "true", 60 * 30);
                                book.setCatId(catId);
                                //Query the category name by category ID
                                book.setCatName(bookService.queryCatNameByCatId(catId));
                                if (catId == 7) {
                                    //Female-oriented channel
                                    book.setWorkDirection((byte) 1);
                                } else {
                                    //Male-oriented channel
                                    book.setWorkDirection((byte) 0);
                                }
                                book.setCrawlBookId(bookId);
                                book.setCrawlSourceId(sourceId);
                                book.setCrawlLastTime(new Date());
                                book.setId(new IdWorker().nextId());
                                //Parse the chapter index
                                Map<Integer, List> indexAndContentList = CrawlParser.parseBookIndexAndContent(bookId, book, ruleBean, new HashMap<>(0));

                                bookService.saveBookAndIndexAndContent(book, (List<BookIndex>) indexAndContentList.get(CrawlParser.BOOK_INDEX_LIST_KEY), (List<BookContent>) indexAndContentList.get(CrawlParser.BOOK_CONTENT_LIST_KEY));

                            } else {
                                //Only update the book's crawl-related fields
                                bookService.updateCrawlProperties(sourceId, bookId);
                            }
                        } catch (Exception e) {
                            log.error(e.getMessage(), e);
                        }

                        isFindBookId = bookIdMatcher.find();
                    }

                    Pattern totalPagePatten = Pattern.compile(ruleBean.getTotalPagePatten());
                    Matcher totalPageMatcher = totalPagePatten.matcher(bookListHtml);
                    boolean isFindTotalPage = totalPageMatcher.find();
                    if (isFindTotalPage) {
                        totalPage = Integer.parseInt(totalPageMatcher.group(1));
                    }
                }
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }

            page += 1;
        }
    }

    @Override
    public void updateCrawlSourceStatus(Integer sourceId, Byte sourceStatus) {
        CrawlSource source = new CrawlSource();
        source.setId(sourceId);
        source.setSourceStatus(sourceStatus);
        crawlSourceMapper.updateByPrimaryKeySelective(source);
    }

    @Override
    public List<CrawlSource> queryCrawlSourceByStatus(Byte sourceStatus) {
        SelectStatementProvider render = select(CrawlSourceDynamicSqlSupport.id, CrawlSourceDynamicSqlSupport.sourceStatus, CrawlSourceDynamicSqlSupport.crawlRule)
                .from(crawlSource)
                .where(CrawlSourceDynamicSqlSupport.sourceStatus, isEqualTo(sourceStatus))
                .build()
                .render(RenderingStrategies.MYBATIS3);
        return crawlSourceMapper.selectMany(render);
    }
}
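A note on the SpringUtil.getBean(CrawlService.class).updateCrawlSourceStatus(...) calls above: fetching the bean from the container returns the Spring proxy, so any AOP behavior attached to the method still applies, whereas a plain this.updateCrawlSourceStatus(...) self-call would bypass the proxy. SpringUtil itself lives in novel-common, not in this commit; a minimal sketch of such a context holder, assuming the real one may differ:

package com.java2nb.novel.core.utils;

import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Component;

/**
 * Hypothetical sketch of a static ApplicationContext holder (the real SpringUtil is not in this commit).
 */
@Component
public class SpringUtil implements ApplicationContextAware {

    private static ApplicationContext context;

    @Override
    public void setApplicationContext(ApplicationContext applicationContext) {
        // Stash the context so static lookups can resolve proxied beans later.
        context = applicationContext;
    }

    public static <T> T getBean(Class<T> clazz) {
        return context.getBean(clazz);
    }
}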
@@ -0,0 +1,28 @@
package com.java2nb.novel.vo;

import com.fasterxml.jackson.annotation.JsonFormat;
import com.java2nb.novel.entity.CrawlSource;
import lombok.Data;

import java.util.Date;

/**
 * @author Administrator
 */
@Data
public class CrawlSourceVO extends CrawlSource {

    @JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd HH:mm")
    private Date createTime;

    @JsonFormat(timezone = "GMT+8", pattern = "yyyy-MM-dd HH:mm")
    private Date updateTime;

    @Override
    public String toString() {
        return super.toString();
    }
}