From 92ce982899487370c41408901496bdd570f536f6 Mon Sep 17 00:00:00 2001 From: xiongxiaoyang <773861846@qq.com> Date: Mon, 18 May 2020 14:01:49 +0800 Subject: [PATCH] =?UTF-8?q?1.=E4=BC=98=E5=8C=96=E7=88=AC=E8=99=AB=E7=BC=96?= =?UTF-8?q?=E5=86=99=E8=A7=84=E5=88=99=EF=BC=8C=E5=85=BC=E5=AE=B9=E6=9B=B4?= =?UTF-8?q?=E5=A4=9A=E7=BD=91=E7=AB=99=202.=E6=96=B0=E5=A2=9E=E4=B9=A6?= =?UTF-8?q?=E8=B6=A3=E9=98=81=E4=B9=A6=E6=BA=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 + novel-admin/logs/debug.log | 3 + .../java2nb/novel/core/crawl/CrawlParser.java | 41 ++- .../java2nb/novel/core/crawl/RuleBean.java | 13 + .../com/java2nb/novel/utils/Constants.java | 5 + .../templates/crawl/crawlSource_add.html | 22 ++ .../main/resources/static/images/default.gif | Bin 0 -> 10794 bytes sql/20200518.sql | 1 + sql/novel_plus.sql | 328 ++++++++++++++---- sql/sql文件说明.txt | 3 + 10 files changed, 339 insertions(+), 79 deletions(-) create mode 100644 novel-front/src/main/resources/static/images/default.gif create mode 100644 sql/20200518.sql create mode 100644 sql/sql文件说明.txt diff --git a/.gitignore b/.gitignore index b1cc347..cdb1967 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,5 @@ /*.iml /novel-admin/*.iml .DS_Store +/novel-admin/cachedata +/novel-admin/logs diff --git a/novel-admin/logs/debug.log b/novel-admin/logs/debug.log index a4414cb..5ebdf85 100644 --- a/novel-admin/logs/debug.log +++ b/novel-admin/logs/debug.log @@ -3,3 +3,6 @@ 2020-05-13 21:52:01,131 INFO (SpringApplication.java:663)- The following profiles are active: dev 2020-05-13 21:52:54,469 DEBUG (ApplicationContextRegister.java:29)- ApplicationContext registed-->org.springframework.web.context.support.GenericWebApplicationContext@5b529706: startup date [Wed May 13 21:52:01 CST 2020]; root of context hierarchy 2020-05-13 21:53:49,622 INFO (StartupInfoLogger.java:59)- Started TestDemo in 114.268 seconds (JVM running for 124.957) +2020-05-18 09:48:03,219 INFO (StartupInfoLogger.java:50)- Starting TestDemo on DESKTOP-CPCLUI6 with PID 13172 (started by 11797 in D:\gitee\novel-plus\novel-admin) +2020-05-18 09:48:03,223 DEBUG (StartupInfoLogger.java:53)- Running with Spring Boot v2.0.1.RELEASE, Spring v5.0.5.RELEASE +2020-05-18 09:48:03,227 INFO (SpringApplication.java:663)- The following profiles are active: dev diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java index 11db043..c02195b 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/CrawlParser.java @@ -3,12 +3,17 @@ package com.java2nb.novel.core.crawl; import com.java2nb.novel.core.utils.HttpUtil; import com.java2nb.novel.core.utils.IdWorker; import com.java2nb.novel.core.utils.RandomBookInfoUtil; +import com.java2nb.novel.core.utils.RestTemplateUtil; import com.java2nb.novel.entity.Book; import com.java2nb.novel.entity.BookContent; import com.java2nb.novel.entity.BookIndex; import com.java2nb.novel.utils.Constants; import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.StringUtils; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.web.client.RestTemplate; import java.text.SimpleDateFormat; import java.util.*; @@ -22,17 +27,20 @@ import static java.util.regex.Pattern.compile; * * @author Administrator */ +@Slf4j public class CrawlParser { public static final Integer BOOK_INDEX_LIST_KEY = 1; public static final Integer BOOK_CONTENT_LIST_KEY = 2; + private static RestTemplate restTemplate = RestTemplateUtil.getInstance("utf-8"); + @SneakyThrows public static Book parseBook(RuleBean ruleBean, String bookId) { Book book = new Book(); String bookDetailUrl = ruleBean.getBookDetailUrl().replace("{bookId}", bookId); - String bookDetailHtml = HttpUtil.getByHttpClient(bookDetailUrl); + String bookDetailHtml = getByHttpClient(bookDetailUrl); if (bookDetailHtml != null) { Pattern bookNamePatten = compile(ruleBean.getBookNamePatten()); Matcher bookNameMatch = bookNamePatten.matcher(bookDetailHtml); @@ -54,6 +62,9 @@ public class CrawlParser { boolean isFindPicUrl = picUrlMatch.find(); if (isFindPicUrl) { String picUrl = picUrlMatch.group(1); + if(StringUtils.isNotBlank(picUrl) && StringUtils.isNotBlank(ruleBean.getPicUrlPrefix())) { + picUrl = ruleBean.getPicUrlPrefix() + picUrl; + } //设置封面图片路径 book.setPicUrl(picUrl); } @@ -136,7 +147,10 @@ public class CrawlParser { List contentList = new ArrayList<>(); //读取目录 String indexListUrl = ruleBean.getBookIndexUrl().replace("{bookId}", sourceBookId); - String indexListHtml = HttpUtil.getByHttpClient(indexListUrl); + String indexListHtml = getByHttpClient(indexListUrl); + if(StringUtils.isNotBlank(ruleBean.getBookIndexStart())){ + indexListHtml = indexListHtml.substring(indexListHtml.indexOf(ruleBean.getBookIndexStart()) + ruleBean.getBookIndexStart().length()); + } if (indexListHtml != null) { Pattern indexIdPatten = compile(ruleBean.getIndexIdPatten()); Matcher indexIdMatch = indexIdPatten.matcher(indexListHtml); @@ -162,7 +176,7 @@ public class CrawlParser { String contentUrl = ruleBean.getBookContentUrl().replace("{bookId}", sourceBookId).replace("{indexId}", indexIdMatch.group(1)); //查询章节内容 - String contentHtml = HttpUtil.getByHttpClient(contentUrl); + String contentHtml = getByHttpClient(contentUrl); if (contentHtml != null) { String content = contentHtml.substring(contentHtml.indexOf(ruleBean.getContentStart()) + ruleBean.getContentStart().length()); content = content.substring(0, content.indexOf(ruleBean.getContentEnd())); @@ -237,4 +251,25 @@ public class CrawlParser { } + private static String getByHttpClient(String url) { + try { + ResponseEntity forEntity = restTemplate.getForEntity(url, String.class); + if (forEntity.getStatusCode() == HttpStatus.OK) { + String body = forEntity.getBody(); + if(body.length() < Constants.INVALID_HTML_LENGTH){ + log.debug("获取html页面内容失败"); + Thread.sleep(10 + new Random().nextInt(60)); + return getByHttpClient(url); + } + return body; + } else { + return null; + } + } catch (Exception e) { + e.printStackTrace(); + return null; + } + } + + } diff --git a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/RuleBean.java b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/RuleBean.java index 6c7e678..5a8d319 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/RuleBean.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/core/crawl/RuleBean.java @@ -11,6 +11,14 @@ import java.util.Map; @Data public class RuleBean { + /** + * 小说更新列表url + * */ + private String updateBookListUrl; + + /** + * 分类列表页URL规则 + * */ private String bookListUrl; private Map catIdRule; @@ -39,4 +47,9 @@ public class RuleBean { private String contentEnd; + private String picUrlPrefix; + + private String bookIndexStart; + + } diff --git a/novel-crawl/src/main/java/com/java2nb/novel/utils/Constants.java b/novel-crawl/src/main/java/com/java2nb/novel/utils/Constants.java index e7a290c..5e0230a 100644 --- a/novel-crawl/src/main/java/com/java2nb/novel/utils/Constants.java +++ b/novel-crawl/src/main/java/com/java2nb/novel/utils/Constants.java @@ -14,4 +14,9 @@ public class Constants { * 访问量默认值 */ public static final Long VISIT_COUNT_DEFAULT = 100L; + + /** + * 爬取小说http请求中无效的内容长度 + */ + public static final int INVALID_HTML_LENGTH = 1000; } diff --git a/novel-crawl/src/main/resources/templates/crawl/crawlSource_add.html b/novel-crawl/src/main/resources/templates/crawl/crawlSource_add.html index 5af6e69..74b7f20 100644 --- a/novel-crawl/src/main/resources/templates/crawl/crawlSource_add.html +++ b/novel-crawl/src/main/resources/templates/crawl/crawlSource_add.html @@ -57,6 +57,9 @@
  • 示例:新顶点小说网
  • + 示例:http://m.xdingdiann.com/sort/{catId}/{page}.html ({catId}代表分类ID,{page}代表分页页码)
  • @@ -95,6 +98,9 @@ 示例:<img src="([^>]+)"\s+onerror="this.src=
  • + 可空,适用于图片路径为相对路径的源站,加上小说图片路径,则为完整的可访问的图片路径 +
  • 示例:状态:([^/]+)</li>
  • @@ -125,6 +131,9 @@ 示例:http://m.xdingdiann.com/ddk{bookId}/all.html (bookId代表小说ID)
  • + 可空,适用于最新章节列表和全部章节列表在同一个页面的源站 +
  • 示例:<a\s+style=""\s+href="/ddk\d+/(\d+)\.html">[^/]+</a>
  • @@ -278,6 +287,12 @@ crawlRule.picUrlPatten = picUrlPatten; } + var picUrlPrefix = $("#picUrlPrefix").val(); + + if (picUrlPrefix.length > 0) { + crawlRule.picUrlPrefix = picUrlPrefix; + } + var statusPatten = $("#statusPatten").val(); if (statusPatten.length > 0) { crawlRule.statusPatten = statusPatten; @@ -345,6 +360,13 @@ crawlRule.bookIndexUrl = bookIndexUrl; + + var bookIndexStart = $("#bookIndexStart").val(); + + if (bookIndexStart.length > 0) { + crawlRule.bookIndexStart = bookIndexStart; + } + var indexIdPatten = $("#indexIdPatten").val(); if (indexIdPatten.length == 0) { diff --git a/novel-front/src/main/resources/static/images/default.gif b/novel-front/src/main/resources/static/images/default.gif new file mode 100644 index 0000000000000000000000000000000000000000..4341383989608c0f8a0696dec960740852aace3e GIT binary patch literal 10794 zcmWkyc|6mP8~^O{*=dJ4a#wTT84+>~%N1HFIZ`9CL`Kp|_}FIT94f?DlOu|ZC`ZQ} zNkpqux~EjS*YW+PgZ=tF|33e`U$6J`yr1{;_VxF1a*mD#xWEqph=O|rU3UUfI^qiY z%{SL1Rt%oLG98iCed*FAZv9B-wFgXI&ym{p^z?LA(WTXq^%0fVs=LSIa$EOT54r6= zQ`*!YlqzH(&pmyveHIqFna-L)+TJGbsX zzOsQzvlTn#*>Pl_@-H5C{dO~~VnMs$p*3iQ}4zaB<+0AovFAj1JR&?K8lXT(L%ei&z zb1z@KsO`D6X@8@~?)J2rOD|r{xw1}2WVQt!>?!OT_t{xE`)YRI@j*`d!GhMIO^13S z>nHc8r>;wC-E!!BR_pLS;rTtKS2uAwD=$vJdNqGw|9+3S#?Z{Z?7ANKSj+1dvj_hf zO{%>dcjiuJ?a=-M>4#5V-W7ask8vArBndBYIO)sm zIT@AR&MB9Id6%8SayBI&e>wNOvipvCfG7;^+E#QmuVn&mzm>W_jkc#XqeAlL)vN7w zL+ShWugkpj{_DT#|8y5!m_A%1fs0QZ6W>ZcdNv@j@z2tqsFd82*6XSJQ@wVb4lKKp zmb&lNn>nA9)2!T!zZ8m+GdJVPFYU?iUmaX2XdW-=kkYp2a$2U+_NDIq=lY?4E<3T> zg454!NEO;bIzS}WpVK@sKvDIhtJ=ANiMmvIsfvGf==5I zv31Cit?B2icJ-{wI~UIRN7#AS%nSauu&_Sk^qZG&H)Qu*ntJY=e*S3B^rp^d3WdT7 z_*?(q1puo6Chet7&4aZ*o9$H^?HV^4=6Fx*+28dIvJATX%WscEqo+-j_n}s8yV#Ou zAM~Vp?do9dGoj|Qq25dM z`gx;nXlKm29~<%?_=hT_>&pX=p{9HPV_a7s6L|xaf`gSX@Lw}Rye`T#7SoE5Gud6l+zRn8W=^HogUboI5 z=wy4ukrmto>@o||^5aL%g64xa*X(oj3TiU-Iz@MMoUf3k`=8vW^-SoBTyC)=d@Qz) zJE&4;e&tf2Vi5KuX%n`N##g_{KYV;kIF&zc5Pz)0JXJ4uh&PBo_#+%?COd>Ef0NJo z6iq(KMU7VG+PsQFJkNN7a=NB2Kk{JABV@M$-7wtV6U12`of$GPvLC#rrkSr6j6sf= zy)3c|-MJI9+*P#GK=Z@bLe!K5;Y*}zgd6Glnf$z7j`j0OG8jsxy#riI*U|zBy27uX$HI2~M3IfM-p1jz=D(JjV5{`$D>)0=rX_10 zDf~|3x}W)*)@8h2fKMLz^Ye90+J`Oyie>h0UU$sotW{Tq&o47u(g-6DbwBp4IqsAa z26;U26UQGaCBulcS&KJVI^iub}k z776KY1nN!7&<(=Z)B09gf)G}Ot25rc*koOOm3_T?7F32j|(&Zdqhe7UxNieq?P9oX1`qGeI% z%KW_Q|GgjK7>=okc+hF5-l!)=0J>JP)JMA1jTqUJczI*9okxzv9qNA+q2)lw*g-UPK@Yge_r%GQ8c=8 z|K^kP-vI*U^_n~*&joMteOaytXJc_<6(#ua*WCBxr*Q!UPttPQ)0#Q}xvIr)o&5bj ze@=+ed!!wl&BHtD6XaO!!yf?$VNM0;H>|V01WuG+_xQ6Bx%^i{8ggt4Mxi&`?+h0n z4qU#ZYa;%XHJl3+2=P@SHo6h$oUiZ7K9gvK$*?CmCb=KR+ zimLLuT;@^KawNM5f&vmj!%bi4W_Db@;fa{VO)akFo^OFtcvjeRRHoVIwWu$Mi3J}4 z!07q<;&N(5HC9OmY4(Z;WA@E>`?t`_PvQ!fC;B!866~rEFF+1;*drnb2wM24bQq~` zxo+XQ+9~S6@twogaTK189<36fy#&cQRw}VytKw$Rc0XDbH?wrkSDi?E9g)`SZKVD} zlTC)7bcup)u)yW}s=WU6N_>^Xo3dUNH9ggfN0ddYY>cG6G%gHBYjR}B>(i$0Kf(oj zj!vIkjBQ@+@^<*(;ZxW=ETkhQ=UUmz(61V}J%=Ep#`F&SDlEQ@!-WhcZqUXRDacEUHl5uaY{n2;J+I!cIU$+94zPGvmxu)-zfqLLCQ1YArb;fc8U-kya#YAfF zb(B1-{Gt&dj6t|O6Z)qozQmZ*R;@P`Kfxhi^yOfwX&1}FGry{J)dmuRPWE|UxUhxf zvA@dkHydR?-+jsKH{^Wh?6d#Q5km%$5@4GQ*wqi9J*aGV+y`)CX{!XB`ar5(&wSgJ zR(*eYi^uC60+)2OeQoi3qTczx%ttt+85jyzgTYtM}AbnZ(SD09V_Kc^FqX z9I;+RQ2d(=4-=lgHTibl-3%~Y%t0m>ihUTKlp%q8uJ@$f5)zqWdv4%ruch^H2u zQUH91)b1=MD;w$NC(Y?&G0Mf3hV3))CVtxWNsdIii3no0u~PR`$?FiF zURwM&Gnw9Vm**Fk-E9r_3$Q_>dVLVsRj+Lwm!lSiYwuy6mk#}C(A9|lkg~J?2I{W* z30EYyXX=wQdu*Ft;(?5*;kR3VFYEfWsi|>3+62_3x{(>0e+EyxbBYhV{5Pe=cf~X6 z4wwjEbPp~nIT&5&KrRBG8t-s4wJqt1cUSTy##_C~>S^&(tzsPxpX9y;0?ke-&2)Hq zoFRjObMR&Y5p`-r|s#p7)IXugziQKWG}4TQScwOs)Jm`6i>F z4uc8bd7z=vrzjQX_@)I@({6|fBryPd_poIXd16?@iEvDP^`{?M|BT@6n21vqO6}kF zVgY+^veKUi>65?#BV;0fjnUC>$@Q9`cE^bWrQg)b`KRrWV;*t;Z)Q}VJ%$e({x@a%joX9X^3BFw-@ci*WeJ{u{jAd zJX^{JJex?rjzGvTe)*}TKLShgAUhdhODc+!9(QX2Q3{;qHNR}W9;H(KbhABflp`b8g0k+qym}|9` zTF@zij=ca5vWY7w2+;_0Nm{^=Ps1YA2xgF#-6Wuk7?M&@ zG4{Y|O5P2rfMt*9B;yfOY@-l|lM3lf(oZ`?Fd0CJfnoztO@_%L7hKZ(9hh z92DljU==&*!E4M)#Y5Eg8-!EZpc;9#EfaZDhQQ0}%E*~`Ho#&=l-FDAqgGzrUD@1V zc|^2!`FZHNKXiXrQl%7ih>E=-7V#KB90TPn!IYTe-45dXY1rd(uv3H$t*o|!aak0+ zrL3-6icXT&k;nxHCH@(5FiwVmgsER{@UodFjq~Zol#0F^Cl7WRUWviuBp7>neHDze z5`xZb;zx#36O}L_hlT*+VNo^GqW-W1w}ycaAcHYtB$|Ob(OR9uL{f#g$m6(pHZDL0 z7|78Xz`-Q|fxqBgMil_e2(~{iQ*?5FU(I5lVRAY43k~^fH(`{iG%Eb7dqii25Gp8W zdG6o1ZX6u(hxknBki6Q4krpS!8IV&%N@!m;vP_CPF2a0m06n|*T~>*O-XTvgw3Uoi-0PVO0j<&!Y~z#~r{Pf|-Ji>!D$l*0(3ILDE2+ie+aR zqcfYP)JQoU?O2j{p>s-(b^o{1QFzj@@NAY2-FXwyj*a^{-bIo2ME&k!;xGIO5Svj5 zcM?wF_uxDx+C2wQpUCHI<;hwujG3m;Lt~v&6kpH%7FyIUqq8;N0su`RaAUk@dq!_Q z9*kKpwqm0#gkt<}FiL!(jNDyLB^;*`eD-wv4e*tAVVz`zY}lu(?_wqe6QbME|Mv7l z8pe{@*37_p$a)@DW=0ieQs99C%eH1|Z|T7K^55rIYIl2|#e5R?H3FUWuQ(@ZL|ZoB zmWt8^D~V7?*80ksnwon6S}Jbq${<`+L=L>Lyb|Sd!HR(^)$JQR+wSgiNhPwg9KO_X z<}U&$N`g2of)3ISK1Jfx(=T5p`;<@DeO`!fi`1vnW4 zFBV-1kcm~c_La*o90EGW_H<9+5BiIc4Q6>9rByS4K~_zJF@yM62+W{~3kA4QNf%LX zQ1D>bt!O~~LyI#L(*sQ-{ZPl_FWcxTPxqC2 zp8ydPlJ1&J_idx9ZM}aIlr#wV0MV6-j7{uIakJwiDj7&az4+1xIcvx``_%y5etBz` zMK}Yjkq_OHSB4Y}3lud2u8E@_+fX|q2fYb>WrZV;`oSb7E={l7@Gx>(oN$|lJ!OT; zI1KtOpdohpr$_h@##q(>?i!T{20#T?qohBQk`Kcd)(;*NjrOt8xPkLYl5W*?vJ63z)!E7rB_c=mLFlqjng3*xZ-p|W;Jddqsvnr>t(_=s>!q$2 zODSj-HoD9bZ6hI^e29*comXWeC#-NS=xb0@yMffKDT97Th|&=uY_@^_iIk8D1hiQB zH$|x^aCCm#=!JoSq6}OF8L`Q9Kbi@|Fmctw_D4FnUx|Iba5wvrKUa9dKz@frMIHTO zeMoVB5|eqifkDy%8qMU=-*RGy7}*zjyRR)%xJ{Wty^!!kD3jcdp`cA3p#$LEBbUJD z6E{@`P}8ClYh>QNFg~A#-77>sZ7ti*G*M53+L=aH+7nx zG}`mNC!oFMN)-?fkYWH5E3!RGIHSrqRZYf~!PhOH`tPP;v*2S^FoIPKykwh%u%Z3Nq4zJ(ZEfnTxz~DN<}ktM#GaT>(-&d!~SbJ|x5ih>&h#aH);y zM!~Nv0JWr;mE{%+5g}FfaBb$@YZ8o-2sKJZDwx2(l1J?yA89Ix=Z6EtJ^{7Adx>4} zWwbi8hk{Zn^OJa z{YMf|CRX+oE35w_>u($Q>!o1p`d38aGA71Qe0yaQ5HFr}WnyYY-Tv~~-Av?_h5B$Q zrqw}P?*(ervln;VUV2cJI>^XF#V<<4nicIM>G4ee{$x0!=n*wLXxW5*D=pt?x+mo^GH=t!~%y|hA z!Tfvr0Yl20&%}eT-*E^Ms9Q>m5G$>2e>ho;NdEYuK2vJUAfRdZEFlQ9l?zK`?N1*F zpG}z4TCIfmWfGvu9q;|=Z6ELsN5My%V*N#D_HLisN<~&M>Fv@dIduqZ6sT~6A^@!Y zk}^zwU0j#xH#w2;;U5{}!+GIbZ+Fm;fzT#UTPD#s>~Muim@^Ps9w3%6`;GL@ zm4c^BzD$xJI{9ma2vtdgx|qnl;(sk=RVQwMu}qWaL)6PKMjIG@wi8^nV&bRHfNWy^ zjr-v6nKnZjioihlXV0FT`c9?bBZPP=6X_$tg#B!OCqf-xY|fxG&Ipp?9sf(WuO#tn z96J!V;A2dyQO7!&rd!*QwV(wXW5}32bnCk-4e2Aq8`7Go63n^+oO>gxTV8)4c)_Ot z_aO10u~Y2tHwLq`pN1~A-FgG(U2e0Vzb~yDH3vu*RQxi^*DW3(mAaT|^KTa$Y`qFr z3vp43l~{RkUIyifixI+-COSxi?A&m#LYCbTDko{8Ovn|+G~g~}s^^Jh^!Za-;9%bP zYp+#b0hMT+vLT8%G_OCHuc}PkpiZ9s{G48}WVQ6o)UN=mvWh-d{jEcp>+X2;)%tb% zalMJ!0j!2j9=cH7*<#TN4bqp@x&2;5>ZqG-AYad#^Nap4)wWutn+^wz$fgAWRSw@J z8|fzsHH z8p zH9>_Wt~EI=S&FHc{4F1DlM$0A3?KjmMsvM6Pw+{(+JW_vi*F?aUOs&A$`2}{zQLX?3zG3P_XAp8&^^LtgO zJe*SeD8ccDiBLVEi+I{Tn|AM_Uf^ullj4aHJ~})#T1E)#2?O=Q^O=2dhEK*C^8!N| zTnw2rX?B7|QyH-|VhiU=qszn55Y9IQ`p3ewc(B^RDyXq~w_^FZ|8Rk-)Vn5ZkGh%5 zN3I6fKdEAA)q~KKkBjDt``dpLRxJJQplC7tBv`LTA*gcM7=glMmQr$$#?@AxDmA;; zAFW5QFG2wK3&hjMcIiwte+{(UK|!6E`aYw+bD=->#rm@wTkji$Us@lb299l4vee!jqO7{lkC8@G&~;a?K@ zzuT9(kHRJE3C)ptYD|j3-1JXj9H!cW_0$|vpYc_%%_6^6U6CKAzw3Zo@69rcREo+r z2omT{h6nEh&|cBRJxt`^2=z$1nU#tO1&EsK98?ez^%1KF>DX|lk|Sj&;VNy`M($b6 zB6BgeDLV)U8F*tYZ$g12df6kjp}Su^H-zmT`vk_R6xY%$x6>&8d?|(?o>k6*A6dxGv=eeKpmZ)93h^RAanv!isP&rmPgKjq(ljh)omldQRvoQ zWdj*5h|DPO@Pb`ysQva?O+9tvNok>XbG<|nqJ(|trp&I?JZ5AOwve`P9 z@Ab-CkKR$M^P@Y2GJSs-OUQEhh(o#UM4V(6zylHuS`K#Y*TGDC3exP{Aj0EUCtxq( zuf*N}HDh07mtjS~qG!G)UjZA2IO!Y^$ntfZAX1oUShHn8l1_M8aZ=C6CHP>zgF2{U zmak7VVwQRxjnO_ID1c@F0F#IXzH1HZ)emylBljUlVx%nu_p3{p;5Z#Yr^^DBwe;6& zZXx^3BY4Jh6tWg;#G{3#>L)T-c_sx-UPZ&KRiDkt3o}HXO9}Sm=a9@s5GtRrN*P8& zYUHp{3c54LgF1*<+$I4NH)3O9E}5T?cH=q%m~Ah0q->fte^5n}GlHq>L#j~USr+*X z5<7>Hex~|O&VgRqT4G7Usr@;}_I}m;WoByjIEBBXO@G-{NscF7=X)lP4 zX~)qELtx6&7=!u#J2vV$1y{*5rH%6sY-9ks^?i=M(wAJ*TAn5u%L5Yo5SsH6Wm_jc z5G%(GlmKUR?wI}2mQC9tsnC+i(AEAZ z^W`wmhtXK*xIp8p0SxKE zAOUcKSBnAue3QYlAXW}aDo3OehKmdHEp?WF?Xpfogzea{d94IN3j5X)p@JYX2`^oW zB)}*$M{iU=vOF+GZ+oxxa=z6ZoAB$Z0_C;MT2iV(`=kl)(NjkvL-)mi$wOJ( z2BzmQdjaCa(0GB{hH=SS65U#Ip(`9}36Xv5OXo^mJ3L75Z7^CW14|!lX^)BQRn-LV=h&2Q?v357F{Ez%>b8%0I>Sa?-7m)4bOeIm0fUx}t zoqivHyl#u8RH7W=G)mfjA9DN-f85K4Our6+>LLKrkA817%|`8UmA%PUWC*kCu&cBd z=Zz>FWX-d*W1L6Y7v1>h3$w1Fx+yBkSYLw(@;j~WPC(7+nmJ=pKod!nv#8fp$G04~ zZ*%uwEf(tJJ+d)@jgVhdWEl8~6p4F`biW|8nL`>^+3O9T+|?L6ckNa*3O_T1ROT$9 ztf;%9ooCh8_Lk#=uV(5h?v~Lu0H*SIZnfDAaC2|2J?s zfdi}(UR9>MV!11n7`+w)9ZsvOdo25^Al?Z^$aCzleA@irN>?`>X^uJ>ZB5RNjBG|yxJHdMWEBq|j>_7@ zHSGnm-vbu(eiL}a?wyxqE zLUTso+&292tV{nlD4KEEFc5H|@SH{bG99k27T5A`KFvZPW-ceK9`!ps;vehdchq1N z3tT1dV+-?kk@MKtJToz9J3;{^BD@eZkO?4ws{sHpWSjI1Xl4HJR(soUd$&NE_N4jp zKr!SJhVn00#Z^N)@~@hZVIp~q$mCi_^5YUm`H3K*Xo&iOpV!Sbn;+fVCQv8=0u32D zRZ3<0`AU_nRan>TX3z&3ht=l=Gb(~qe}Hz$w|3`Q-73h2cFCd}Fwq*Tc>=Fx@|4Li zFL3Pndb-(7N&N>HL&jX4kT7DeI@F@~QqbvQ_(lw`tl^CG5D#QtKc{n1U6gAqiIRZ^Ml~Tnf^MiBx4mCh+>2ksCGq^{GOqlYIKfW7O3>9%kPse73blcQ@_p zhU03iOla5Bx&u@s0V`(^8_BtP%p6zI@QPYs1p`|kiCK3;Gq5S{H33rYB zQfZJq88#)m!ZnDehd>Ht^oA;|5ynR`xhB26>%@}^Wv7;kZ)2k$EdIVVI#@og3jS9P zt-Q%YQeh<+WrICZb^@xp#MNPnP8m$zIl6vYeN1Zx))HF96?*KUU0TP=iMiDNSQUOe z-lYtW|47{MBN1UX12+bo*>V`d!02c*Ug>l}Xj%@)6sYw23d!bP1i|-xZ4eBa zr#>S$e2L}#YvuEJPWtP+^q1>(jRja)45-ea)`roSB6#=0h>>6^s+Vtv?bKf0mvPD0 zSK`>Skmt-YxSncd(gJ-(-jY9omBVh`C>S@I>{3p;72mQU#tGJ7^3`A2EIm4f58HST za8NGHSx-hxo(iyXMtI5ilhd~5gPjVK7${YKdo6a;XZxVqHr`O|h99waejOGlv+uYX zZ`?-_#8dO)$=%w18~Y0;ql7urEjib@w{3`b)24-LVhDcd@Gup*M&`68F2wQG<{GQX z8Y4{Hz$7u~j!V)VmCNl>qO%+dnn^+LrRLEo(@!5+ujL@^TM!@rKzG3qD`$%{U}&%e zes~v&gv&$O{CjjLl^MG0)wJWXJ471qX~+el+Z~rov}q5Z>Tuhl>1-5}AH_ko*KCQf zw)0tlbvOp;GTwI7eM{%Qz}`;OF|x^~(72@g@wuTJ9DEYmLy5-slqW;*j4jE=(-yC6 z)&gj4v#s6^o7aZ%I14-lQtc% zts~O+0UkIM&iwwmhs~xOP!)!DU4C#c?fzOUkJUZ&{A}oRt5I{*UWTKX z7$!%+eu|280`>yv$STCvChCqRi-u*09W$%{a%wX7*fflvWQYlj3Jtmjt`a?MblLXA zYLxf=;>zE)it9ahp7(5GGI@VhaCaE2laFz0;ze-;Jt*`R0A6OCdtlkr(oHZ3t7Spv zrU-fWqMn)n=clo}mO$L2T%S0k=aWS4gylG;Fz7kzaa0&jd1f1)7P)5Zo(_}8aoYGA z1pvZ34hty8@BX62ih;{HF-@Q!i_hSIgeop}ly@)FCvc8yRWwSt5HVt$qijC@-!i*w z-g9jcq_TYD&&|2J0fEZpMx83q4wh{JwmMgV*mR^eTfpcA3B6p#{4ItSsP9*}BN1hj z4}PFRs~JzDM1V;%KH&{aIqzW=g$HNVb9E+JRF2P4GtfQVcNhDKt1940cDYb#JU%z zNT3~IC}(-DC52~|&)gvgcSzufXS3Mtxr{2VRamZ-EZRoS-68>Ek3$4`t~nO9V~T6Z z++`yHZ1R~g&t@4Cz?qG77V;A6z?9=W;T?Y5c9bs`=RkpW$and_gLbfyHoX^do`iE@%lD_{ zwbbROIjJ^of7f>IUA9D!&3f1JI-zyV`_|BR#&-8ZCG+~M+`MM6^ghdi%5&HY3s!x& zDq!*VBKfO66bk0~bhF=+4sn$nrYYL@>)joEXRa wkg~8Rhy|=>fAnGSu6}}Yec;EVAD`U+sQmQx$7lb2e2)Gk)A;n9gausw2j`Y0q5uE@ literal 0 HcmV?d00001 diff --git a/sql/20200518.sql b/sql/20200518.sql new file mode 100644 index 0000000..25cf191 --- /dev/null +++ b/sql/20200518.sql @@ -0,0 +1 @@ +INSERT INTO `novel_plus`.`crawl_source` (`id`, `source_name`, `crawl_rule`, `source_status`, `create_time`, `update_time`) VALUES ('4', '书趣阁', '{\r\n \"bookListUrl\": \"http://m.shuquge.com/sort/{catId}/0_{page}.html\",\r\n \"catIdRule\": {\r\n \"catId1\": \"1\",\r\n \"catId2\": \"2\",\r\n \"catId3\": \"3\",\r\n \"catId4\": \"4\",\r\n \"catId5\": \"7\",\r\n \"catId6\": \"6\",\r\n \"catId7\": \"8\"\r\n },\r\n \"bookIdPatten\": \"href=\\\"/s/(\\\\d+)\\\\.html\\\"\",\r\n \"pagePatten\": \"第(\\\\d+)/\\\\d+页\",\r\n \"totalPagePatten\": \"第\\\\d+/(\\\\d+)页\",\r\n \"bookDetailUrl\": \"http://m.shuquge.com/s/{bookId}.html\",\r\n \"bookNamePatten\": \"

    ([^/]+)

    \",\r\n \"authorNamePatten\": \"

    作者:([^/]+)

    \",\r\n \"picUrlPatten\": \"src=\\\"(http://www.shuquge.com/files/article/image/\\\\d+/\\\\d+/\\\\d+s\\\\.jpg)\\\"\",\r\n \"statusPatten\": \"

    状态:([^/]+)

    \",\r\n \"bookStatusRule\": {\r\n \"连载中\": 0,\r\n \"完本\": 1\r\n },\r\n \"descStart\": \"
    \",\r\n \"descEnd\": \"最新章节推荐地址\",\r\n \"bookIndexUrl\": \"http://www.shuquge.com/txt/{bookId}/index.html\",\r\n \"bookIndexStart\": \"》正文卷\",\r\n \"indexIdPatten\": \"
    [^/]+
    \",\r\n \"indexNamePatten\": \"
    ([^/]+)
    \",\r\n \"bookContentUrl\": \"http://www.shuquge.com/txt/{bookId}/{indexId}.html\",\r\n \"contentStart\": \"
    \",\r\n \"contentEnd\": \"http://www.shuquge.com\"\r\n}', '1', '2020-05-18 12:02:34', '2020-05-18 12:02:34'); diff --git a/sql/novel_plus.sql b/sql/novel_plus.sql index 86d510c..5f19a9b 100644 --- a/sql/novel_plus.sql +++ b/sql/novel_plus.sql @@ -2,19 +2,65 @@ Navicat MySQL Data Transfer Source Server : localhost -Source Server Version : 50624 +Source Server Version : 50725 Source Host : localhost:3306 -Source Database : novel_biz +Source Database : novel_plus Target Server Type : MYSQL -Target Server Version : 50624 +Target Server Version : 50725 File Encoding : 65001 -Date: 2020-05-02 10:53:04 +Date: 2020-05-18 13:59:04 */ SET FOREIGN_KEY_CHECKS=0; +-- ---------------------------- +-- Table structure for author +-- ---------------------------- +DROP TABLE IF EXISTS `author`; +CREATE TABLE `author` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '主键', + `user_id` bigint(20) DEFAULT NULL COMMENT '用户ID', + `invite_code` varchar(20) DEFAULT NULL COMMENT '邀请码', + `pen_name` varchar(20) DEFAULT NULL COMMENT '笔名', + `tel_phone` varchar(20) DEFAULT NULL COMMENT '手机号码', + `chat_account` varchar(50) DEFAULT NULL COMMENT 'QQ或微信账号', + `email` varchar(50) DEFAULT NULL COMMENT '电子邮箱', + `work_direction` tinyint(4) DEFAULT NULL COMMENT '作品方向,0:男频,1:女频', + `status` tinyint(4) DEFAULT '0' COMMENT '0:正常,1:封禁', + `create_time` datetime DEFAULT NULL COMMENT '创建时间', + PRIMARY KEY (`id`) +) ENGINE=InnoDB AUTO_INCREMENT=3 DEFAULT CHARSET=utf8mb4 COMMENT='作者表'; + +-- ---------------------------- +-- Records of author +-- ---------------------------- +INSERT INTO `author` VALUES ('1', null, 'reerer', 'abc', '13560487656', '23484388', '23484388@qq.com', '0', '0', null); +INSERT INTO `author` VALUES ('2', '1255060328322027520', 'rwrr445554', '梦入神机', '13560421324', '1179705413', 'reerer@qq.com', '0', '0', '2020-05-13 14:01:31'); + +-- ---------------------------- +-- Table structure for author_code +-- ---------------------------- +DROP TABLE IF EXISTS `author_code`; +CREATE TABLE `author_code` ( + `id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '主键', + `invite_code` varchar(100) DEFAULT NULL COMMENT '邀请码', + `validity_time` datetime DEFAULT NULL COMMENT '有效时间', + `is_use` tinyint(1) DEFAULT '0' COMMENT '是否使用过,0:未使用,1:使用过', + `create_time` datetime DEFAULT NULL COMMENT '创建时间', + `create_user_id` bigint(20) DEFAULT NULL COMMENT '创建人ID', + PRIMARY KEY (`id`), + UNIQUE KEY `key_code` (`invite_code`) USING BTREE +) ENGINE=InnoDB AUTO_INCREMENT=6 DEFAULT CHARSET=utf8mb4 COMMENT='作家邀请码表'; + +-- ---------------------------- +-- Records of author_code +-- ---------------------------- +INSERT INTO `author_code` VALUES ('3', 'reerer', '2020-05-27 22:43:45', '1', '2020-05-13 11:40:56', '1'); +INSERT INTO `author_code` VALUES ('4', '123456', '2020-05-28 00:00:00', '0', '2020-05-13 14:09:55', '1'); +INSERT INTO `author_code` VALUES ('5', 'ww34343', '2020-05-21 00:00:00', '0', '2020-05-13 14:18:58', '1'); + -- ---------------------------- -- Table structure for book -- ---------------------------- @@ -49,7 +95,7 @@ CREATE TABLE `book` ( UNIQUE KEY `key_uq_bookName_authorName` (`book_name`,`author_name`) USING BTREE, KEY `key_lastIndexUpdateTime` (`last_index_update_time`) USING BTREE, KEY `key_createTime` (`create_time`) USING BTREE -) ENGINE=InnoDB AUTO_INCREMENT=1256127379949019137 DEFAULT CHARSET=utf8mb4 COMMENT='小说表'; +) ENGINE=InnoDB AUTO_INCREMENT=1262260513468559361 DEFAULT CHARSET=utf8mb4 COMMENT='小说表'; -- ---------------------------- -- Records of book @@ -156,7 +202,7 @@ CREATE TABLE `book_content` ( `content` mediumtext COMMENT '小说章节内容', PRIMARY KEY (`id`), UNIQUE KEY `key_uq_indexId` (`index_id`) USING BTREE -) ENGINE=InnoDB AUTO_INCREMENT=3342428 DEFAULT CHARSET=utf8mb4 COMMENT='小说内容表'; +) ENGINE=InnoDB AUTO_INCREMENT=3347665 DEFAULT CHARSET=utf8mb4 COMMENT='小说内容表'; -- ---------------------------- -- Records of book_content @@ -179,7 +225,7 @@ CREATE TABLE `book_index` ( UNIQUE KEY `key_uq_bookId_indexNum` (`book_id`,`index_num`) USING BTREE, KEY `key_bookId` (`book_id`) USING BTREE, KEY `key_indexNum` (`index_num`) USING BTREE -) ENGINE=InnoDB AUTO_INCREMENT=1256373101432717313 DEFAULT CHARSET=utf8mb4 COMMENT='小说目录表'; +) ENGINE=InnoDB AUTO_INCREMENT=1262260612777095169 DEFAULT CHARSET=utf8mb4 COMMENT='小说目录表'; -- ---------------------------- -- Records of book_index @@ -303,13 +349,14 @@ CREATE TABLE `crawl_source` ( `create_time` datetime DEFAULT NULL COMMENT '创建时间', `update_time` datetime DEFAULT NULL COMMENT '更新时间', PRIMARY KEY (`id`) -) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8mb4 COMMENT='爬虫源表'; +) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8mb4 COMMENT='爬虫源表'; -- ---------------------------- -- Records of crawl_source -- ---------------------------- INSERT INTO `crawl_source` VALUES ('2', '百书斋', '{\r\n \"bookListUrl\": \"https://m.baishuzhai.com/blhb/{catId}/{page}.html\",\r\n \"catIdRule\": {\r\n \"catId1\": \"1\",\r\n \"catId2\": \"2\",\r\n \"catId3\": \"3\",\r\n \"catId4\": \"4\",\r\n \"catId5\": \"5\",\r\n \"catId6\": \"6\",\r\n \"catId7\": \"7\"\r\n },\r\n \"bookIdPatten\": \"href=\\\"/ibook/(\\\\d+/\\\\d+)/\\\"\",\r\n \"pagePatten\": \"value=\\\"(\\\\d+)/\\\\d+\\\"\",\r\n \"totalPagePatten\": \"value=\\\"\\\\d+/(\\\\d+)\\\"\",\r\n \"bookDetailUrl\": \"https://m.baishuzhai.com/ibook/{bookId}/\",\r\n \"bookNamePatten\": \"([^/]+)\",\r\n \"authorNamePatten\": \">作者:([^/]+)<\",\r\n \"picUrlPatten\": \"]+)\\\"\\\\s+onerror=\\\"this.src=\",\r\n \"statusPatten\": \"状态:([^/]+)\",\r\n \"bookStatusRule\": {\r\n \"连载\": 0,\r\n \"完成\": 1\r\n },\r\n \"scorePatten\": \"([^<]+)\",\r\n \"descStart\": \"

    \",\r\n \"descEnd\": \"

    \",\r\n \"upadateTimePatten\": \"更新:(\\\\d+-\\\\d+-\\\\d+)\",\r\n \"upadateTimeFormatPatten\": \"yy-MM-dd\",\r\n \"bookIndexUrl\": \"https://m.baishuzhai.com/ibook/{bookId}/all.html\",\r\n \"indexIdPatten\": \"[^/]+\",\r\n \"indexNamePatten\": \"([^/]+)\",\r\n \"bookContentUrl\": \"https://baishuzhai.com/ibook/{bookId}/{indexId}.html\",\r\n \"contentStart\": \"id=\\\"content\\\">\",\r\n \"contentEnd\": \"