novel/novel-front/src/main/resources/application-crawl.yml

50 lines
2.4 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Crawl source site types: 1 = 笔趣岛 (biqudao), 2 = 笔趣塔 (biquta), 3 = 顶点 (dingdian). More site parsers are in progress; stay tuned.
# Crawl rules for the biquta mobile site (m.biquta.la).
# NOTE(review): settings are nested under biquta.crawlsource so they do not
# collide with the identically named keys of other sources; confirm this is
# the prefix the consuming code binds.
biquta:
  crawlsource:
    enabled: true  # whether this crawl source is enabled
    index-url: 'https://m.biquta.la'
    # {0} and {1} are positional placeholders substituted by the consumer
    # (presumably category and page number; verify against the crawler code).
    list-page-url: 'https://m.biquta.la/class/{0}/{1}.html'
    # The values below are regular expressions. They are single-quoted so that
    # backslashes and double quotes are taken literally by the YAML parser.
    # Group 1 captures the "<digits>_<digits>" path segment of a book link.
    book-url-pattern: 'href="/(\d+_\d+)/"'
    # Group 1 captures a decimal score such as 8.5.
    score-pattern: '<div\s+class="score">(\d+\.\d+)分</div>'
    book-name-pattern: '<p class="title">([^/]+)</p>'
    author-pattern: '作者:([^/]+)<'
    status-pattern: '状态:([^/]+)</li>'
    cat-pattern: '类别:([^/]+)</li>'
    # Group 1 captures a "d-d-d d:d:d" style timestamp.
    update-time-pattern: '更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a>'
    # Group 1 captures the src attribute of the cover image tag.
    pic-pattern: '<img src="([^>]+)"\s+onerror="this.src='
    intro-pattern: 'class="review">([^<]+)</p>'
    # Group 1 captures the relative URL of the full table-of-contents page.
    catalog-url-pattern: '<a\s+href="(/du/\d+_\d+/)">查看完整目录</a>'
    # Group 1 captures the chapter URL, group 2 the chapter link text.
    catalog-pattern: '<a\s+style=""\s+href="(/\d+_\d+/\d+\.html)">([^/]+)</a>'
# Crawl rules for the biqudao mobile site (m.biqudao.com).
# NOTE(review): settings are nested under biqudao.crawlsource so they do not
# collide with the identically named keys of other sources; confirm this is
# the prefix the consuming code binds.
biqudao:
  crawlsource:
    enabled: true  # whether this crawl source is enabled
    index-url: 'https://m.biqudao.com'
    # {0} and {1} are positional placeholders substituted by the consumer
    # (presumably category and page number; verify against the crawler code).
    list-page-url: 'https://m.biqudao.com/bqgeclass/{0}/{1}.html'
    # The values below are regular expressions. They are single-quoted so that
    # backslashes and double quotes are taken literally by the YAML parser.
    # Group 1 captures the "bqge<digits>" path segment of a book link.
    book-url-pattern: 'href="/(bqge\d+)/"'
    # Group 1 captures a decimal score such as 8.5.
    score-pattern: '<div\s+class="score">(\d+\.\d+)分</div>'
    book-name-pattern: '<p class="title">([^/]+)</p>'
    # This pattern anchors on the enclosing <li class="author"> element.
    author-pattern: '<li class="author">作者:([^/]+)</li>'
    status-pattern: '状态:([^/]+)</li>'
    cat-pattern: '类别:([^/]+)</li>'
    # Group 1 captures a "d-d-d d:d:d" style timestamp.
    update-time-pattern: '更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a>'
    # Group 1 captures the src attribute of the cover image tag.
    pic-pattern: '<img src="([^>]+)"\s+onerror="this.src='
    intro-pattern: 'class="review">([^<]+)</p>'
    # Group 1 captures the relative URL of the full table-of-contents page.
    catalog-url-pattern: '<a\s+href="(/bqge\d+/all\.html)">查看完整目录</a>'
    # Group 1 captures the chapter URL, group 2 the chapter link text.
    catalog-pattern: '<a[^/]+style[^/]+href="(/bqge\d+/\d+\.html)">([^/]+)</a>'
# Crawl rules for the dingdian mobile site (wap.dingdiann.com).
# NOTE(review): settings are nested under dingdian.crawlsource so they do not
# collide with the identically named keys of other sources; confirm this is
# the prefix the consuming code binds.
dingdian:
  crawlsource:
    enabled: true  # whether this crawl source is enabled
    index-url: 'https://wap.dingdiann.com'
    # {0} and {1} are positional placeholders substituted by the consumer
    # (presumably category and page number; verify against the crawler code).
    list-page-url: 'https://wap.dingdiann.com/sort/{0}/{1}.html'
    # The values below are regular expressions. They are single-quoted so that
    # backslashes and double quotes are taken literally by the YAML parser.
    # Group 1 captures the "ddk<digits>" path segment of a book link.
    book-url-pattern: 'href="/(ddk\d+)/"'
    # Group 1 captures a decimal score such as 8.5.
    score-pattern: '<div\s+class="score">(\d+\.\d+)分</div>'
    book-name-pattern: '<p class="title">([^/]+)</p>'
    author-pattern: '作者:([^/]+)<'
    status-pattern: '状态:([^/]+)</li>'
    cat-pattern: '类别:([^/]+)</li>'
    # Group 1 captures a "d-d-d d:d:d" style timestamp.
    update-time-pattern: '更新:(\d+-\d+-\d+\s\d+:\d+:\d+)</a>'
    # Group 1 captures the src attribute of the cover image tag.
    pic-pattern: '<img src="([^>]+)"\s+onerror="this.src='
    intro-pattern: 'class="review">([^<]+)</p>'
    # Group 1 captures the relative URL of the full table-of-contents page.
    # The dot before "html" is escaped so it matches only a literal ".";
    # an unescaped dot would match any single character.
    catalog-url-pattern: '<a\s+href="(/ddk\d+/all\.html)">查看完整目录</a>'
    # Group 1 captures the chapter URL, group 2 the chapter link text.
    catalog-pattern: '<a\s+style=""\s+href="(/ddk\d+/\d+\.html)">([^/]+)</a>'