init
test/crawler_test.py (new file):
import os
import sys
from datetime import datetime

import pytz

# Make the project root importable when this file is run directly
# (same bootstrap as test/hackernews_test.py)
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

timezone = pytz.timezone('Asia/Shanghai')
now_time = datetime.now(timezone)
date_str = now_time.strftime("%Y-%m-%d")


class TestCrawler:

    def test_init(self):
        pass

    def test_crawler(self):
        from app.services.sites import BilibiliCrawler

        crawler = BilibiliCrawler()
        crawler.fetch(date_str)


if __name__ == '__main__':
    test = TestCrawler()
    test.test_crawler()

test/hackernews_test.py (new file):
import sys
import os
import json
from datetime import datetime

# Add the project root directory to the system path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from app.services.sites.hackernews import HackerNewsCrawler


def test_hackernews_crawler():
    """Test the Hacker News crawler."""
    print("===== Testing the Hacker News crawler =====")
    crawler = HackerNewsCrawler()
    date_str = datetime.now().strftime('%Y-%m-%d')

    print("1. Testing the requests-based fetch:")
    result = crawler._fetch_with_requests()
    if result and len(result) > 0:
        print(f" - Fetched {len(result)} news items")
        print(" - First item:")
        print(f"   Title: {result[0]['title']}")
        print(f"   URL: {result[0]['url']}")
        print(f"   Content: {result[0]['content']}")
    else:
        print(" - Fetching via requests failed")

    print("\n2. Testing the browser-based fetch:")
    from app.services.browser_manager import BrowserManager
    browser_manager = BrowserManager()

    try:
        result = crawler._fetch_with_browser(browser_manager)
        if result and len(result) > 0:
            print(f" - Fetched {len(result)} news items")
            print(" - First item:")
            print(f"   Title: {result[0]['title']}")
            print(f"   URL: {result[0]['url']}")
            print(f"   Content: {result[0]['content']}")
        else:
            print(" - Fetching via the browser failed")
    except Exception as e:
        print(f" - Browser test raised an exception: {str(e)}")

    print("\n3. Testing the full fetch method:")
    result = crawler.fetch(date_str)
    if result and len(result) > 0:
        print(f" - Fetched {len(result)} news items")
        print(" - Sample results (first 3):")
        for i, news in enumerate(result[:3]):
            print(f"   [{i+1}] {news['title']}")
    else:
        print(" - fetch method returned nothing")

    print("\n===== Test complete =====")


if __name__ == "__main__":
    test_hackernews_crawler()