This commit is contained in:
2026-03-26 15:04:59 +08:00
commit e0af97ac7f
65 changed files with 7366 additions and 0 deletions

295
app/api/v1/daily_news.py Normal file
View File

@@ -0,0 +1,295 @@
# app/api/v1/daily_news.py
import json
from datetime import datetime
from typing import List, Dict, Any, Optional
import pytz
from fastapi import APIRouter
from app.core import cache
from app.services import crawler_factory
from app.utils.logger import log
router = APIRouter()
@router.get("/")
def get_hot_news(date: Optional[str] = None, platform: Optional[str] = None):
    """Return the cached hot-news payload for a single platform.

    Args:
        date: Date string used in the cache key (the sibling endpoints
            document it as YYYY-MM-DD). Defaults to the current date in
            the Asia/Shanghai timezone.
        platform: Platform name; must be one of ``crawler_factory.keys()``.

    Returns:
        A dict with ``status``, ``data`` and ``msg``. ``status`` is "404"
        with empty ``data`` when ``platform`` is missing or unknown.
        Otherwise ``status`` is "200" and ``data`` is the decoded cache
        entry, or an empty list when nothing is cached or the cached
        payload cannot be decoded.
    """
    if platform not in crawler_factory.keys():
        return {
            "status": "404",
            "data": [],
            "msg": "`platform` is required, valid platform: " + ", ".join(crawler_factory.keys())
        }

    if not date:
        date = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d")

    cache_key = f"crawler:{platform}:{date}"
    cached = cache.get(cache_key)

    news = []
    if cached:
        try:
            news = json.loads(cached)
        except (TypeError, ValueError) as e:
            # A corrupt cache entry should degrade to "no data", as the
            # /all and /multi endpoints do, rather than fail the request.
            log.error(f"Error parsing cached data for {platform}: {e}")

    return {
        "status": "200",
        "data": news,
        "msg": "success"
    }
@router.get("/all")
def get_all_platforms_news(date: str = None):
    """Return the cached hot news of every registered platform.

    Args:
        date: Date in YYYY-MM-DD format; defaults to the current date in
            the Asia/Shanghai timezone.

    Returns:
        A dict with ``status``, ``data`` and ``msg``. ``data`` maps each
        platform name from ``crawler_factory.keys()`` to its decoded cache
        entry, or to an empty list when nothing is cached or decoding fails.
    """
    target_date = date or datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d")

    news_by_platform = {}
    for name in crawler_factory.keys():
        raw = cache.get(f"crawler:{name}:{target_date}")
        if not raw:
            # Nothing cached for this platform on that date.
            news_by_platform[name] = []
            continue
        try:
            news_by_platform[name] = json.loads(raw)
        except Exception as err:
            log.error(f"Error parsing cached data for {name}: {err}")
            news_by_platform[name] = []

    response = {
        "status": "200",
        "data": news_by_platform,
        "msg": "success"
    }
    return response
@router.get("/multi")
def get_multi_platforms_news(date: Optional[str] = None, platforms: Optional[str] = None):
    """Return the cached hot news of several platforms at once.

    Args:
        date: Date in YYYY-MM-DD format; defaults to the current date in
            the Asia/Shanghai timezone.
        platforms: Comma-separated platform names, e.g. "weibo,baidu,zhihu".
            Surrounding whitespace, empty entries (stray or trailing
            commas) and repeated names are ignored.

    Returns:
        A dict with ``status``, ``data`` and ``msg``. ``status`` is "404"
        with empty ``data`` when no platform name is given or any given
        name is not in ``crawler_factory.keys()``. Otherwise ``status`` is
        "200" and ``data`` maps each requested platform to its decoded
        cache entry, or to an empty list when nothing is cached or
        decoding fails.
    """
    if not date:
        date = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d")

    # Drop empty tokens so "weibo," is not rejected as containing an invalid
    # platform; dict.fromkeys de-duplicates while keeping request order.
    names = (part.strip() for part in (platforms or "").split(","))
    platform_list = list(dict.fromkeys(name for name in names if name))

    if not platform_list:
        return {
            "status": "404",
            "data": {},
            "msg": "`platforms` parameter is required, format: comma-separated platform names"
        }

    valid_platforms = crawler_factory.keys()

    # Reject the whole request if any requested platform is unknown.
    invalid_platforms = [p for p in platform_list if p not in valid_platforms]
    if invalid_platforms:
        return {
            "status": "404",
            "data": {},
            "msg": f"Invalid platforms: {', '.join(invalid_platforms)}. Valid platforms: {', '.join(valid_platforms)}"
        }

    multi_news = {}
    for platform in platform_list:
        result = cache.get(f"crawler:{platform}:{date}")
        if not result:
            multi_news[platform] = []
            continue
        try:
            multi_news[platform] = json.loads(result)
        except Exception as e:
            log.error(f"Error parsing cached data for {platform}: {e}")
            multi_news[platform] = []

    return {
        "status": "200",
        "data": multi_news,
        "msg": "success"
    }
@router.get("/search")
def search_news(keyword: str, date: Optional[str] = None, platforms: Optional[str] = None, limit: int = 20):
    """Search cached news titles for a keyword.

    Args:
        keyword: Text to look for; matched case-insensitively as a
            substring of each news title.
        date: Date in YYYY-MM-DD format; defaults to the current date in
            the Asia/Shanghai timezone.
        platforms: Comma-separated platform names, e.g. "weibo,baidu,zhihu".
            Names not in ``crawler_factory.keys()`` are silently dropped.
            When omitted, every registered platform is searched.
        limit: Maximum number of results to return (default 20). Values
            below zero are treated as zero.

    Returns:
        A dict with ``status``, ``data``, ``msg``, ``total`` (number of
        matches before limiting) and ``search_results`` (number of entries
        actually returned). ``data`` is grouped by platform in the order
        platforms first produced a match, and sorted by rank inside each
        group. ``status`` is "404" when no valid platform remains.
    """
    if not date:
        date = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d")

    # Decide which platforms to search.
    if platforms:
        valid_platforms = crawler_factory.keys()
        requested = (part.strip() for part in platforms.split(","))
        platform_list = [name for name in requested if name in valid_platforms]
    else:
        platform_list = list(crawler_factory.keys())

    if not platform_list:
        return {
            "status": "404",
            "data": [],
            "msg": "No valid platforms specified",
            "total": 0,
            "search_results": 0
        }

    # Lower-case the keyword once instead of once per news item.
    needle = keyword.lower()

    # Matching entries grouped by platform; dict insertion order keeps the
    # groups in the order each platform first produced a match.
    grouped_results = {}
    for platform in platform_list:
        result = cache.get(f"crawler:{platform}:{date}")
        if not result:
            continue
        try:
            platform_news = json.loads(result)
            if not isinstance(platform_news, list):
                continue

            # Category labels depend only on the platform, not on the item.
            category = _get_category_for_platform(platform)
            sub_category = _get_subcategory_for_platform(platform)

            for idx, item in enumerate(platform_news):
                if not isinstance(item, dict):
                    continue

                # Cached titles may be null or non-string; normalise to str
                # so the case-insensitive match below cannot raise.
                title = item.get("title")
                title = "" if title is None else str(title)
                if needle not in title.lower():
                    continue

                # Prefer "rank", then "index", then the 1-based list position.
                raw_rank = item.get("rank") or item.get("index")
                if raw_rank:
                    rank_value = str(raw_rank).replace("#", "")
                else:
                    rank_value = str(idx + 1)

                # Normalised news entry, tagged with its source platform.
                grouped_results.setdefault(platform, []).append({
                    "id": item.get("id"),
                    "title": title,
                    "source": platform,
                    "rank": rank_value,
                    "category": category,
                    "sub_category": sub_category,
                    "url": item.get("url", "")
                })
        except Exception as e:
            # Best effort: a broken platform payload must not fail the search.
            log.error(f"Error processing news from {platform}: {e}")

    # Sort each platform's matches by rank (stable), then concatenate.
    sorted_results = []
    for items in grouped_results.values():
        items.sort(key=_rank_sort_key)
        sorted_results.extend(items)

    # A negative limit would otherwise slice from the end of the list.
    limited_results = sorted_results[:max(limit, 0)]

    return {
        "status": "200",
        "data": limited_results,
        "msg": "success",
        "total": len(sorted_results),
        "search_results": len(limited_results)
    }


def _rank_sort_key(item: Dict[str, Any]) -> int:
    """Return the numeric sort key for a normalised news entry.

    Non-numeric ranks sort as 999, i.e. after any realistic rank.
    """
    rank = item["rank"]
    if rank.isdigit():
        try:
            return int(rank)
        except ValueError:
            # str.isdigit() accepts characters such as "²" that int() rejects.
            pass
    return 999
def _get_category_for_platform(platform: str) -> str:
"""根据平台返回对应的分类"""
categories = {
"36kr": "科技创业",
"hupu": "体育",
"sspai": "科技",
"weibo": "社交",
"zhihu": "知识",
"baidu": "综合",
"tieba": "社区",
"douban": "文化",
"bilibili": "视频",
"v2ex": "科技",
"github": "开发者",
"hackernews": "科技",
"stackoverflow": "开发者",
"jinritoutiao": "资讯",
"douyin": "娱乐",
"shaoshupai": "科技"
}
return categories.get(platform, "其他")
def _get_subcategory_for_platform(platform: str) -> str:
"""根据平台返回对应的子分类"""
subcategories = {
"36kr": "商业资讯",
"hupu": "娱乐",
"sspai": "数码",
"weibo": "热门",
"zhihu": "问答",
"baidu": "热搜",
"tieba": "讨论",
"douban": "影视",
"bilibili": "热门",
"v2ex": "技术",
"github": "开源",
"hackernews": "国际",
"stackoverflow": "问答",
"jinritoutiao": "热点",
"douyin": "娱乐",
"shaoshupai": "数码"
}
return subcategories.get(platform, "其他")