Files
hot-news-api/app/api/v1/daily_news.py
2026-03-26 15:04:59 +08:00

296 lines
8.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# app/api/v1/daily_news.py
import json
from datetime import datetime
from typing import List, Dict, Any, Optional
import pytz
from fastapi import APIRouter
from app.core import cache
from app.services import crawler_factory
from app.utils.logger import log
router = APIRouter()
@router.get("/")
def get_hot_news(date: Optional[str] = None, platform: Optional[str] = None):
    """Return the cached hot-news list for a single platform.

    Args:
        date: Day to look up, formatted as YYYY-MM-DD. Defaults to the
            current date in the Asia/Shanghai timezone.
        platform: Platform name; must be one of ``crawler_factory.keys()``.

    Returns:
        A dict with ``status``, ``data`` and ``msg`` keys. ``status`` is
        "404" when ``platform`` is missing or unknown. Otherwise it is
        "200" and ``data`` is the decoded cache entry, or an empty list
        when nothing is cached or the cached value cannot be decoded.
    """
    if platform not in crawler_factory.keys():
        return {
            "status": "404",
            "data": [],
            "msg": "`platform` is required, valid platform: " + ", ".join(crawler_factory.keys())
        }

    if not date:
        date = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d")

    cacheKey = f"crawler:{platform}:{date}"
    result = cache.get(cacheKey)

    data = []
    if result:
        try:
            data = json.loads(result)
        except (TypeError, ValueError) as e:
            # A corrupt cache entry must not turn into a 500; fall back to
            # an empty list, as the multi-platform endpoints do.
            log.error(f"Error parsing cached data for {platform}: {e}")

    return {
        "status": "200",
        "data": data,
        "msg": "success"
    }
@router.get("/all")
def get_all_platforms_news(date: str = None):
    """Return the cached hot news of every known platform.

    Args:
        date: Day to look up, formatted as YYYY-MM-DD. Defaults to the
            current date in the Asia/Shanghai timezone.

    Returns:
        A dict with ``status``, ``data`` and ``msg`` keys, where ``data``
        maps each platform name to its news list. A platform with no cache
        entry, or with an entry that fails to decode, maps to an empty list.
    """
    if not date:
        shanghai = pytz.timezone('Asia/Shanghai')
        date = datetime.now(shanghai).strftime("%Y-%m-%d")

    all_news = {}
    for name in crawler_factory.keys():
        raw = cache.get(f"crawler:{name}:{date}")
        entries = []
        if raw:
            try:
                entries = json.loads(raw)
            except Exception as exc:
                # Keep serving the other platforms when one entry is corrupt.
                log.error(f"Error parsing cached data for {name}: {exc}")
                entries = []
        all_news[name] = entries

    return {"status": "200", "data": all_news, "msg": "success"}
@router.get("/multi")
def get_multi_platforms_news(date: Optional[str] = None, platforms: Optional[str] = None):
    """Return the cached hot news of several platforms.

    Args:
        date: Day to look up, formatted as YYYY-MM-DD. Defaults to the
            current date in the Asia/Shanghai timezone.
        platforms: Comma-separated platform names, e.g. "weibo,baidu,zhihu".
            Blank entries (such as the one produced by a trailing comma)
            and repeated names are ignored.

    Returns:
        A dict with ``status``, ``data`` and ``msg`` keys. ``status`` is
        "404" when no platform is given or any given platform is unknown.
        Otherwise it is "200" and ``data`` maps each requested platform to
        its news list (an empty list when nothing is cached or the cached
        value cannot be decoded).
    """
    if not date:
        date = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d")

    # Drop empty tokens and duplicates while keeping the request order, so
    # "weibo," is not rejected as containing an invalid platform named "".
    tokens = (p.strip() for p in (platforms or "").split(","))
    platform_list = list(dict.fromkeys(p for p in tokens if p))

    if not platform_list:
        return {
            "status": "404",
            "data": {},
            "msg": "`platforms` parameter is required, format: comma-separated platform names"
        }

    valid_platforms = crawler_factory.keys()

    # Reject the whole request when any requested platform is unknown.
    invalid_platforms = [p for p in platform_list if p not in valid_platforms]
    if invalid_platforms:
        return {
            "status": "404",
            "data": {},
            "msg": f"Invalid platforms: {', '.join(invalid_platforms)}. Valid platforms: {', '.join(valid_platforms)}"
        }

    multi_news = {}
    for platform in platform_list:
        cacheKey = f"crawler:{platform}:{date}"
        result = cache.get(cacheKey)
        if result:
            try:
                multi_news[platform] = json.loads(result)
            except Exception as e:
                log.error(f"Error parsing cached data for {platform}: {e}")
                multi_news[platform] = []
        else:
            multi_news[platform] = []

    return {
        "status": "200",
        "data": multi_news,
        "msg": "success"
    }
@router.get("/search")
def search_news(keyword: str, date: Optional[str] = None, platforms: Optional[str] = None, limit: int = 20):
    """Search cached hot-news titles for a keyword.

    Args:
        keyword: Text to look for; matched case-insensitively as a
            substring of each news title.
        date: Day to search, formatted as YYYY-MM-DD. Defaults to the
            current date in the Asia/Shanghai timezone.
        platforms: Comma-separated platform names, e.g. "weibo,baidu,zhihu".
            Unknown names are ignored. When omitted, every platform in
            ``crawler_factory`` is searched.
        limit: Maximum number of items returned (default 20). Negative
            values are treated as 0.

    Returns:
        A dict with ``status``, ``data``, ``msg``, ``total`` and
        ``search_results`` keys. ``data`` holds the normalized matches,
        grouped by platform and ordered by rank within each platform;
        ``total`` is the number of matches before ``limit`` is applied and
        ``search_results`` is the number of items actually returned.
        ``status`` is "404" when none of the requested platforms is valid.
    """
    if not date:
        date = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d")

    # Decide which platforms to search.
    if platforms:
        valid_platforms = crawler_factory.keys()
        requested = (p.strip() for p in platforms.split(","))
        # dict.fromkeys de-duplicates while keeping request order, so a
        # repeated platform does not produce duplicated results.
        platform_list = list(dict.fromkeys(p for p in requested if p in valid_platforms))
    else:
        platform_list = list(crawler_factory.keys())

    if not platform_list:
        return {
            "status": "404",
            "data": [],
            "msg": "No valid platforms specified",
            "total": 0,
            "search_results": 0
        }

    # Collect normalized news items from every selected platform.
    all_news = []
    for platform in platform_list:
        cacheKey = f"crawler:{platform}:{date}"
        result = cache.get(cacheKey)
        if not result:
            continue

        try:
            platform_news = json.loads(result)
            if not isinstance(platform_news, list):
                continue

            # The category labels depend only on the platform, so look them
            # up once per platform instead of once per item.
            category = _get_category_for_platform(platform)
            sub_category = _get_subcategory_for_platform(platform)

            for idx, item in enumerate(platform_news):
                if not isinstance(item, dict):
                    continue

                # Prefer "rank", then "index"; fall back to the 1-based
                # position in the cached list.
                raw_rank = item.get("rank") or item.get("index")
                if raw_rank:
                    rank_value = str(raw_rank).replace("#", "")
                else:
                    rank_value = str(idx + 1)

                # A missing, null or non-string title must not break the
                # keyword match below.
                title = item.get("title")
                title = "" if title is None else str(title)

                all_news.append({
                    "id": item.get("id"),
                    "title": title,
                    "source": platform,
                    "rank": rank_value,
                    "category": category,
                    "sub_category": sub_category,
                    "url": item.get("url", "")
                })
        except Exception as e:
            log.error(f"Error processing news from {platform}: {e}")

    # Case-insensitive substring match on the title.
    needle = keyword.lower()
    search_results = [item for item in all_news if needle in item["title"].lower()]

    # Group by source; groups keep the order in which sources first appear.
    grouped_results = {}
    for item in search_results:
        grouped_results.setdefault(item["source"], []).append(item)

    # Sort each group by numeric rank; non-numeric ranks go last (999).
    # isdecimal() is used because isdigit() also accepts characters such as
    # "²" that int() cannot parse.
    for items in grouped_results.values():
        items.sort(key=lambda x: int(x["rank"]) if x["rank"].isdecimal() else 999)

    sorted_results = [item for items in grouped_results.values() for item in items]

    # Clamp so that a negative limit cannot silently drop trailing items.
    limited_results = sorted_results[:max(limit, 0)]

    return {
        "status": "200",
        "data": limited_results,
        "msg": "success",
        "total": len(search_results),
        "search_results": len(limited_results)
    }
def _get_category_for_platform(platform: str) -> str:
"""根据平台返回对应的分类"""
categories = {
"36kr": "科技创业",
"hupu": "体育",
"sspai": "科技",
"weibo": "社交",
"zhihu": "知识",
"baidu": "综合",
"tieba": "社区",
"douban": "文化",
"bilibili": "视频",
"v2ex": "科技",
"github": "开发者",
"hackernews": "科技",
"stackoverflow": "开发者",
"jinritoutiao": "资讯",
"douyin": "娱乐",
"shaoshupai": "科技"
}
return categories.get(platform, "其他")
def _get_subcategory_for_platform(platform: str) -> str:
"""根据平台返回对应的子分类"""
subcategories = {
"36kr": "商业资讯",
"hupu": "娱乐",
"sspai": "数码",
"weibo": "热门",
"zhihu": "问答",
"baidu": "热搜",
"tieba": "讨论",
"douban": "影视",
"bilibili": "热门",
"v2ex": "技术",
"github": "开源",
"hackernews": "国际",
"stackoverflow": "问答",
"jinritoutiao": "热点",
"douyin": "娱乐",
"shaoshupai": "数码"
}
return subcategories.get(platform, "其他")