# app/api/endpoints/dailynews.py import json from datetime import datetime from typing import List, Dict, Any, Optional import pytz from fastapi import APIRouter from app.core import cache from app.services import crawler_factory from app.utils.logger import log router = APIRouter() @router.get("/") def get_hot_news(date: str = None, platform: str = None): if platform not in crawler_factory.keys(): return { "status": "404", "data": [], "msg": "`platform` is required, valid platform: " + ", ".join(crawler_factory.keys()) } if not date: date = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d") cacheKey = f"crawler:{platform}:{date}" result = cache.get(cacheKey) if result: return { "status": "200", "data": json.loads(result), "msg": "success" } return { "status": "200", "data": [], "msg": "success" } @router.get("/all") def get_all_platforms_news(date: str = None): """ 获取所有平台的热门新闻 Args: date: 日期,格式为YYYY-MM-DD,默认为当天 Returns: 包含所有平台新闻的字典,键为平台名称,值为新闻列表 """ if not date: date = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d") all_news = {} for platform in crawler_factory.keys(): cacheKey = f"crawler:{platform}:{date}" result = cache.get(cacheKey) if result: try: all_news[platform] = json.loads(result) except Exception as e: log.error(f"Error parsing cached data for {platform}: {e}") all_news[platform] = [] else: all_news[platform] = [] return { "status": "200", "data": all_news, "msg": "success" } @router.get("/multi") def get_multi_platforms_news(date: str = None, platforms: str = None): """ 获取多个平台的热门新闻 Args: date: 日期,格式为YYYY-MM-DD,默认为当天 platforms: 平台列表,以逗号分隔,例如 "weibo,baidu,zhihu" Returns: 包含指定平台新闻的字典,键为平台名称,值为新闻列表 """ if not date: date = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d") if not platforms: return { "status": "404", "data": {}, "msg": "`platforms` parameter is required, format: comma-separated platform names" } platform_list = [p.strip() for p in platforms.split(",")] valid_platforms = crawler_factory.keys() # 验证平台是否有效 invalid_platforms = [p for p in platform_list if p not in valid_platforms] if invalid_platforms: return { "status": "404", "data": {}, "msg": f"Invalid platforms: {', '.join(invalid_platforms)}. Valid platforms: {', '.join(valid_platforms)}" } multi_news = {} for platform in platform_list: cacheKey = f"crawler:{platform}:{date}" result = cache.get(cacheKey) if result: try: multi_news[platform] = json.loads(result) except Exception as e: log.error(f"Error parsing cached data for {platform}: {e}") multi_news[platform] = [] else: multi_news[platform] = [] return { "status": "200", "data": multi_news, "msg": "success" } @router.get("/search") def search_news(keyword: str, date: str = None, platforms: str = None, limit: int = 20): """ 搜索新闻 Args: keyword: 搜索关键词 date: 日期,格式为YYYY-MM-DD,默认为当天 platforms: 平台列表,以逗号分隔,例如 "weibo,baidu,zhihu",默认搜索所有平台 limit: 返回结果数量限制,默认为20 Returns: 包含搜索结果的字典,键为状态码、数据、消息、总结果数量和搜索结果数量 """ if not date: date = datetime.now(pytz.timezone('Asia/Shanghai')).strftime("%Y-%m-%d") # 确定要搜索的平台 if platforms: platform_list = [p.strip() for p in platforms.split(",")] valid_platforms = crawler_factory.keys() platform_list = [p for p in platform_list if p in valid_platforms] else: platform_list = list(crawler_factory.keys()) if not platform_list: return { "status": "404", "data": [], "msg": "No valid platforms specified", "total": 0, "search_results": 0 } # 从各平台获取新闻数据 all_news = [] for platform in platform_list: cacheKey = f"crawler:{platform}:{date}" result = cache.get(cacheKey) if not result: continue try: platform_news = json.loads(result) if not isinstance(platform_news, list): continue # 为每条新闻添加平台信息 for idx, item in enumerate(platform_news): if not isinstance(item, dict): continue # 处理rank字段 rank_value = "" if "rank" in item and item["rank"]: rank_value = str(item["rank"]).replace("#", "") elif "index" in item and item["index"]: rank_value = str(item["index"]).replace("#", "") else: rank_value = str(idx + 1) # 获取分类信息 category = _get_category_for_platform(platform) sub_category = _get_subcategory_for_platform(platform) # 构建标准化的新闻条目 item_with_source = { "id": item.get("id"), "title": item.get("title", ""), "source": platform, "rank": rank_value, "category": category, "sub_category": sub_category, "url": item.get("url", "") } all_news.append(item_with_source) except Exception as e: log.error(f"Error processing news from {platform}: {e}") # 搜索关键词 search_results = [] for item in all_news: if keyword.lower() in item["title"].lower(): search_results.append(item) # 按站点分组,每个站点内按排名排序 grouped_results = {} for item in search_results: source = item["source"] if source not in grouped_results: grouped_results[source] = [] grouped_results[source].append(item) # 对每个站点内的结果按排名排序 for source, items in grouped_results.items(): # 按排名排序(直接比较数字) items.sort(key=lambda x: int(x["rank"]) if x["rank"].isdigit() else 999) # 重新组合排序后的结果 sorted_results = [] for source, items in grouped_results.items(): sorted_results.extend(items) # 限制返回结果数量 limited_results = sorted_results[:limit] return { "status": "200", "data": limited_results, "msg": "success", "total": len(search_results), "search_results": len(limited_results) } def _get_category_for_platform(platform: str) -> str: """根据平台返回对应的分类""" categories = { "36kr": "科技创业", "hupu": "体育", "sspai": "科技", "weibo": "社交", "zhihu": "知识", "baidu": "综合", "tieba": "社区", "douban": "文化", "bilibili": "视频", "v2ex": "科技", "github": "开发者", "hackernews": "科技", "stackoverflow": "开发者", "jinritoutiao": "资讯", "douyin": "娱乐", "shaoshupai": "科技" } return categories.get(platform, "其他") def _get_subcategory_for_platform(platform: str) -> str: """根据平台返回对应的子分类""" subcategories = { "36kr": "商业资讯", "hupu": "娱乐", "sspai": "数码", "weibo": "热门", "zhihu": "问答", "baidu": "热搜", "tieba": "讨论", "douban": "影视", "bilibili": "热门", "v2ex": "技术", "github": "开源", "hackernews": "国际", "stackoverflow": "问答", "jinritoutiao": "热点", "douyin": "娱乐", "shaoshupai": "数码" } return subcategories.get(platform, "其他")