This commit is contained in:
HuangHai
2026-01-20 21:50:32 +08:00
parent 55e88777d9
commit 21fb8b39d8
6 changed files with 99 additions and 1 deletions

View File

@@ -8,6 +8,7 @@ from datetime import datetime
from typing import List, Optional
from fastapi import APIRouter, HTTPException, BackgroundTasks
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
import pymysql
@@ -38,6 +39,9 @@ def get_db_connection():
class ParseRequest(BaseModel):
    """Request body carrying a single free-form ``text`` field."""

    # Required: the model declares no default for this field.
    text: str
class SummaryRequest(BaseModel):
    """Request body for the summary endpoint.

    ``ids`` selects which records to summarise. It may be omitted or left
    empty, in which case the endpoint falls back to its default selection.
    """

    # Record ids to summarise; defaults to an empty list when not supplied.
    ids: List[str] = []
def update_status(id, status, error_msg=None):
try:
conn = get_db_connection()
@@ -258,6 +262,64 @@ def delete_record(id: str):
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
# Upper bound on how many transcript characters are embedded in the prompt,
# to keep the request within the model's context window.
_MAX_PROMPT_CHARS = 15000


def _fetch_summary_records(ids: List[str]):
    """Return (video_name, transcript) rows of COMPLETED records.

    When ``ids`` is non-empty only those records are selected; otherwise the
    20 most recently created COMPLETED records are returned.  The connection
    is always closed, even if the query raises.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cursor:
            if ids:
                # One %s placeholder per id so the driver escapes every value.
                placeholders = ",".join(["%s"] * len(ids))
                sql = (
                    "SELECT video_name, transcript FROM t_douyin_record "
                    f"WHERE id IN ({placeholders}) AND status='COMPLETED'"
                )
                cursor.execute(sql, tuple(ids))
            else:
                # Default to latest 20
                cursor.execute(
                    "SELECT video_name, transcript FROM t_douyin_record "
                    "WHERE status='COMPLETED' ORDER BY create_time DESC LIMIT 20"
                )
            return cursor.fetchall()
    finally:
        conn.close()


def _build_transcript_text(records) -> str:
    """Concatenate title/transcript sections, skipping empty transcripts."""
    # Rows are accessed by column name, so the connection is assumed to use a
    # dict-style cursor -- TODO confirm against get_db_connection().
    # NOTE(review): the opening bracket before the title label has no closing
    # counterpart; kept exactly as in the original text.
    return "".join(
        f"【标题:{row['video_name']}\n内容:{row['transcript']}\n\n"
        for row in records
        if row['transcript']
    )


def _build_summary_prompt(full_text: str) -> str:
    """Build the LLM prompt, embedding at most _MAX_PROMPT_CHARS of text."""
    return (
        "\n"
        "请对以下充电行业相关的视频内容进行知识精华提取。\n"
        "要求:\n"
        "1. 忽略无关闲聊和口语化表达;\n"
        "2. 按条目列出核心知识点,不要长篇大论;\n"
        "3. 保持简洁专业,只保留干货;\n"
        "4. 返回格式为Markdown列表。\n"
        "内容如下:\n"
        f"{full_text[:_MAX_PROMPT_CHARS]}\n"
    )


async def _single_message_stream(message: str):
    """Async generator that yields one fixed message and then finishes."""
    yield message


@router.post("/api/douyin/summary")
async def generate_summary(request: SummaryRequest):
    """Stream an LLM-generated knowledge summary of completed transcripts.

    Args:
        request: Optional list of record ids; when empty, the latest 20
            COMPLETED records are summarised.

    Returns:
        A ``StreamingResponse`` (``text/event-stream``) carrying either the
        LLM output or a single explanatory message when there is nothing to
        summarise.

    Raises:
        HTTPException: status 500 with the error text when anything fails.
    """
    try:
        # NOTE(review): the DB calls below are made directly inside this
        # coroutine; if the driver is blocking they will hold the event loop
        # for the duration of the query -- confirm whether that is acceptable.
        records = _fetch_summary_records(request.ids)

        if not records:
            # If no records, just return a simple message stream
            return StreamingResponse(
                _single_message_stream("未找到可总结的已完成记录,请先解析视频。"),
                media_type="text/event-stream",
            )

        full_text = _build_transcript_text(records)
        if not full_text:
            return StreamingResponse(
                _single_message_stream("记录中没有有效的文案内容。"),
                media_type="text/event-stream",
            )

        prompt = _build_summary_prompt(full_text)
        return StreamingResponse(
            get_llm_response(prompt), media_type="text/event-stream"
        )
    except Exception as e:
        logger.error(f"Summary generation failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
async def recover_pending_tasks():
"""
Check for tasks stuck in PENDING or PROCESSING state (due to server restart)