This commit is contained in:
HuangHai
2026-01-20 21:43:54 +08:00
parent 66cb0faeff
commit 55e88777d9
32 changed files with 1112 additions and 60 deletions

3
.gitignore vendored
View File

@@ -1,4 +1,5 @@
/.idea/
*.log
/Output/
/DouYin/DownloadedVideos/
/DouYin/DownloadedVideos/
/DouYin/Audios/

View File

@@ -0,0 +1,301 @@
import os
import logging
import uuid
import shutil
import subprocess
import asyncio
from datetime import datetime
from typing import List, Optional
from fastapi import APIRouter, HTTPException, BackgroundTasks
from pydantic import BaseModel
import pymysql
# Import custom modules
from Config.Config import OBS_CLOUD_PREFIX, OBS_BUCKET, OBS_TMP_PREFIX, DORIS_HOST, DORIS_PORT, DORIS_USER, DORIS_PWD, DORIS_DATABASE, OBS_SERVER
from Util.DouYinDownloader import DouYinDownloader
from Util.ObsUtil import ObsUploader
from Util.ASRClient import ASRClient
from Util.LlmUtil import get_llm_response
# Logger setup
logger = logging.getLogger(__name__)
router = APIRouter()
# Database connection
def get_db_connection():
    """Open and return a new pymysql connection to the Doris database.

    Host, port, credentials and database name come from the ``DORIS_*``
    configuration constants. The connection uses ``DictCursor``, so rows are
    returned as dictionaries. The caller is responsible for closing it.
    """
    connection_settings = {
        "host": DORIS_HOST,
        "port": DORIS_PORT,
        "user": DORIS_USER,
        "password": DORIS_PWD,
        "database": DORIS_DATABASE,
        "charset": "utf8mb4",
        "cursorclass": pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**connection_settings)
class ParseRequest(BaseModel):
    """Request body accepted by the ``/api/parse`` endpoint."""

    # Raw text pasted by the user; URLs are extracted from it by the handler.
    text: str
def update_status(id, status, error_msg=None):
    """Update the status (and optionally the error message) of one record.

    Args:
        id: Primary key of the row in ``t_douyin_record``.
        status: New status string to store.
        error_msg: Failure description. The ``error_msg`` column is written
            only when this value is truthy.

    Returns:
        None. Database errors are logged and swallowed so that a failed
        status update never interrupts the calling task.
    """
    if error_msg:
        sql = "UPDATE t_douyin_record SET status=%s, error_msg=%s WHERE id=%s"
        params = (status, error_msg, id)
    else:
        sql = "UPDATE t_douyin_record SET status=%s WHERE id=%s"
        params = (status, id)

    try:
        conn = get_db_connection()
        try:
            cursor = conn.cursor()
            cursor.execute(sql, params)
            conn.commit()
        finally:
            # Close on failure too, so a failed statement does not leak the
            # connection; an error from close() is still caught below.
            conn.close()
    except Exception as e:
        logger.error(f"DB Error update_status: {e}")
def update_record(id, title, obs_url, transcript, status):
    """Store the final result of a processed video on its record.

    Args:
        id: Primary key of the row in ``t_douyin_record``.
        title: Video title. Titles longer than 100 characters are cut to
            their first 100 characters followed by ``"..."``.
        obs_url: URL of the uploaded video.
        transcript: Transcribed text of the video.
        status: New status string to store.

    Returns:
        None. Database errors are logged and swallowed.
    """
    # Keep the title short enough for the video_name column.
    if title and len(title) > 100:
        title = title[:100] + "..."

    sql = """
        UPDATE t_douyin_record
        SET video_name=%s, obs_url=%s, transcript=%s, status=%s
        WHERE id=%s
    """
    try:
        conn = get_db_connection()
        try:
            cursor = conn.cursor()
            cursor.execute(sql, (title, obs_url, transcript, status, id))
            conn.commit()
        finally:
            # Close on failure too, so a failed statement does not leak the
            # connection; an error from close() is still caught below.
            conn.close()
    except Exception as e:
        logger.error(f"DB Error update_record: {e}")
async def _generate_llm_title(transcript):
    """Ask the LLM for a short title summarising *transcript*.

    Returns the cleaned title, or an empty string when the model returned
    nothing usable.
    """
    prompt = f"请根据以下视频文案总结一个简短的标题20字以内不要包含任何解释性文字直接返回标题\n\n{transcript[:1000]}"
    # get_llm_response is consumed as an async iterator of text chunks.
    chunks = [chunk async for chunk in get_llm_response(prompt, stream=False)]
    # Remove surrounding whitespace and ASCII / CJK quotation marks that
    # models tend to wrap titles in.
    return "".join(chunks).strip().strip("\"'“”‘’").strip()


async def process_video_task(url: str, request_id: str, share_text: str = ""):
    """Download, upload, transcribe and title one video, then persist the result.

    Steps: mark the record PROCESSING, download the video into a per-task
    temporary directory, upload it to OBS, convert it to a 16 kHz mono MP3
    with ffmpeg, transcribe the MP3, optionally generate a title with the
    LLM, and finally store everything with status COMPLETED. Blocking work
    runs in worker threads via ``asyncio.to_thread``.

    Args:
        url: Video URL to download.
        request_id: Primary key of the record being processed.
        share_text: Original share text, used to extract a title. May be
            empty, in which case the downloader/LLM title is used.

    Any failure is logged and recorded on the row as status FAILED together
    with the error text; the temporary directory is always removed.
    """
    logger.info(f"Processing task {request_id}")
    # 1. Update status
    await asyncio.to_thread(update_status, request_id, "PROCESSING")
    temp_dir = os.path.abspath(f"temp_{request_id}")
    try:
        os.makedirs(temp_dir, exist_ok=True)

        # 2. Parse & download
        downloader = DouYinDownloader()
        if not url:
            raise Exception("No valid URL found")
        logger.info(f"Downloading from {url}")
        # Run the download in a thread to avoid blocking the event loop.
        local_video_path, title = await asyncio.to_thread(downloader.download_video, url, temp_dir)

        # Title priority: share text first, then the downloader's title;
        # an LLM-generated title is the last resort (step 6).
        extracted_title = downloader.extract_title_from_text(share_text)
        logger.info(f"Extracted title from text: {extracted_title}")
        if extracted_title and extracted_title != "Unknown Title":
            title = extracted_title
        elif not title:
            title = "Unknown Title"

        if not local_video_path or not os.path.exists(local_video_path):
            raise Exception("Download failed")

        # 3. Upload video to OBS (long-term storage)
        logger.info("Uploading video to OBS...")
        uploader = ObsUploader()
        video_filename = os.path.basename(local_video_path)
        obs_video_key = f"{OBS_CLOUD_PREFIX}/DouYin/{video_filename}"
        success, _ = await asyncio.to_thread(uploader.upload_file, obs_video_key, local_video_path, OBS_BUCKET)
        if not success:
            raise Exception("OBS Upload failed")
        # Public URL assembled from bucket, endpoint and object key.
        obs_url = f"https://{OBS_BUCKET}.{OBS_SERVER}/{obs_video_key}"

        # 4. Convert to MP3
        logger.info("Converting to MP3...")
        mp3_path = os.path.splitext(local_video_path)[0] + ".mp3"
        cmd = [
            "ffmpeg", "-y", "-i", local_video_path,
            "-acodec", "libmp3lame", "-ar", "16000", "-ac", "1", "-q:a", "2",
            mp3_path,
        ]
        result = await asyncio.to_thread(subprocess.run, cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
        if result.returncode != 0:
            # ffmpeg output is not guaranteed to be valid UTF-8.
            raise Exception(f"FFmpeg failed: {result.stderr.decode(errors='replace')}")

        # 5. ASR (upload MP3 to the tmp prefix and transcribe)
        logger.info("Transcribing...")
        asr = ASRClient()
        transcript = await asyncio.to_thread(asr.upload_and_transcribe_sync, mp3_path)
        if not transcript:
            raise Exception("Transcription failed (returned empty)")

        # 6. LLM title generation, only when no real title was found.
        if not title or title in ("Unknown Title", "Unknown"):
            try:
                logger.info("Generating title from transcript via LLM...")
                llm_title = await _generate_llm_title(transcript)
                # Only overwrite when something is left after cleaning.
                if llm_title:
                    logger.info(f"LLM generated title: {llm_title}")
                    title = llm_title
            except Exception as llm_e:
                logger.warning(f"LLM Title generation failed: {llm_e}")

        # 7. Save to DB
        logger.info("Saving to DB...")
        await asyncio.to_thread(update_record, request_id, title, obs_url, transcript, "COMPLETED")
        logger.info(f"Task {request_id} completed successfully.")
    except Exception as e:
        logger.error(f"Task {request_id} failed: {e}", exc_info=True)
        await asyncio.to_thread(update_status, request_id, "FAILED", str(e))
    finally:
        # 8. Cleanup of the per-task working directory.
        if os.path.exists(temp_dir):
            try:
                await asyncio.to_thread(shutil.rmtree, temp_dir, ignore_errors=True)
            except Exception as e:
                logger.error(f"Cleanup failed: {e}")
@router.post("/api/parse")
def parse(request: ParseRequest, background_tasks: BackgroundTasks):
    """Create one PENDING record per URL found in the text and queue processing.

    Args:
        request: Body carrying the pasted share text.
        background_tasks: FastAPI background-task registry.

    Returns:
        dict: ``id`` (first created id or None), ``ids`` (all created ids)
        and ``status`` ("PENDING").

    Raises:
        HTTPException: 400 when no URL can be found, 500 when the records
            cannot be written to the database.
    """
    downloader = DouYinDownloader()
    urls = downloader.extract_urls(request.text)
    if not urls:
        # The text itself may be a bare link the regex did not catch.
        if request.text.startswith("http"):
            urls = [request.text]
        else:
            raise HTTPException(status_code=400, detail="No valid URLs found")

    insert_sql = """
        INSERT INTO t_douyin_record (id, original_text, status, create_time)
        VALUES (%s, %s, 'PENDING', %s)
    """
    created = []  # (record id, url) pairs, in insertion order
    try:
        conn = get_db_connection()
        try:
            cursor = conn.cursor()
            for url in urls:
                req_id = str(uuid.uuid4())
                cursor.execute(insert_sql, (req_id, url, datetime.now()))
                created.append((req_id, url))
            conn.commit()
        finally:
            # Always release the connection, including on failure.
            conn.close()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"DB Init Error: {e}") from e

    # Queue work only after the rows are committed, so a task never runs
    # for a record that was not persisted. The full share text is passed
    # along so the title can be extracted from it.
    for req_id, url in created:
        background_tasks.add_task(process_video_task, url, req_id, request.text)

    created_ids = [req_id for req_id, _ in created]
    return {"id": created_ids[0] if created_ids else None, "ids": created_ids, "status": "PENDING"}
@router.get("/api/records")
def get_records():
    """Return the 50 most recently created records, newest first.

    ``create_time`` and ``update_time`` values, when present and non-empty,
    are rendered as ``YYYY-MM-DD HH:MM:SS`` strings.

    Returns:
        list: Row dictionaries; an empty list when the query fails (the
        error is logged).
    """
    try:
        conn = get_db_connection()
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT * FROM t_douyin_record ORDER BY create_time DESC LIMIT 50")
            records = cursor.fetchall()
        finally:
            # Always release the connection, including on failure.
            conn.close()

        # Format datetimes by hand so serialization never depends on the
        # response encoder.
        for r in records:
            for field in ("create_time", "update_time"):
                if r.get(field):
                    r[field] = r[field].strftime("%Y-%m-%d %H:%M:%S")
        return records
    except Exception as e:
        logger.error(f"Get records error: {e}", exc_info=True)
        return []
@router.delete("/api/records/{id}")
def delete_record(id: str):
    """Delete the record with the given id.

    Args:
        id: Primary key of the row to delete (path parameter).

    Returns:
        dict: ``{"status": "deleted"}`` once the delete is committed.

    Raises:
        HTTPException: 500 with the error text when the database call fails.
    """
    try:
        conn = get_db_connection()
        try:
            cursor = conn.cursor()
            cursor.execute("DELETE FROM t_douyin_record WHERE id=%s", (id,))
            conn.commit()
        finally:
            # Always release the connection, including on failure.
            conn.close()
        return {"status": "deleted"}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
# Strong references to restarted tasks. The event loop only keeps weak
# references, so an unreferenced task may be garbage-collected before it
# finishes.
_recovery_tasks = set()


async def recover_pending_tasks():
    """
    Restart tasks left in PENDING or PROCESSING state (e.g. after a restart).

    At most the 20 most recently created such records are restarted, to
    avoid a burst of work at startup. Each one is scheduled as an asyncio
    task running ``process_video_task``; this coroutine does not wait for
    them. Errors while scanning are logged and swallowed.
    """
    logger.info("Scanning for interrupted Douyin tasks...")
    try:
        def fetch_pending():
            """Blocking query for interrupted records; runs in a worker thread."""
            sql = """
                SELECT id, original_text, status
                FROM t_douyin_record
                WHERE status IN ('PENDING', 'PROCESSING')
                ORDER BY create_time DESC LIMIT 20
            """
            conn = get_db_connection()
            try:
                cursor = conn.cursor()
                cursor.execute(sql)
                return cursor.fetchall()
            finally:
                # Always release the connection, including on failure.
                conn.close()

        tasks = await asyncio.to_thread(fetch_pending)
        if not tasks:
            logger.info("No interrupted tasks found.")
            return

        logger.info(f"Found {len(tasks)} interrupted tasks. Restarting...")
        for task in tasks:
            # Only original_text is read back from the record, so no share
            # text is available for title extraction; an empty string is
            # passed and the title falls back to the downloader/LLM result.
            restarted = asyncio.create_task(
                process_video_task(task['original_text'], task['id'], share_text="")
            )
            _recovery_tasks.add(restarted)
            restarted.add_done_callback(_recovery_tasks.discard)
    except Exception as e:
        logger.error(f"Failed to recover tasks: {e}", exc_info=True)

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
售电公司的电为什么比电网的便宜?售电公司的盈利模式是怎样的?家人们,大家好。讲完结算机制,不少人私信问我,关于售电公司是如何给企业降低电费的问题。今天这期视频用最通俗的语言给大家解释两个问题。第一,售电公司的电为什么便宜?第二,售电公司的盈利模式。首先第一个问题的核心是二者的定价方式不同。电网是电力系统的大动脉,是要稳定市场的,所以它的电价是取整个市场买卖的平均价,要统一定制。相对公平公正,而售电公司更自由简单,就跟发电厂直接批发讲价,力求拿到更低的电价。说白了是一手电,这样售电公司买到手的电价也相对比电网更廉价。第二个问题,售电公司的盈利模式主要包括以下两个方面。第一方面是电力销售差价。售电公司一般拥有多个用户。电量大,从发电企业购买电力的价格通常更低,因此单个工商业用户通过售电公司买电,存在差价空间,更省钱。另外,还会根据企业的情况,收取一定的售电服务费用。同时,售电公司还兼具增值服务,如能源管理、电力运维、节能咨询等。这些服务可以帮助用户降低用电成本,同时增加售电公司的额外收入。例如能源管理服务,包括能源监测、节能改造和能效提升;电力运维服务涉及设备巡检和故障排查;节能咨询服务则帮助用户制定节能方案等等。两个问题的解答,您听明白了吗?我总结了一份售电结算表格带公式,一键生成售电收益,有需要的联系自取。

View File

@@ -0,0 +1 @@
售电公司就是卖电的大错特错。他是未来十年最暴力的合法贸易连国企都抢着进场因为赚的是国家给的垄断价差。我是高照你的能源行业信息参谋让你避坑上上当。很多人骂售电公司是二道贩子但你见过哪个二道贩子能想这三个特权国家帮你兜底所有交易走官方电力交易中心签完合同电网自动配送结算差价不用追账钱直接到账。电网当搬运工价线运维超表全是电网的活售电公司只做买低价店卖企业的轻生意赚垄断让利。过去电网统购统销赚的批零价差现在国家开放给售电公司相当于把垄断利润分出来让你赚。这不是普通贸易是电力体制改革的时代红利。而受限公司的暴利藏在三层利润里一基础钱电厂让利的价差售电公司通过交易中心批量买电厂的店卖给企业赚差价。比如一度店赚0.1元1000万万就是100万这是躺着赚的钱。2、技术前用电量预测的偏差奖国家会对售电公司的用电量预测准确率奖罚预测准了拿奖励错了被罚。总行的公司能靠这部分多赚30%这是技术壁垒的钱。三未来钱增值服务的未来大钱。光伏安装、储能租赁、碳交易这些才是售电公司的长期饭票。比如帮企业装光伏不仅水电费还能卖碳排放指标赚的比售电本身还多。重点来了普通人怎么一杯羹三条赛道任你选10成本做代理也叫居间人人推荐。企业和售电公司签约拿佣金一家企业一年省十万你拿1到2万签十家就是10到20万。2、有资金注册售电公司注册资本2000万起直接做电力批发商赚全部价差。3、有关系资源对接帮电厂找售电公司帮售电公司找企业赚居间费。关系变现的最佳方式。好了你现在了解售电公司了吗想知道做代理怎么找靠谱的售电公司吗怎样和和售电公司更高佣金我是高照关注我让你在能源行业避坑少上当。

View File

@@ -0,0 +1 @@
最近是不是有售电公司给你报三五级36级的超低电价比市场均价低了一大截。你是不是觉得捡到爆了先别着急的先老爷今天必须给你亏一个冷水。在电力市场饵料越香狗子往往也越分力我特别能理解老板们的心情谁不想把成本降到最低呢那你想一个最简单的道理为什么别人都在报4毛就他敢报0.35元他是格雷锋吗还是你觉得你比市场上所有的行家都聪明那听好了纯干货所有低于市场正常水平的报价其实都离不开这三大地雷震。你看完就知道这么多了。第一颗雷挑战国家底线的违规价这个你必须要知道国家为了防止恶性竞争给市场交易价设了一个价格下限。大部分地区都在0.37元二左右所有正规的合同都必须在电力交易中心的。官网上进行网签系统呢也会自动校验低于这个底线的价格根本就提交不上去。那他为什么还敢报呢很简单他压根就没打算。走正规的网签想跟你搞线下的阴阳合同那跌个雷真假孙悟空阴阳合同这是最常见的套路。他拿一份纸质合同给你看价格漂亮条款又等签约的主体是某某能源机房。那听起来像个大国企你大的一杯签了结果呢他回头在交易中心网签的时候用的主体可能是注册资金最低的某某能源咨询公司。一旦这样来扯皮交易中心啊指认网签的电子合同你拿着你那份纸质合同去告状可能连被告是谁都搞不清楚。那第三个类藏在细节里的魔鬼条款就算它价格合规主体一致也别掉以轻心。我给你点出三个最明显的条款。那第一他报价0.36元后面用小字写的不含税。第二合同比价依据当市场偏差超过5%,电价将上浮调整。那第三是口头承诺,年底有盘点,有补贴,但线上的电子合同只字不提,所以在电力交易的世界领域,从来就没有无缘无故的馅饼,只有精心设计的陷阱。一个能让你睡得安稳的透明的价格,远比一个看起来很美的爆价格要值钱的多。

View File

@@ -0,0 +1 @@
国家电网直接降电费都省事儿为什么非要搞设定公司来绕圈子咱们呢来打个比方您就明白了。2015年电改后呢电力系统就像一个大市场。那发电厂呢好比各大品牌厂家他负责生产商品也就是生产店他可以自由卖电也可以在平台找商家卖电。那电力交易中心呢就像某宝某东平台让各个商家入驻平台来售卖自己的电力商品。售电公司就是平台上的商家他负责从不同的厂家进货也就是从八进厂进店。进货后呢搞各种活动比如套餐活动比如附赠增值服务等等。然后把店零售卖给用户。那国家电网呢相当于物流公司他负责把商品也就是电安全运输到您家只收快递费也就是输配电费。那在这种关系中难道你会让物流公司给你商品降价吗不会因为商品价格是由厂家和商家在市场上竞争决定的那电力也一样这就是为什么要电杆为什么要搞售电公司一发电厂卷成本为了把电卖出去降低电价二售电公司卷价格卷服务。为了吸引客户争着给优惠。3、用户真正意可以货币三家选便宜服务好的内价。所以说变改后有了售电公司不是在绕圈子是为了建立一个健康竞争的电力市场让咱们企业用电更省钱、更透明、更高效。

View File

@@ -0,0 +1 @@
用了几十年的风谷电价2026年开始将正式退场不是微调而是彻底的取消。因为国家发改委刚刚发的那份2026年中长期合同签约的通知里面有一句重磅的话大家都在解读这个事情。原则上直接参与市场的用户不再执行政府规定的固定分时电价。注意这句话里边有两个关键词一个是政府规定一个是固定的。我们现在执行的电价呢实际上是固定的分时段的这样一个电价。早晚呢我们执行的是高峰电价也就是中午午夜呢我们执行的是古殿减价。同时呢风鼓之间的差一般也是固定的有一个固定的分股系数。如果这两个固定全取消了那你原来参照固定的风骨时段安排的错峰生产降低用电成本这件事儿那就要被取代了。被什么取代呢灵活的分时电价什么意思呢不是没有风骨相反风骨将变得更真实更剧烈。中午光伏大发工厂休息供大于求的时候负电价是正常的但是晚间光伏没有了但是用电设备全开了那电价高也是合理的。这个时候电价就不是再被文件给写死了而是每15分钟的供需关系实时匹配决定的。很多人可能会觉得这是不是太激进了其实恰恰相反。全球所有成熟的电力市场都没有政府规定的固定风格电价居民都可以选择动态的电价的套餐。所以你在新闻里面看到说德国的电价已经涨到了19欧美度的时候成熟的电力市场它就是这样的店它就是一个商品涨涨跌跌供需决定的。大家应该注意到就是近几年我们现在电力市场里边在节假日执行的叫深股电价就比古店还便宜。这就证明着传统的风骨的这样的划分已经不能充分的反映出电价的实时的供需情况。所以才需要改变让市场来决定这个变化会影响哪些行业。我们简单聊一聊首当其冲的一定是新能源的发电项目啊以光伏为首当其冲的它的收益的不确定性就大大增加了市场形成了动态的价格机制。对于固定曲线是一个很重大的打击。对于储能而言呢那就尤甚尤其是工商业储能。因为你的收益模型直接就崩塌掉了没有固定的存放在时段。那你只有小概率的窗口的区域但如果你冲错了1个小时这一天你都可能就白干了。对于这两个公司而言呢就是明年你要管理用的是多用户多时段、多仓位的复杂组合稍有不慎就可能造成亏损。让人欣慰的呢是通知里边提到了叫原则上直接入市的这些大用户它是执行灵活的分时电价。对于散户而言呢他现在是鼓励不是强制但实际上地方政策调整的已经很明显很快了陕西基本上就一步到位了。2026年的零售合同是必须挂钩批发发的的分时均价。四川呢是分布执行过度执行但是无论是快卖呃零售电价这种市场化现在已经是板上钉钉的事了。全体用户全面执行分时的灵活定价最多也就是两年的时间你觉得呢

View File

@@ -0,0 +1 @@
今天这期视频新能源人都得认真听不然明年可能少赚几十万家人们。2026年市场化交易工作马上开始了。近期各省陆续发布了机制竞价公告有人接到有关组织部门的电话问你的项目是否要参加机制竞价。面对这个问题突然有人想起来对136号文还不理解希望丹姐出一期解读视频。今天这期视频我用大白话给你讲透136号文。另外在本周日上午十点我也会开直播回答各位关于136号文相关的问题。关于136号文首先得搞明白文件核心就两个事儿。第一是让新能源上网电价全部由市场化说了算。以前咱光伏风电项目要么靠全额保障发多少电都按固定电价卖要么是保量保价一部分店按固定电价走剩下的才进市场现在不一样了。新规要求新能源项目的上网电量原则上全部要进入电力市场电价都得靠交易弹出来交易方式。交易规则在文件里都有明确规定。比如现货市场的价格限制放宽了。上线会参考当地的工商业尖峰电价下线会考虑你在市场外能拿到的其他收益不会让你亏的血本无归。可能有人要问了全靠市场万一电价跌的太狠我这电站不得赔本了。别慌136号文早就想到了这个问题就有了第二个事儿专门搞了个新能源可持续发展价格结算机制。简单说就是多退少补的保障。这里得区分两种项目。2025年6月1日前投产的存量项目和之后投产的增量项目。存量项目还得按以前的保障电量来你可以自主逐年减少保障量慢慢适应市场增量项目就不一样了。全省保障电量跟消纳权重挂钩省内能消纳更多新能源就能有更多市场化空间。单个项目的保障电量就要通过竞价来争取。其实电价也有讲究存量项目按现行价格来而且不能超过煤电基准价增量项目得靠竞价。所以竞价主体价格从低到高排序满足全省保障电量总量时对应的竞价主体的报价即为机制电价但这个价格不能超过规定的上限。执行期限上存量项目沿用之前的20年保障期或者全生命周期可利用小时税两个时间先到者为准。增量项目则按同类项目回收初始投资的平均时间来定很合理一般是十年或者12年。那咱电站的收入到底怎么算以前是固定电价乘以发电量现在变成了电力市场内收入加电力市场外收入市场内收入就是你谈的交易电价乘以上网电量市场外收入就是机制电价减去市场交易均价乘以机电量。打个比方你的电站的机制电价是0.4元每度市场均价是0.35元每度那每度电就能补0.05元。要是市场均价涨到0.45元每度那每住店就得扣0.05元很公平。不过从136号文对于新能源收入的影响上有几点必须提醒大家第一收入波动会变大午间光伏出力多的时候市场供过于求电价可能会低。晚高峰用电多但光伏不发电电价又会涨上去。而且西北这类电力宽松的地区电价可能长期偏低东部负荷中心波动会更大得提前做好准备。第二价差补偿只能补行业平均价没法抹平个体差异比如别人的电站位置好运维磅交易策略好能拿到更高的交易电价。你要是跟不上收入差距只会越来越大。所以找到优秀的运维团队和交易团队也至关重要。总的来说136号文不是利空。而是逼着新能源行业从靠政策吃饭转向靠能力赚钱。对于有实力的企业来说这是抢占市场的好机会。建议大家赶紧研究当地的交易规则提升交易和运维能力。也可以关注丹姐每天跟着丹姐的视频学习市场知识和交易技巧。可以把这条视频转发给身边做新能源的朋友并且预约周日的直播咱们下期见。嗯。

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
觉得现货电价很神秘,其实它就像电力系统的智能导航,随时反映不同时段的用电紧张程度。用电高峰时电价升高,提醒大家节约用电;用电低谷时电价降低,鼓励放心用电,这样既经济实惠又保障电网稳定运行。这种定价方式并非随意设定,而是电力系统多年运行经验与现代技术深度融合的成果。每天电力市场会通过精准计算确定哪些发电机组启动、发多少电。各机组再根据实时用电需求竞争报价,谁更清洁、更灵活,成本更低,谁就优先供电。由于每个时段的供需情况不同,竞争结果也随之变化,电价因此实时波动,形成了我们所说的现货电价。简而言之,现货电价是电力市场智慧的体现,它用灵活的价格信号引导发电与用电,让电力系统更安全、更高效、更利索。

View File

@@ -1,3 +1,11 @@
5.84 vfb:/ 11/01 P@X.mQ 复制打开抖音极速版,看看【特哥来电的作品】# 新能源充电桩 # 新能源汽车 # 电价 # 汉... https://v.douyin.com/CRVXcWcXj40/
4.84 12/16 WzT:/ I@V.lC 复制打开抖音极速版,看看【售电小蛮腰的作品】国家电网直接降电费多省事?为什么非要搞售电公司来绕... https://v.douyin.com/-x8xMg-rke8/
5.61 uFH:/ R@K.jc 08/17 复制打开抖音极速版,看看【耀昇集团的作品】电力现货交易:随机波动,持续运行# 售电 # 电改... https://v.douyin.com/wjnXK8g9K7s/
3.00 12/28 d@n.dN VYZ:/ 复制打开抖音极速版看看【聚合能研的作品】2026年电力市场的 “大洗牌” 正式开始 告别... https://v.douyin.com/gHWfWVgDVRo/
8.76 TYZ:/ p@d.Nw 06/06 复制打开抖音极速版看看【东哥新能源real的作品】峰谷平电价取消 灵活分时电价将全面替代划定分时电价... https://v.douyin.com/w3LQC4t1f2A/
@@ -8,12 +16,6 @@
9.43 12/11 b@A.gb cAt:/ 复制打开抖音极速版看看【华电丹姐说电力的作品】必须认真听的136号文详细解读# 知识分享 # ... https://v.douyin.com/_TFLCp9kwKw/
5.84 vfb:/ 11/01 P@X.mQ 复制打开抖音极速版,看看【特哥来电的作品】# 新能源充电桩 # 新能源汽车 # 电价 # 汉... https://v.douyin.com/CRVXcWcXj40/
4.84 12/16 WzT:/ I@V.lC 复制打开抖音极速版,看看【售电小蛮腰的作品】国家电网直接降电费多省事?为什么非要搞售电公司来绕... https://v.douyin.com/-x8xMg-rke8/
5.61 uFH:/ R@K.jc 08/17 复制打开抖音极速版,看看【耀昇集团的作品】电力现货交易:随机波动,持续运行# 售电 # 电改... https://v.douyin.com/wjnXK8g9K7s/
4.64 C@u.se 05/20 pqR:/ 复制打开抖音极速版,看看【晓莹她与电的那些事儿的作品】国家能源局关于电力市场典型违规问题的通报!# 全国... https://v.douyin.com/N_2XTr-C93g/
5.33 zGi:/ N@w.sR 11/24 复制打开抖音极速版,看看【高照-企业智库的作品】售电公司是什么?售电政策红利 # 售电# 售电居间... https://v.douyin.com/aSE5j289oPM/

View File

@@ -0,0 +1,35 @@
import os
import sys
import logging
# Ensure project root is in path
sys.path.append(r"d:\dsWork\aiData")
from Util.ASRClient import ASRClient
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', stream=sys.stdout)
logger = logging.getLogger("DebugASR")
def debug():
    """Transcribe one hard-coded MP3 with ASRClient and save the text.

    On success the transcript length is logged and the text is written to
    ``debug_output.txt``; an empty result or any exception is logged as an
    error.
    """
    audio_file = r"d:\dsWork\aiData\DouYin\Audios\政策深度解读:取消行政分时电价,为何是电力市场化的灵魂一步 #新能源 #综合能碳 #电力市场 #马哥能源频道_1589c43b.mp3"
    logger.info(f"Testing ASRClient with file: {audio_file}")
    try:
        transcript = ASRClient().transcribe_file_sync(audio_file)
        if not transcript:
            logger.error("Transcription returned None")
            return

        logger.info("Transcription successful!")
        logger.info(f"Length: {len(transcript)}")
        result_file = r"d:\dsWork\aiData\DouYin\Transcripts\debug_output.txt"
        with open(result_file, 'w', encoding='utf-8') as out:
            out.write(transcript)
    except Exception as err:
        logger.error(f"Fatal error: {str(err)}", exc_info=True)


if __name__ == "__main__":
    debug()

89
DouYin/init_db.py Normal file
View File

@@ -0,0 +1,89 @@
import sys
import os
# Add project root to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import pymysql
from Config.Config import DORIS_HOST, DORIS_PORT, DORIS_USER, DORIS_PWD, DORIS_DATABASE
# DDL for t_douyin_record. The replication property is filled in at run time
# because two spellings are tried. error_msg is included because the
# controller writes it when a task fails, and PENDING is listed because
# records are inserted with that status.
_CREATE_TABLE_SQL = """
CREATE TABLE IF NOT EXISTS t_douyin_record (
    id VARCHAR(50) NOT NULL COMMENT "UUID",
    video_name VARCHAR(500) COMMENT "视频名称",
    original_text TEXT COMMENT "原始粘贴文本",
    obs_url VARCHAR(500) COMMENT "OBS视频链接",
    transcript TEXT COMMENT "文案内容",
    status VARCHAR(20) COMMENT "状态: PENDING, PROCESSING, COMPLETED, FAILED",
    error_msg TEXT COMMENT "失败原因",
    create_time DATETIME COMMENT "创建时间"
)
UNIQUE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 1
PROPERTIES (
    {replication_property}
);
"""


def _create_table(replication_property, announce=None):
    """Connect to Doris and run the CREATE TABLE statement once.

    The connection is always closed, including when the statement fails.
    """
    conn = pymysql.connect(
        host=DORIS_HOST,
        port=DORIS_PORT,
        user=DORIS_USER,
        password=DORIS_PWD,
        database=DORIS_DATABASE,
        charset='utf8mb4',
    )
    try:
        cursor = conn.cursor()
        if announce:
            print(announce)
        cursor.execute(_CREATE_TABLE_SQL.format(replication_property=replication_property))
        cursor.close()
    finally:
        conn.close()


def init_db():
    """Create the ``t_douyin_record`` table if it does not exist.

    The table is first created with the ``replication_allocation`` property.
    If that fails with an error mentioning ``replication_allocation``, the
    statement is retried with the older ``replication_num`` property.

    Returns:
        bool: True when the table was created (or already existed),
        False otherwise. Errors are printed, not raised.
    """
    print(f"Connecting to {DORIS_HOST}:{DORIS_PORT}...")
    try:
        _create_table(
            '"replication_allocation" = "tag.location.default: 1"',
            announce="Executing CREATE TABLE...",
        )
        print("Table t_douyin_record created (or already exists).")
        return True
    except Exception as e:
        print(f"Error: {e}")
        # Only an error about the property itself justifies a retry.
        if "replication_allocation" not in str(e):
            return False

    print("Retrying with replication_num...")
    try:
        _create_table('"replication_num" = "1"')
        print("Table t_douyin_record created with replication_num.")
        return True
    except Exception as e2:
        print(f"Retry Error: {e2}")
        return False


if __name__ == "__main__":
    init_db()

View File

@@ -0,0 +1,38 @@
import os
import subprocess
import time
import sys
def run_loop(max_stalled_rounds=3):
    """Re-run ``transcribe_videos.py`` until every MP3 has a transcript.

    Progress is measured by comparing the number of ``.txt`` files in the
    transcript directory with the number of ``.mp3`` files in the audio
    directory. The script is launched with the current interpreter and its
    stdout/stderr are printed after each run.

    Args:
        max_stalled_rounds: Stop after this many consecutive runs that
            produced no new transcript, so a file that can never be
            transcribed does not keep the loop running forever. Pass
            ``None`` to retry without limit.
    """
    audio_dir = r"d:\dsWork\aiData\DouYin\Audios"
    transcript_dir = r"d:\dsWork\aiData\DouYin\Transcripts"
    script_path = r"d:\dsWork\aiData\DouYin\transcribe_videos.py"

    previous_done = None  # transcript count seen before the previous run
    stalled_rounds = 0

    while True:
        mp3s = [f for f in os.listdir(audio_dir) if f.endswith(".mp3")]
        if os.path.exists(transcript_dir):
            txts = [f for f in os.listdir(transcript_dir) if f.endswith(".txt")]
        else:
            txts = []

        done = len(txts)
        print(f"Progress: {done}/{len(mp3s)}")
        if done >= len(mp3s):
            print("All files processed!")
            break

        # Track consecutive runs that added nothing.
        if previous_done is not None and done <= previous_done:
            stalled_rounds += 1
            if max_stalled_rounds is not None and stalled_rounds >= max_stalled_rounds:
                print(f"No progress after {stalled_rounds} consecutive runs; stopping.")
                break
        else:
            stalled_rounds = 0
        previous_done = done

        print("Running transcribe_videos.py...")
        result = subprocess.run([sys.executable, script_path], capture_output=True, text=True)
        print(result.stdout)
        print(result.stderr)

        time.sleep(1)


if __name__ == "__main__":
    run_loop()

View File

@@ -1,18 +1,18 @@
import os
import asyncio
import logging
import sys
import time
# Ensure project root is in path
sys.path.append(r"d:\dsWork\aiData")
from Util.ASRClient import ASRClient
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', stream=sys.stdout)
logger = logging.getLogger("Transcriber")
async def transcribe_all():
def transcribe_all():
audio_dir = r"d:\dsWork\aiData\DouYin\Audios"
transcript_dir = r"d:\dsWork\aiData\DouYin\Transcripts"
@@ -22,6 +22,8 @@ async def transcribe_all():
client = ASRClient()
files = [f for f in os.listdir(audio_dir) if f.endswith(".mp3")]
# Sort files to ensure deterministic order
files.sort()
logger.info(f"Found {len(files)} audio files.")
for filename in files:
@@ -41,8 +43,8 @@ async def transcribe_all():
logger.info(f"Processing: {filename}")
try:
# Direct local file transcription using Recognition API
text = await client.transcribe_file(audio_path)
# Direct local file transcription using synchronous method
text = client.transcribe_file_sync(audio_path)
if text:
with open(txt_path, 'w', encoding='utf-8') as f:
@@ -50,13 +52,16 @@ async def transcribe_all():
logger.info(f"Saved transcript to: {txt_filename}")
else:
logger.error(f"Failed to transcribe: {filename}")
# Add a small delay between files
time.sleep(1)
except Exception as e:
logger.error(f"Error processing {filename}: {str(e)}", exc_info=True)
if __name__ == "__main__":
try:
asyncio.run(transcribe_all())
transcribe_all()
except KeyboardInterrupt:
logger.info("Stopped by user")
except Exception as e:

View File

@@ -38,6 +38,7 @@ logger.info("驿来特AI智能分析系统模块导入完成")
from Controller.YltAnalyticsController import router as ylt_router, init_db, close_db
from Controller.DegreeController import router as degree_router
from Controller.HaiBaoController import router as haibao_router
from Controller.DouYinController import router as douyin_router, recover_pending_tasks
from Util.Win32Patch import patch
from Util.RedisKit import RedisKit
@@ -49,6 +50,10 @@ async def lifespan(app: FastAPI):
await init_db()
# Initialize Redis connection
await RedisKit().get_connection()
# Recover interrupted Douyin tasks
await recover_pending_tasks()
try:
yield
finally:
@@ -65,6 +70,7 @@ app.mount("/static", StaticFiles(directory=static_dir), name="static")
app.include_router(ylt_router)
app.include_router(degree_router)
app.include_router(haibao_router)
app.include_router(douyin_router)
if __name__ == "__main__":

View File

@@ -3,7 +3,13 @@ from http import HTTPStatus
from dashscope.audio.asr import Recognition
import dashscope
import logging
import os
import shutil
import subprocess
import uuid
from Config import Config
from Config.Config import OBS_TMP_PREFIX, OBS_BUCKET
from Util.ObsUtil import ObsUploader
# 初始化日志记录器
logger = logging.getLogger(__name__)
@@ -34,18 +40,10 @@ class ASRClient:
logger.error(f"初始化ASR客户端失败: {str(e)}", exc_info=True)
raise
def transcribe_file_sync(self, file_path):
def _transcribe_segment(self, file_path):
"""
转写本地音频文件 (同步版本)
Args:
file_path: 本地音频文件路径
Returns:
str: 转写后的文本如果失败返回None
Internal method to transcribe a short audio segment
"""
logger.info(f"开始转写文件(Sync): {file_path}")
try:
recognition = Recognition(
model='paraformer-realtime-v1',
@@ -62,16 +60,94 @@ class ASRClient:
for s in result.output['sentence']:
sentences.append(s['text'])
text = "".join(sentences)
logger.info("转写成功")
return text
else:
logger.error(f"转写失败: {result.code} - {result.message}")
logger.error(f"Segment transcription failed: {result.code} - {result.message}")
return None
except Exception as e:
logger.error(f"转写过程出错: {str(e)}", exc_info=True)
logger.error(f"Segment transcription error: {str(e)}", exc_info=True)
return None
def transcribe_file_sync(self, file_path):
    """
    Transcribe a local audio file synchronously, splitting large files.

    Files of at most 5 MB are transcribed in one call. Larger files are
    re-encoded by ffmpeg into 60-second, 16 kHz mono MP3 segments inside a
    private temporary directory next to the input file; the segments are
    transcribed in order and their texts concatenated. Segments that fail
    are skipped with a warning.

    Args:
        file_path: Path of the local audio file.

    Returns:
        str: The transcribed text, or None when the file is missing,
        splitting fails, or no text could be produced.
    """
    import tempfile  # only the large-file path needs it

    logger.info(f"开始转写文件(Sync): {file_path}")

    if not os.path.exists(file_path):
        logger.error(f"File not found: {file_path}")
        return None

    file_size = os.path.getsize(file_path)
    if file_size <= 5 * 1024 * 1024:  # 5MB
        return self._transcribe_segment(file_path)

    logger.info(f"File is large ({file_size} bytes), splitting into chunks...")

    chunk_dir = None
    try:
        # A unique directory per call, so concurrent transcriptions of files
        # in the same folder cannot delete or mix each other's chunks.
        chunk_dir = tempfile.mkdtemp(
            prefix="temp_chunks_",
            dir=os.path.dirname(os.path.abspath(file_path)),
        )

        # Re-encode rather than stream-copy so every segment is a
        # consistent MP3 regardless of the input format.
        cmd = [
            "ffmpeg", "-y", "-i", file_path,
            "-f", "segment", "-segment_time", "60",
            "-acodec", "libmp3lame", "-ar", "16000", "-ac", "1", "-q:a", "2",
            os.path.join(chunk_dir, "out%03d.mp3"),
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)

        # Zero-padded names make lexical order equal to playback order.
        chunks = sorted(
            os.path.join(chunk_dir, f) for f in os.listdir(chunk_dir) if f.endswith(".mp3")
        )
        logger.info(f"Created {len(chunks)} chunks.")

        full_text = []
        for i, chunk in enumerate(chunks):
            logger.info(f"Processing chunk {i+1}/{len(chunks)}")
            text = self._transcribe_segment(chunk)
            if text:
                full_text.append(text)
            else:
                logger.warning(f"Chunk {i+1} failed to transcribe")

        final_text = "".join(full_text)
        if not final_text:
            # Failure is reported as None, the same as the small-file path.
            logger.error("Large file transcription produced no text")
            return None
        logger.info("Large file transcription completed")
        return final_text

    except subprocess.CalledProcessError as e:
        # ffmpeg output is not guaranteed to be valid UTF-8.
        detail = e.stderr.decode(errors="replace") if e.stderr else str(e)
        logger.error(f"FFmpeg splitting failed: {detail}")
        return None
    except Exception as e:
        logger.error(f"Error during large file processing: {str(e)}", exc_info=True)
        return None
    finally:
        if chunk_dir and os.path.exists(chunk_dir):
            shutil.rmtree(chunk_dir, ignore_errors=True)
async def transcribe_file(self, file_path):
"""
转写本地音频文件
@@ -84,3 +160,41 @@ class ASRClient:
"""
loop = asyncio.get_running_loop()
return await loop.run_in_executor(None, self.transcribe_file_sync, file_path)
def upload_and_transcribe_sync(self, file_path):
    """
    Upload an audio file to the OBS temporary prefix, then transcribe it.

    The object key is ``<OBS_TMP_PREFIX>/<uuid><ext>``, with ``.mp3`` used
    when the path has no extension. A failed upload (reported through the
    uploader's return value) is only logged: transcription still runs,
    because it works on the local file.

    Args:
        file_path: Path of the local audio file.

    Returns:
        str: The transcribed text, or None when an exception occurs.
    """
    try:
        obs_client = ObsUploader()
        suffix = os.path.splitext(file_path)[1] or ".mp3"
        obs_key = f"{OBS_TMP_PREFIX}/{uuid.uuid4()}{suffix}"

        logger.info(f"Uploading {file_path} to OBS: {obs_key}")
        uploaded, detail = obs_client.upload_file(obs_key, file_path, OBS_BUCKET)
        if uploaded:
            logger.info(f"Upload successful: {obs_key}")
        else:
            # Deliberately non-fatal: continue with the local file.
            logger.error(f"Failed to upload file to OBS: {detail}")

        # Transcribe from the local copy, which supports chunking.
        return self.transcribe_file_sync(file_path)
    except Exception as err:
        logger.error(f"Error in upload_and_transcribe: {err}", exc_info=True)
        return None

163
Util/DouYinDownloader.py Normal file
View File

@@ -0,0 +1,163 @@
import os
import re
import logging
import yt_dlp
import uuid
import requests
from typing import Optional, Tuple
logger = logging.getLogger(__name__)
class DouYinDownloader:
def __init__(self):
self.mobile_headers = {
"User-Agent": "Mozilla/5.0 (Linux; Android 10; SM-G960F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.181 Mobile Safari/537.36",
"Referer": "https://www.douyin.com/"
}
self.ydl_opts = {
'format': 'best', # Download best quality
'outtmpl': '%(id)s.%(ext)s',
'quiet': True,
'no_warnings': True,
'http_headers': self.mobile_headers,
# 'proxy': '...', # Add proxy if needed
}
def parse_share_text(self, text: str) -> Optional[str]:
"""Extract first URL from share text"""
urls = self.extract_urls(text)
if urls:
return urls[0]
return None
def extract_urls(self, text: str) -> list[str]:
"""Extract all URLs from text"""
return re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', text)
def extract_title_from_text(self, text: str) -> str:
"""
Extract title from share text by removing URLs and common prefixes
"""
# 1. Remove URLs
clean_text = re.sub(r'http[s]?://\S+', '', text)
# 2. Remove "Copy open Douyin..." prefix patterns
# Example: "3.00 12/28 d@n.dN VYZ:/ 复制打开抖音极速版,看看【聚合能研的作品】..."
# Pattern: Any chars + "复制打开抖音" + any chars + ",看看"
clean_text = re.sub(r'.*?复制打开抖音.*?,看看', '', clean_text)
# 3. Remove 【...】 if it's at the start (usually author name)
clean_text = re.sub(r'^\s*【.*?】', '', clean_text)
# 4. Clean up whitespace
clean_text = clean_text.strip()
# 5. If text is too long, truncate? No, keep it.
# If empty, return "Unknown Title"
return clean_text if clean_text else "Unknown Title"
def get_video_info(self, url: str) -> Tuple[Optional[str], Optional[str]]:
"""
Get video title and real URL using yt-dlp
Returns: (title, webpage_url)
"""
try:
with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
info = ydl.extract_info(url, download=False)
return info.get('title'), info.get('webpage_url')
except Exception as e:
logger.error(f"Error getting video info: {e}")
return None, None
def download_video_fallback(self, url: str, output_dir: str) -> Tuple[Optional[str], Optional[str]]:
"""
Fallback download method using requests and mobile User-Agent
"""
try:
logger.info(f"Attempting fallback download for {url}")
# 1. Get real URL (follow redirects)
session = requests.Session()
response = session.get(url, headers=self.mobile_headers, allow_redirects=True, timeout=10)
final_url = response.url
content = response.text
# 2. Extract video URL
video_url = None
urls = re.findall(r'"url_list":\["(.*?)"\]', content)
if urls:
for u in urls:
if "playwm" in u:
video_url = u.replace("\\u002F", "/")
break
if not video_url:
logger.error("Fallback: No video URL found in page content")
return None, None
# 3. Download video
file_uuid = str(uuid.uuid4())
filename = os.path.join(output_dir, f'{file_uuid}.mp4')
logger.info(f"Fallback downloading video from {video_url}")
# Use stream to download
r = requests.get(video_url, headers=self.mobile_headers, stream=True, timeout=30)
if r.status_code == 200:
with open(filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024*1024):
if chunk:
f.write(chunk)
# Try to extract title
title = "Unknown Title"
title_match = re.search(r'<title>(.*?)</title>', content)
if title_match:
title = title_match.group(1).replace(" - 抖音", "")
return filename, title
else:
logger.error(f"Fallback download failed with status {r.status_code}")
return None, None
except Exception as e:
logger.error(f"Fallback download error: {e}")
return None, None
def download_video(self, url: str, output_dir: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Download video to output_dir with a UUID filename.

    yt-dlp is tried first; if it raises, or if no downloaded file can be
    located afterwards, the request is handed to download_video_fallback.

    Args:
        url: The video/share URL to download.
        output_dir: Target directory (created if missing).

    Returns:
        (local_file_path, video_title), or (None, None) on failure.
    """
    try:
        # exist_ok avoids the check-then-create race when several downloads
        # start at the same time.
        os.makedirs(output_dir, exist_ok=True)

        file_uuid = str(uuid.uuid4())

        # Per-download copy so the shared options are not mutated.
        opts = self.ydl_opts.copy()
        opts['outtmpl'] = os.path.join(output_dir, f'{file_uuid}.%(ext)s')

        try:
            with yt_dlp.YoutubeDL(opts) as ydl:
                info = ydl.extract_info(url, download=True)
                filename = ydl.prepare_filename(info)

            if not os.path.exists(filename):
                # The name computed from the template may not be the file that
                # ended up on disk (e.g. a different extension), so look for
                # anything carrying this download's UUID prefix.
                filename = next(
                    (
                        os.path.join(output_dir, name)
                        for name in os.listdir(output_dir)
                        if name.startswith(file_uuid)
                    ),
                    None,
                )
                if filename is None:
                    # Never report a path that does not exist as a success;
                    # raising here routes the request to the fallback below.
                    raise FileNotFoundError(
                        f"no downloaded file with prefix {file_uuid} in {output_dir}"
                    )

            return filename, info.get('title')
        except Exception as e:
            logger.warning(f"yt-dlp failed, trying fallback: {e}")
            return self.download_video_fallback(url, output_dir)

    except Exception as e:
        logger.error(f"Error downloading video: {e}")
        return None, None

Binary file not shown.

23
debug_db.py Normal file
View File

@@ -0,0 +1,23 @@
import sys
import os
sys.path.append(os.getcwd())
from Controller.DouYinController import get_db_connection
def check_db():
    """
    Print the row count and the five most recent rows of t_douyin_record.

    Any error (connection, query, or close) is printed rather than raised,
    so this stays usable as a quick manual diagnostic.
    """
    try:
        conn = get_db_connection()
        try:
            cursor = conn.cursor()

            cursor.execute("SELECT count(*) as cnt FROM t_douyin_record")
            # NOTE(review): presumably a dict such as {'cnt': N} if the
            # connection uses a dict cursor — confirm against get_db_connection.
            count = cursor.fetchone()
            print(f"Total records: {count}")

            cursor.execute("SELECT * FROM t_douyin_record ORDER BY create_time DESC LIMIT 5")
            records = cursor.fetchall()
            print(f"Recent records: {records}")
        finally:
            # Close even when a query raises, so the connection is not leaked.
            conn.close()
    except Exception as e:
        print(f"Error: {e}")


if __name__ == "__main__":
    check_db()

View File

@@ -497,7 +497,18 @@ body {
margin-top: 16px;
display: flex;
justify-content: center;
gap: 10px;
}
/* Douyin Container */
.douyin-container {
flex: 1;
overflow-y: auto;
padding: 24px;
max-width: 95%;
margin: 0 auto;
width: 100%;
scrollbar-width: thin;
} gap: 10px;
flex-wrap: wrap;
}
@@ -813,29 +824,7 @@ body {
color: #fff;
}
.ad-close {
position: absolute;
top: 15px;
right: 15px;
background: rgba(255, 255, 255, 0.1);
border: none;
border-radius: 50%;
width: 32px;
height: 32px;
color: #fff;
font-size: 20px;
cursor: pointer;
transition: all 0.2s;
display: flex;
align-items: center;
justify-content: center;
line-height: 1;
}
.ad-close:hover {
background: rgba(239, 68, 68, 0.8);
transform: rotate(90deg);
}
.ad-header h2 {
margin: 0 0 30px 0;
@@ -906,14 +895,24 @@ body {
padding-top: 20px;
}
.auto-close-text {
font-size: 13px;
color: #94a3b8;
font-family: monospace;
background: rgba(0, 0, 0, 0.2);
.ad-close-btn {
font-size: 14px;
color: #cbd5e1;
background: rgba(255, 255, 255, 0.1);
display: inline-block;
padding: 4px 12px;
padding: 8px 24px;
border-radius: 20px;
border: 1px solid rgba(148, 163, 184, 0.3);
cursor: pointer;
transition: all 0.2s;
font-family: inherit;
}
.ad-close-btn:hover {
background: rgba(255, 255, 255, 0.2);
color: #fff;
border-color: rgba(255, 255, 255, 0.5);
transform: translateY(-1px);
}
/* Animations */

View File

@@ -17,7 +17,6 @@
<transition name="fade">
<div class="ad-overlay" v-if="showAd">
<div class="ad-content">
<button class="ad-close" @click="closeAd">×</button>
<div class="ad-header">
<h2>⚡ 系统特性介绍</h2>
</div>
@@ -46,6 +45,12 @@
</div>
<p>新增 <strong>智能海报生成</strong> 功能,未来将结合业务数据,一键生成精美的数据战报与营销海报</p>
</div>
<div class="ad-item">
<div class="ad-icon-wrapper">
<span class="ad-icon">🎥</span>
</div>
<p>新增 <strong>抖音知识库</strong>:支持视频解析、知识获取与总结、博主专栏订阅,自动生成 <strong>充电企业知识日报</strong>,助力企业构建专属知识库</p>
</div>
<div class="ad-item">
<div class="ad-icon-wrapper">
<span class="ad-icon">🎯</span>
@@ -60,7 +65,9 @@
</div>
</div>
<div class="ad-footer">
<p class="auto-close-text">{{ adCountdown }} 秒后自动关闭</p>
<button class="ad-close-btn" @click="closeAd">
关闭 ({{ adCountdown }}s)
</button>
</div>
</div>
</div>
@@ -72,6 +79,7 @@
<div class="nav-tabs">
<button class="nav-tab" :class="{active: activeTab==='dashboard'}" @click="activeTab='dashboard'">分时电价分析</button>
<button class="nav-tab" :class="{active: activeTab==='degree'}" @click="activeTab='degree'">智能数据查询</button>
<button class="nav-tab" :class="{active: activeTab==='douyin'}" @click="activeTab='douyin'">抖音知识库</button>
<a href="HaiBao/index.html" class="nav-tab" style="text-decoration: none; display: inline-block;">智能海报生成</a>
</div>
@@ -123,7 +131,13 @@
</div>
<div class="right-panel">
<div class="ai-title">智能决策分析助手</div>
<div class="ai-title">
智能决策分析助手
<button class="btn-primary" @click="startAiAnalysis" :disabled="aiLoading" style="margin-left: auto; font-size: 0.8rem; padding: 4px 12px; height: auto;">
<span v-if="!aiLoading">🚀 开始分析</span>
<span v-else>⏳ 分析中...</span>
</button>
</div>
<div class="ai-box" ref="aiBoxRef">
<div class="ai-question">
<div class="label">当前分析任务</div>
@@ -138,6 +152,104 @@
</div>
</div>
<!-- Douyin Tab -->
<div class="douyin-container" v-show="activeTab==='douyin'" style="padding: 24px; max-width: 95%; margin: 0 auto;">
<div class="douyin-header" style="text-align: center; margin-bottom: 40px;">
<h1 style="font-size: 2.5rem; font-weight: bold; color: #f1f5f9; margin-bottom: 0.5rem;">
抖音知识库
<button class="btn-primary" @click="fetchDouyinRecords" style="font-size: 1rem; padding: 4px 12px; margin-left: 12px; vertical-align: middle;">
🔄 刷新列表
</button>
</h1>
<p style="color: #94a3b8;">自动解析视频、提取文案,构建企业充电知识图谱</p>
</div>
<!-- Input Section -->
<el-card class="box-card" style="margin-bottom: 32px;">
<template #header>
<div class="card-header">
<span style="font-weight: bold; font-size: 1.2rem;">粘贴抖音分享链接</span>
</div>
</template>
<el-input
v-model="shareText"
:rows="15"
type="textarea"
placeholder="支持批量粘贴!
例如:
1.23 复制打开抖音,看看【...】 https://v.douyin.com/...
7.89 复制打开抖音,看看【...】 https://v.douyin.com/..."
style="margin-bottom: 20px; font-size: 16px;"
></el-input>
<div style="display: flex; justify-content: flex-end;">
<el-button type="primary" @click="startParsing" :loading="douyinLoading" :disabled="!shareText.trim()">
{{ douyinLoading ? '解析处理中...' : '开始解析' }}
</el-button>
</div>
</el-card>
<!-- Records List -->
<div class="records-list">
<el-card v-for="record in douyinRecords" :key="record.id" style="margin-bottom: 20px;" :body-style="{ padding: '0px' }">
<div style="display: flex; border-left: 4px solid;" :style="{borderColor: statusColor(record.status)}">
<div style="padding: 24px; flex: 1;">
<div style="display: flex; justify-content: space-between; align-items: start; margin-bottom: 16px;">
<div>
<div style="display: flex; align-items: center; margin-bottom: 8px;">
<el-tag :type="statusType(record.status)" effect="dark" size="small" style="margin-right: 8px;">
{{ record.status }}
</el-tag>
<span style="color: #9ca3af; font-size: 12px;">
{{ formatDate(record.create_time) }}
</span>
</div>
<h3 style="font-size: 1.25rem; font-weight: bold; color: #1f2937; line-height: 1.4;">
{{ record.video_name || '处理中...' }}
</h3>
<a v-if="record.obs_url" :href="record.obs_url" target="_blank" style="color: #3b82f6; font-size: 14px; margin-top: 4px; display: inline-block; text-decoration: none;">
📺 点击观看视频
</a>
</div>
<el-button type="danger" circle @click="deleteRecord(record.id)" plain>
<span style="font-size: 12px;">Del</span>
</el-button>
</div>
<!-- Error Message -->
<div v-if="record.status === 'FAILED' && record.error_msg" style="margin-top: 12px; color: #ef4444; font-size: 13px; background: #fee2e2; padding: 8px; border-radius: 4px; word-break: break-all;">
<strong>Error:</strong> {{ record.error_msg }}
</div>
<!-- Transcript -->
<div v-if="record.transcript" style="background-color: #f9fafb; border-radius: 8px; padding: 16px; margin-top: 16px;">
<h4 style="font-size: 14px; font-weight: 600; color: #4b5563; margin-bottom: 8px; text-transform: uppercase;">视频文案</h4>
<p style="color: #374151; white-space: pre-wrap; font-size: 14px; line-height: 1.6;" :style="record.expanded ? {} : {display: '-webkit-box', '-webkit-line-clamp': '3', '-webkit-box-orient': 'vertical', overflow: 'hidden'}">
{{ record.transcript }}
</p>
<el-button type="primary" link @click="record.expanded = !record.expanded" style="margin-top: 8px; font-size: 12px;">
{{ record.expanded ? '收起' : '展开全文' }}
</el-button>
</div>
<!-- Original Text -->
<div style="margin-top: 16px;">
<div style="cursor: pointer; color: #9ca3af; font-size: 12px;" @click="record.showOriginal = !record.showOriginal">
{{ record.showOriginal ? '收起原始链接' : '查看原始链接信息' }}
</div>
<div v-if="record.showOriginal" style="color: #6b7280; font-size: 12px; background: #f3f4f6; padding: 8px; border-radius: 4px; margin-top: 4px;">
{{ record.original_text }}
</div>
</div>
</div>
</div>
</el-card>
<div v-if="douyinRecords.length === 0" style="text-align: center; color: #9ca3af; padding: 40px;">
<p>暂无记录,请粘贴链接开始解析</p>
</div>
</div>
</div>
<!-- Degree Query Tab -->
<div class="degree-container" v-show="activeTab==='degree'">
<!-- 悬浮二维码 -->

View File

@@ -9,12 +9,18 @@ createApp({
// Ad Overlay State
const showAd = ref(true);
const adCountdown = ref(10);
const adCountdown = ref(15);
let adTimer = null;
const apiBase = ref(window.location.origin || "http://localhost:8000");
const isMobile = ref(window.innerWidth <= 768);
// Douyin State
const shareText = ref('');
const douyinLoading = ref(false);
const douyinRecords = ref([]);
let douyinTimer = null;
// Handle window resize
window.addEventListener('resize', () => {
isMobile.value = window.innerWidth <= 768;
@@ -509,6 +515,86 @@ createApp({
if (adTimer) clearInterval(adTimer);
};
// Douyin Methods
/**
 * Submit the pasted share text to POST /api/parse.
 * When the response carries `id` or a non-empty `ids`, clear the input,
 * reload the record list and show a success toast with the task count.
 * The loading flag is reset whether the request succeeds or fails.
 */
const startParsing = async () => {
    const text = shareText.value;
    if (!text.trim()) return;

    douyinLoading.value = true;
    try {
        const response = await axios.post(`${apiBase.value}/api/parse`, { text });
        const payload = response.data;
        const accepted = payload.id || (payload.ids && payload.ids.length > 0);
        if (accepted) {
            shareText.value = '';
            fetchDouyinRecords();
            if (typeof ElementPlus !== 'undefined') {
                // A single-task response has no `ids` array, so count it as 1.
                const count = payload.ids ? payload.ids.length : 1;
                ElementPlus.ElMessage.success(`成功提交 ${count} 个解析任务`);
            }
        }
    } catch (error) {
        console.error('Error:', error);
        if (typeof ElementPlus !== 'undefined') {
            ElementPlus.ElMessage.error('解析请求失败');
        }
    } finally {
        douyinLoading.value = false;
    }
};
/**
 * Reload the record list from GET /api/records, carrying over each card's
 * UI-only state (`expanded`, `showOriginal`) from the list currently shown.
 *
 * @param isManual `true`, or a click event (the refresh button binds this
 *                 function directly as its @click handler). Toasts are only
 *                 shown for such manual refreshes; background polling and
 *                 internal calls stay silent.
 */
const fetchDouyinRecords = async (isManual) => {
    const notifyUser = isManual === true || Boolean(isManual && isManual.type === 'click');
    // Show a toast only for manual refreshes and only when ElementPlus is loaded.
    const toast = (kind, message) => {
        if (notifyUser && typeof ElementPlus !== 'undefined') {
            ElementPlus.ElMessage[kind](message);
        }
    };

    try {
        const response = await axios.get(apiBase.value + '/api/records');
        const newRecords = response.data;

        // Index the current list once instead of scanning it for every
        // incoming row; this runs on each poll. The first entry per id wins,
        // matching what a linear find() would return.
        const previousById = new Map();
        for (const rec of douyinRecords.value) {
            if (!previousById.has(rec.id)) previousById.set(rec.id, rec);
        }

        douyinRecords.value = newRecords.map(newRec => {
            const oldRec = previousById.get(newRec.id);
            return {
                ...newRec,
                expanded: oldRec ? oldRec.expanded : false,
                showOriginal: oldRec ? oldRec.showOriginal : false
            };
        });
        toast('success', '列表已刷新');
    } catch (error) {
        console.error('Error fetching records:', error);
        toast('error', '刷新失败');
    }
};
/**
 * Ask for confirmation, then DELETE /api/records/{id} and reload the list.
 * Shows a success or failure toast when ElementPlus is available.
 */
const deleteRecord = async (id) => {
    const confirmed = confirm('确定要删除这条记录吗?');
    if (!confirmed) return;

    try {
        const endpoint = `${apiBase.value}/api/records/${id}`;
        await axios.delete(endpoint);
        fetchDouyinRecords();
        if (typeof ElementPlus !== 'undefined') {
            ElementPlus.ElMessage.success('删除成功');
        }
    } catch (error) {
        console.error('Error deleting:', error);
        if (typeof ElementPlus !== 'undefined') {
            ElementPlus.ElMessage.error('删除失败');
        }
    }
};
/**
 * Map a record status to the hex colour used for the card's left border.
 * Any status other than COMPLETED / FAILED / PROCESSING gets the grey default.
 */
const statusColor = (status) => {
    // A Map (not a plain object) so names like 'constructor' cannot hit
    // inherited properties.
    const colors = new Map([
        ['COMPLETED', '#10b981'],
        ['FAILED', '#ef4444'],
        ['PROCESSING', '#3b82f6'],
    ]);
    return colors.has(status) ? colors.get(status) : '#9ca3af';
};
/**
 * Map a record status to the `type` value bound on the status tag.
 * Any status other than COMPLETED / FAILED / PROCESSING maps to 'info'.
 */
const statusType = (status) => {
    // A Map (not a plain object) so names like 'constructor' cannot hit
    // inherited properties.
    const types = new Map([
        ['COMPLETED', 'success'],
        ['FAILED', 'danger'],
        ['PROCESSING', 'primary'],
    ]);
    return types.has(status) ? types.get(status) : 'info';
};
/**
 * Render a timestamp with the browser's locale formatting.
 * Falsy input (empty string, null, undefined) yields an empty string.
 */
const formatDate = (dateStr) => (dateStr ? new Date(dateStr).toLocaleString() : '');
// ==========================================
// Lifecycle
// ==========================================
@@ -516,8 +602,19 @@ createApp({
// Dashboard init
initChart();
loadAllOperatorsPrices();
startAiAnalysis();
// startAiAnalysis(); // Removed auto-start
placeholder.value = "请点击右上角“开始分析”按钮以获取报告。";
// Douyin init
fetchDouyinRecords();
if (douyinTimer) clearInterval(douyinTimer);
douyinTimer = setInterval(() => {
// Only poll if tab is active
if (activeTab.value === 'douyin') {
fetchDouyinRecords();
}
}, 3000);
// Start Chart Type Carousel (10s interval)
if (chartInterval) clearInterval(chartInterval);
chartInterval = setInterval(() => {
@@ -558,6 +655,9 @@ createApp({
// Degree
userQuery, queryLoading, queryResult, examples,
handleDegreeSearch, setExample, renderedResult, stopDegreeGeneration,
// Douyin
shareText, douyinLoading, douyinRecords,
startParsing, fetchDouyinRecords, deleteRecord, statusColor, statusType, formatDate,
// Ad
showAd, adCountdown, closeAd
};

53
test_title_extraction.py Normal file
View File

@@ -0,0 +1,53 @@
import sys
import os
import re
# Mock class since we can't import easily without full env
class DouYinDownloaderMock:
    """
    Stand-alone copy of the share-text title extraction, usable without the
    project environment.

    NOTE(review): presumably mirrors DouYinDownloader.extract_title_from_text —
    keep the two in sync when changing the patterns here.
    """

    def extract_title_from_text(self, text: str) -> str:
        """
        Extract title from share text by removing URLs and common prefixes.

        Args:
            text: Raw share text, e.g.
                "3.00 12/28 ... 复制打开抖音极速版,看看【作者的作品】标题... https://v.douyin.com/xxx/"

        Returns:
            The remaining text with surrounding whitespace stripped, or
            "Unknown Title" when nothing is left.
        """
        # 1. Remove URLs
        clean_text = re.sub(r'http[s]?://\S+', '', text)

        # 2. Remove the "复制打开抖音 ... 看看" prefix.
        #    The first alternative is the form with the full-width comma
        #    ("...,看看"). The second covers share texts that omit the comma
        #    ("复制打开抖音极速版看看【...】"), which were previously left
        #    untouched. The comma form is tried first so texts that already
        #    matched are stripped exactly as before.
        clean_text = re.sub(r'.*?复制打开抖音(?:.*?,看看|.*?看看)', '', clean_text)

        # 3. Remove 【...】 if it's at the start (usually author name)
        clean_text = re.sub(r'^\s*【.*?】', '', clean_text)

        # 4. Clean up whitespace
        clean_text = clean_text.strip()

        return clean_text if clean_text else "Unknown Title"
def test():
    """Print the title extracted from each sample share text, for manual inspection."""
    extractor = DouYinDownloaderMock()

    samples = (
        # Case 1: User example
        "3.00 12/28 d@n.dN VYZ:/ 复制打开抖音极速版看看【聚合能研的作品】2026年电力市场的 “大洗牌” 正式开始 告别... https://v.douyin.com/gHWfWVgDVRo/",
        # Case 2: Pure text
        "2026年电力市场的 “大洗牌” 正式开始 告别...",
        # Case 3: Text with URL only
        "Check this out https://v.douyin.com/abc/",
    )

    for number, sample in enumerate(samples, start=1):
        extracted = extractor.extract_title_from_text(sample)
        print(f"Input {number}: {sample}")
        print(f"Title {number}: {extracted}")
        print("-" * 20)


if __name__ == "__main__":
    test()