This commit is contained in:
HuangHai
2026-01-26 20:05:00 +08:00
parent cb72f02030
commit 26b22c6e8f
4 changed files with 85 additions and 30 deletions

View File

@@ -190,6 +190,36 @@ class ChatMonitorBot:
logger.info("-" * 20)
logger.info("="*50 + "\n")
# --- LLM 总结 ---
logger.info("🤖 正在请求 LLM 生成对话摘要...")
chat_history_text = ""
for msg in self.dialogue_log:
sender = msg.get('sender', '未知')
content = msg.get('content', '')
type_str = "[语音]" if msg.get('type') == 'voice' else "[文字]"
chat_history_text += f"{sender}{type_str}: {content}\n"
prompt = (
"请根据以下微信对话记录,总结归纳双方交流的主要信息点。\n"
"要求:\n"
"1. 简明扼要,分点列出。\n"
"2. 明确指出双方达成的一致或待解决的问题。\n"
"3. 忽略无关的寒暄。\n\n"
f"对话记录:\n{chat_history_text}"
)
try:
full_response = ""
async for chunk in get_llm_response(prompt, stream=True):
full_response += chunk
logger.info("\n" + "="*20 + " 对话摘要 (LLM) " + "="*20)
logger.info(full_response)
logger.info("="*55 + "\n")
except Exception as e:
logger.error(f"LLM 摘要生成失败: {e}")
# 初始化最后处理的消息哈希,避免重复回复第一条
last_msg = self.dialogue_log[-1]
# last_msg 是字典,需要转字符串再 encode

View File

@@ -1,6 +1,7 @@
# coding=utf-8
import uiautomator2 as u2
import time
import asyncio
import logging
import sys
import os
@@ -392,6 +393,9 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
# 记录 Peek-and-Restore 过程中抓取到的语音内容 {y_coord: content}
captured_voice_contents = {}
# 初始化异步任务列表
analyze_chat_image._ocr_tasks = []
while loop_count < MAX_LOOPS:
loop_count += 1
logger.info(f"--- 分析循环 第 {loop_count} 次 ---")
@@ -490,40 +494,40 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
logger.info("等待语音转文字完成...")
time.sleep(3.0) # 缩短等待时间 (原5.0s)
# --- Peek-and-Restore 逻辑 ---
# --- Peek-and-Restore 逻辑 (异步优化版) ---
# 1. 截图读取内容
# 1. 截图 (但不立即 OCR,而是丢给异步任务)
peek_shot = get_next_debug_path("step_peek_content")
d.screenshot(peek_shot)
logger.info("正在读取转换后的语音内容...")
peek_messages, _, _ = _scan_chat_messages(peek_shot)
logger.info("已截图启动异步OCR任务以提取内容...")
# 2. 查找并保存内容
found_content = None
current_voice_in_peek = None
for pm in peek_messages:
if pm['type'] == 'voice' and pm.get('is_converted'):
# 简单匹配Y坐标接近 (容差 50px)
# 注意:如果文字展开,下方元素会被推下去,但当前语音本身的位置变化取决于展开方向
# 通常语音条在下方展开文字,语音条本身的 Y 坐标变化不大
if abs(pm['y'] - vy) < 50:
found_content = pm.get('content')
current_voice_in_peek = pm
break
async def _async_ocr_task(img_path, target_y):
"""内部异步任务:在线程池中运行 OCR"""
loop = asyncio.get_running_loop()
# 在默认执行器(线程池)中运行耗时的 _scan_chat_messages
msgs, _, _ = await loop.run_in_executor(None, _scan_chat_messages, img_path)
if found_content:
logger.info(f"✅ [Peek] 成功抓取语音内容: {found_content}")
captured_voice_contents[target['y']] = found_content
else:
logger.warning("⚠️ [Peek] 未能抓取到语音内容 (可能识别失败)")
found = None
for pm in msgs:
if pm['type'] == 'voice' and pm.get('is_converted'):
if abs(pm['y'] - target_y) < 50:
found = pm.get('content')
break
return target_y, found
# 3. 还原状态 (取消转文字)
# 创建并保存任务
task = asyncio.create_task(_async_ocr_task(peek_shot, vy))
# 我们需要一个列表来保存任务,这里临时利用 list
if not hasattr(analyze_chat_image, "_ocr_tasks"):
analyze_chat_image._ocr_tasks = []
analyze_chat_image._ocr_tasks.append(task)
# 2. 立即还原状态 (取消转文字)
# 注意:由于 OCR 还没出结果,我们无法精确定位展开后的文字位置
# 但通常点击原语音气泡位置 (vx, vy) 也能触发菜单
logger.info("准备还原状态 (取消转文字)...")
click_x, click_y = vx, vy
if current_voice_in_peek:
click_x, click_y = int(current_voice_in_peek['center'][0]), int(current_voice_in_peek['center'][1])
d.long_click(click_x, click_y, 1.0) # 缩短按压时间
d.long_click(vx, vy, 1.0) # 盲点原坐标
logger.info("正在快速寻找'隐藏文字'按钮...")
cancel_template = os.path.join(TEMPLATE_DIR, "cancel_zhuan_wen_zi.jpg")
@@ -547,7 +551,7 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
else:
logger.warning("❌ 未找到'隐藏文字'按钮,无法还原状态!(后续可能导致重复处理)")
# 4. 准备下一次循环
# 3. 准备下一次循环
# 重新截图,因为界面可能微调,或者只是恢复了
next_screenshot = get_next_debug_path("step_restored")
d.screenshot(next_screenshot)
@@ -564,6 +568,17 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
logger.info("跳过当前语音,继续扫描...")
continue
# 循环结束后,等待所有异步 OCR 任务完成
if hasattr(analyze_chat_image, "_ocr_tasks") and analyze_chat_image._ocr_tasks:
logger.info(f"等待 {len(analyze_chat_image._ocr_tasks)} 个异步 OCR 任务完成...")
results = await asyncio.gather(*analyze_chat_image._ocr_tasks)
for y, content in results:
if content:
captured_voice_contents[y] = content
logger.info(f"✅ [Async OCR] 异步获取到语音内容 (y={y}): {content}")
# 清空任务列表
analyze_chat_image._ocr_tasks = []
# 循环结束,返回最后一次分析的结果
if not final_messages: # 如果循环因为 max_loops 退出,确保有结果
final_messages = messages
@@ -612,6 +627,16 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
# 统一生成 dialogue_log
for msg in final_messages:
# 尝试注入异步获取的语音内容
if msg['type'] == 'voice':
# 模糊匹配 Y 坐标
for y_key, content in captured_voice_contents.items():
if abs(msg['y'] - y_key) < 20:
msg['is_converted'] = True
msg['content'] = content
logger.info(f"注入语音内容到最终消息列表: {content}")
break
# 只添加有内容的文本消息,或已转换且有内容的语音消息
if msg['type'] == 'text' and msg.get('content'):
dialogue_log.append(msg)

Binary file not shown.