diff --git a/WeiXin/T2_ChatMonitor.py b/WeiXin/T2_ChatMonitor.py index e4a0979..ba51a2e 100644 --- a/WeiXin/T2_ChatMonitor.py +++ b/WeiXin/T2_ChatMonitor.py @@ -190,6 +190,36 @@ class ChatMonitorBot: logger.info("-" * 20) logger.info("="*50 + "\n") + # --- LLM 总结 --- + logger.info("🤖 正在请求 LLM 生成对话摘要...") + chat_history_text = "" + for msg in self.dialogue_log: + sender = msg.get('sender', '未知') + content = msg.get('content', '') + type_str = "[语音]" if msg.get('type') == 'voice' else "[文字]" + chat_history_text += f"{sender}{type_str}: {content}\n" + + prompt = ( + "请根据以下微信对话记录,总结归纳双方交流的主要信息点。\n" + "要求:\n" + "1. 简明扼要,分点列出。\n" + "2. 明确指出双方达成的一致或待解决的问题。\n" + "3. 忽略无关的寒暄。\n\n" + f"对话记录:\n{chat_history_text}" + ) + + try: + full_response = "" + async for chunk in get_llm_response(prompt, stream=True): + full_response += chunk + + logger.info("\n" + "="*20 + " 对话摘要 (LLM) " + "="*20) + logger.info(full_response) + logger.info("="*55 + "\n") + + except Exception as e: + logger.error(f"LLM 摘要生成失败: {e}") + # 初始化最后处理的消息哈希,避免重复回复第一条 last_msg = self.dialogue_log[-1] # last_msg 是字典,需要转字符串再 encode diff --git a/WeiXin/WxUtil.py b/WeiXin/WxUtil.py index 1f69cc5..ce1c05d 100644 --- a/WeiXin/WxUtil.py +++ b/WeiXin/WxUtil.py @@ -1,6 +1,7 @@ # coding=utf-8 import uiautomator2 as u2 import time +import asyncio import logging import sys import os @@ -392,6 +393,9 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name=" # 记录 Peek-and-Restore 过程中抓取到的语音内容 {y_coord: content} captured_voice_contents = {} + # 初始化异步任务列表 + analyze_chat_image._ocr_tasks = [] + while loop_count < MAX_LOOPS: loop_count += 1 logger.info(f"--- 分析循环 第 {loop_count} 次 ---") @@ -490,40 +494,40 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name=" logger.info("等待语音转文字完成...") time.sleep(3.0) # 缩短等待时间 (原5.0s) - # --- Peek-and-Restore 逻辑 --- + # --- Peek-and-Restore 逻辑 (异步优化版) --- - # 1. 截图读取内容 + # 1. 截图 (但不立即 OCR,而是丢给异步任务) peek_shot = get_next_debug_path("step_peek_content") d.screenshot(peek_shot) - logger.info("正在读取转换后的语音内容...") - peek_messages, _, _ = _scan_chat_messages(peek_shot) + logger.info("已截图,启动异步OCR任务以提取内容...") - # 2. 查找并保存内容 - found_content = None - current_voice_in_peek = None - for pm in peek_messages: - if pm['type'] == 'voice' and pm.get('is_converted'): - # 简单匹配:Y坐标接近 (容差 50px) - # 注意:如果文字展开,下方元素会被推下去,但当前语音本身的位置变化取决于展开方向 - # 通常语音条下方展开文字,语音条本身Y坐标变化不大 - if abs(pm['y'] - vy) < 50: - found_content = pm.get('content') - current_voice_in_peek = pm - break - - if found_content: - logger.info(f"✅ [Peek] 成功抓取语音内容: {found_content}") - captured_voice_contents[target['y']] = found_content - else: - logger.warning("⚠️ [Peek] 未能抓取到语音内容 (可能识别失败)") - - # 3. 还原状态 (取消转文字) - logger.info("准备还原状态 (取消转文字)...") - click_x, click_y = vx, vy - if current_voice_in_peek: - click_x, click_y = int(current_voice_in_peek['center'][0]), int(current_voice_in_peek['center'][1]) + async def _async_ocr_task(img_path, target_y): + """内部异步任务:在线程池中运行 OCR""" + loop = asyncio.get_running_loop() + # 在默认执行器(线程池)中运行耗时的 _scan_chat_messages + msgs, _, _ = await loop.run_in_executor(None, _scan_chat_messages, img_path) - d.long_click(click_x, click_y, 1.0) # 缩短按压时间 + found = None + for pm in msgs: + if pm['type'] == 'voice' and pm.get('is_converted'): + if abs(pm['y'] - target_y) < 50: + found = pm.get('content') + break + return target_y, found + + # 创建并保存任务 + task = asyncio.create_task(_async_ocr_task(peek_shot, vy)) + # 我们需要一个列表来保存任务,这里临时利用 list + if not hasattr(analyze_chat_image, "_ocr_tasks"): + analyze_chat_image._ocr_tasks = [] + analyze_chat_image._ocr_tasks.append(task) + + # 2. 立即还原状态 (取消转文字) + # 注意:由于 OCR 还没出结果,我们无法精确定位展开后的文字位置 + # 但通常点击原语音气泡位置 (vx, vy) 也能触发菜单 + logger.info("准备还原状态 (取消转文字)...") + + d.long_click(vx, vy, 1.0) # 盲点原坐标 logger.info("正在快速寻找'隐藏文字'按钮...") cancel_template = os.path.join(TEMPLATE_DIR, "cancel_zhuan_wen_zi.jpg") @@ -547,7 +551,7 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name=" else: logger.warning("❌ 未找到'隐藏文字'按钮,无法还原状态!(后续可能导致重复处理)") - # 4. 准备下一次循环 + # 3. 准备下一次循环 # 重新截图,因为界面可能微调,或者只是恢复了 next_screenshot = get_next_debug_path("step_restored") d.screenshot(next_screenshot) @@ -564,6 +568,17 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name=" logger.info("跳过当前语音,继续扫描...") continue + # 循环结束后,等待所有异步 OCR 任务完成 + if hasattr(analyze_chat_image, "_ocr_tasks") and analyze_chat_image._ocr_tasks: + logger.info(f"等待 {len(analyze_chat_image._ocr_tasks)} 个异步 OCR 任务完成...") + results = await asyncio.gather(*analyze_chat_image._ocr_tasks) + for y, content in results: + if content: + captured_voice_contents[y] = content + logger.info(f"✅ [Async OCR] 异步获取到语音内容 (y={y}): {content}") + # 清空任务列表 + analyze_chat_image._ocr_tasks = [] + # 循环结束,返回最后一次分析的结果 if not final_messages: # 如果循环因为 max_loops 退出,确保有结果 final_messages = messages @@ -612,6 +627,16 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name=" # 统一生成 dialogue_log for msg in final_messages: + # 尝试注入异步获取的语音内容 + if msg['type'] == 'voice': + # 模糊匹配 Y 坐标 + for y_key, content in captured_voice_contents.items(): + if abs(msg['y'] - y_key) < 20: + msg['is_converted'] = True + msg['content'] = content + logger.info(f"注入语音内容到最终消息列表: {content}") + break + # 只添加有内容的文本消息,或已转换且有内容的语音消息 if msg['type'] == 'text' and msg.get('content'): dialogue_log.append(msg) diff --git a/WeiXin/__pycache__/T2_ChatMonitor.cpython-310.pyc b/WeiXin/__pycache__/T2_ChatMonitor.cpython-310.pyc new file mode 100644 index 0000000..234ca5e Binary files /dev/null and b/WeiXin/__pycache__/T2_ChatMonitor.cpython-310.pyc differ diff --git a/WeiXin/__pycache__/WxUtil.cpython-310.pyc b/WeiXin/__pycache__/WxUtil.cpython-310.pyc index 2508ae4..a2634e7 100644 Binary files a/WeiXin/__pycache__/WxUtil.cpython-310.pyc and b/WeiXin/__pycache__/WxUtil.cpython-310.pyc differ