'commit'

2026-01-26 20:05:00 +08:00
parent cb72f02030
commit 26b22c6e8f
4 changed files with 85 additions and 30 deletions
--- a/WeiXin/T2_ChatMonitor.py
+++ b/WeiXin/T2_ChatMonitor.py
@@ -190,6 +190,36 @@ class ChatMonitorBot:
                logger.info("-" * 20)
            logger.info("="*50 + "\n")
            
+            # --- LLM 总结 ---
+            logger.info("🤖 正在请求 LLM 生成对话摘要...")
+            chat_history_text = ""
+            for msg in self.dialogue_log:
+                sender = msg.get('sender', '未知')
+                content = msg.get('content', '')
+                type_str = "[语音]" if msg.get('type') == 'voice' else "[文字]"
+                chat_history_text += f"{sender}{type_str}: {content}\n"
+            
+            prompt = (
+                "请根据以下微信对话记录，总结归纳双方交流的主要信息点。\n"
+                "要求：\n"
+                "1. 简明扼要，分点列出。\n"
+                "2. 明确指出双方达成的一致或待解决的问题。\n"
+                "3. 忽略无关的寒暄。\n\n"
+                f"对话记录：\n{chat_history_text}"
+            )
+            
+            try:
+                full_response = ""
+                async for chunk in get_llm_response(prompt, stream=True):
+                    full_response += chunk
+                
+                logger.info("\n" + "="*20 + " 对话摘要 (LLM) " + "="*20)
+                logger.info(full_response)
+                logger.info("="*55 + "\n")
+                
+            except Exception as e:
+                logger.error(f"LLM 摘要生成失败: {e}")
+
            # 初始化最后处理的消息哈希，避免重复回复第一条
            last_msg = self.dialogue_log[-1]
            # last_msg 是字典，需要转字符串再 encode
--- a/WeiXin/WxUtil.py
+++ b/WeiXin/WxUtil.py
@@ -1,6 +1,7 @@
 # coding=utf-8
 import uiautomator2 as u2
 import time
+import asyncio
 import logging
 import sys
 import os
@@ -392,6 +393,9 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
        # 记录 Peek-and-Restore 过程中抓取到的语音内容 {y_coord: content}
        captured_voice_contents = {}
        
+        # 初始化异步任务列表
+        analyze_chat_image._ocr_tasks = []
+        
        while loop_count < MAX_LOOPS:
            loop_count += 1
            logger.info(f"--- 分析循环 第 {loop_count} 次 ---")
@@ -490,40 +494,40 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
                logger.info("等待语音转文字完成...")
                time.sleep(3.0) # 缩短等待时间 (原5.0s)
                
-                # --- Peek-and-Restore 逻辑 ---
+                # --- Peek-and-Restore 逻辑 (异步优化版) ---
                
-                # 1. 截图读取内容
+                # 1. 截图 (但不立即 OCR，而是丢给异步任务)
                peek_shot = get_next_debug_path("step_peek_content")
                d.screenshot(peek_shot)
-                logger.info("正在读取转换后的语音内容...")
-                peek_messages, _, _ = _scan_chat_messages(peek_shot)
+                logger.info("已截图，启动异步OCR任务以提取内容...")
                
-                # 2. 查找并保存内容
-                found_content = None
-                current_voice_in_peek = None
-                for pm in peek_messages:
-                    if pm['type'] == 'voice' and pm.get('is_converted'):
-                        # 简单匹配：Y坐标接近 (容差 50px)
-                        # 注意：如果文字展开，下方元素会被推下去，但当前语音本身的位置变化取决于展开方向
-                        # 通常语音条下方展开文字，语音条本身Y坐标变化不大
-                        if abs(pm['y'] - vy) < 50:
-                            found_content = pm.get('content')
-                            current_voice_in_peek = pm
-                            break
-                
-                if found_content:
-                    logger.info(f"✅ [Peek] 成功抓取语音内容: {found_content}")
-                    captured_voice_contents[target['y']] = found_content
-                else:
-                    logger.warning("⚠️ [Peek] 未能抓取到语音内容 (可能识别失败)")
-
-                # 3. 还原状态 (取消转文字)
-                logger.info("准备还原状态 (取消转文字)...")
-                click_x, click_y = vx, vy
-                if current_voice_in_peek:
-                    click_x, click_y = int(current_voice_in_peek['center'][0]), int(current_voice_in_peek['center'][1])
+                async def _async_ocr_task(img_path, target_y):
+                    """Internal async task: run OCR on img_path in the default executor; return (target_y, content or None)."""
+                    loop = asyncio.get_running_loop()
+                    # Run the time-consuming _scan_chat_messages call in the default executor (thread pool)
+                    msgs, _, _ = await loop.run_in_executor(None, _scan_chat_messages, img_path)
                    
-                d.long_click(click_x, click_y, 1.0) # 缩短按压时间
+                    found = None
+                    for pm in msgs:
+                        if pm['type'] == 'voice' and pm.get('is_converted'):
+                            if abs(pm['y'] - target_y) < 50:  # simple match: Y within a 50 px tolerance of the target
+                                found = pm.get('content')
+                                break
+                    return target_y, found
+
+                # 创建并保存任务
+                task = asyncio.create_task(_async_ocr_task(peek_shot, vy))
+                # 我们需要一个列表来保存任务，这里临时利用 list
+                if not hasattr(analyze_chat_image, "_ocr_tasks"):
+                    analyze_chat_image._ocr_tasks = []
+                analyze_chat_image._ocr_tasks.append(task)
+
+                # 2. 立即还原状态 (取消转文字)
+                # 注意：由于 OCR 还没出结果，我们无法精确定位展开后的文字位置
+                # 但通常点击原语音气泡位置 (vx, vy) 也能触发菜单
+                logger.info("准备还原状态 (取消转文字)...")
+                
+                d.long_click(vx, vy, 1.0) # 盲点原坐标
                
                logger.info("正在快速寻找'隐藏文字'按钮...")
                cancel_template = os.path.join(TEMPLATE_DIR, "cancel_zhuan_wen_zi.jpg")
@@ -547,7 +551,7 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
                else:
                    logger.warning("❌ 未找到'隐藏文字'按钮，无法还原状态！(后续可能导致重复处理)")
                
-                # 4. 准备下一次循环
+                # 3. 准备下一次循环
                # 重新截图，因为界面可能微调，或者只是恢复了
                next_screenshot = get_next_debug_path("step_restored")
                d.screenshot(next_screenshot)
@@ -564,6 +568,17 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
                logger.info("跳过当前语音，继续扫描...")
                continue

+        # 循环结束后，等待所有异步 OCR 任务完成
+        if hasattr(analyze_chat_image, "_ocr_tasks") and analyze_chat_image._ocr_tasks:
+            logger.info(f"等待 {len(analyze_chat_image._ocr_tasks)} 个异步 OCR 任务完成...")
+            results = await asyncio.gather(*analyze_chat_image._ocr_tasks)
+            for y, content in results:
+                if content:
+                    captured_voice_contents[y] = content
+                    logger.info(f"✅ [Async OCR] 异步获取到语音内容 (y={y}): {content}")
+            # 清空任务列表
+            analyze_chat_image._ocr_tasks = []
+
        # 循环结束，返回最后一次分析的结果
        if not final_messages: # 如果循环因为 max_loops 退出，确保有结果
            final_messages = messages
@@ -612,6 +627,16 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
        
        # 统一生成 dialogue_log
        for msg in final_messages:
+             # 尝试注入异步获取的语音内容
+             if msg['type'] == 'voice':
+                 # 模糊匹配 Y 坐标
+                 for y_key, content in captured_voice_contents.items():
+                     if abs(msg['y'] - y_key) < 20:
+                         msg['is_converted'] = True
+                         msg['content'] = content
+                         logger.info(f"注入语音内容到最终消息列表: {content}")
+                         break
+
             # 只添加有内容的文本消息，或已转换且有内容的语音消息
             if msg['type'] == 'text' and msg.get('content'):
                 dialogue_log.append(msg)
--- a/WeiXin/pycache/T2_ChatMonitor.cpython-310.pyc
+++ b/WeiXin/pycache/T2_ChatMonitor.cpython-310.pyc
--- a/WeiXin/pycache/WxUtil.cpython-310.pyc
+++ b/WeiXin/pycache/WxUtil.cpython-310.pyc