'commit'

2026-01-25 12:52:52 +08:00
parent 59dc95cd9d
commit bf485d10f1
30 changed files with 523 additions and 688 deletions
--- a/WeiXin/Screenshots/chat_interface_check.jpg
+++ b/WeiXin/Screenshots/chat_interface_check.jpg
--- a/WeiXin/Screenshots/debug_20260125_115039_coord_01_before_click_input.jpg
+++ b/WeiXin/Screenshots/debug_20260125_115039_coord_01_before_click_input.jpg
--- a/WeiXin/Screenshots/debug_20260125_115040_coord_02_after_click_input_keyboard.jpg
+++ b/WeiXin/Screenshots/debug_20260125_115040_coord_02_after_click_input_keyboard.jpg
--- a/WeiXin/Screenshots/debug_20260125_115041_coord_03_after_input_text.jpg
+++ b/WeiXin/Screenshots/debug_20260125_115041_coord_03_after_input_text.jpg
--- a/WeiXin/Screenshots/debug_send_check_20260125_115042.jpg
+++ b/WeiXin/Screenshots/debug_send_check_20260125_115042.jpg
--- a/WeiXin/Screenshots/t5_monitor_analyzed.jpg
+++ b/WeiXin/Screenshots/t5_monitor_analyzed.jpg
--- a/WeiXin/Screenshots/t5_monitor_temp.jpg
+++ b/WeiXin/Screenshots/t5_monitor_temp.jpg
--- a/WeiXin/Screenshots/t6_debug_result.jpg
+++ b/WeiXin/Screenshots/t6_debug_result.jpg
--- a/WeiXin/Screenshots/t6_debug_temp.jpg
+++ b/WeiXin/Screenshots/t6_debug_temp.jpg
--- a/WeiXin/Screenshots/t6_menu_shot.jpg
+++ b/WeiXin/Screenshots/t6_menu_shot.jpg
--- a/WeiXin/T5_AutoChatMonitor.py
+++ b/WeiXin/T5_AutoChatMonitor.py
@@ -24,7 +24,7 @@ logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
-        logging.FileHandler(os.path.join(log_dir, "T5_AutoChatMonitor.log"), encoding='utf-8'),
+        logging.FileHandler(os.path.join(log_dir, "T5_AutoChatMonitor.log"), encoding='utf-8', mode='w'),
        logging.StreamHandler()
    ]
 )
@@ -46,18 +46,19 @@ class ChatBot:
            os.makedirs(self.screenshot_dir)

        self.persona = (
-            "你是一名1999年毕业、拥有27年一线教学经验的小学高级女教师，名叫‘小张老师’（家长们也常叫你张老师）。你目前在‘长春市少惠林作文素养培养中心’工作。"
-            "你不仅是一位作文教学专家，批阅过超过2万篇作文，更是一位心思细腻、懂得孩子心理、能与家长共情的教育智者。"
-            "你的回复风格应该是：温柔、知性、亲切且极具亲和力，就像一位邻家大姐姐或贴心的闺蜜在和家长聊天。"
-            "严禁使用任何列表格式（如 1. 2. 3. 或 A. B. C.），严禁使用‘首先、其次、最后’等刻板的逻辑词。"
-            "要用连贯、优美、富有生活气息的文字进行叙述。每一句话都要带温度，要善于用‘咱们孩子’、‘作为老师我也理解’等词汇拉近距离。"
-            "回复内容要有深度，不要只给结论，要讲透背后的教育逻辑。字数必须严格控制在 100-200 字之间。"
-            "如果涉及到校区信息，必须且只能使用以下真实数据，绝对严禁使用任何占位符：\n"
+            "你是一名1999年毕业、拥有27年一线教学经验的小学高级女教师，名叫‘大张老师’。你目前在‘长春市少惠林作文素养培养中心’工作。"
+            "你不仅是一位作文教学专家，更是一位心思细腻、能与家长共情的教育智者。"
+            "你的回复风格应该是：温柔、知性、亲切，就像一位邻家大姐姐在聊天。"
+            "【严格约束】：\n"
+            "1. 绝对禁止发散！绝对禁止幻觉！\n"
+            "2. 知道什么就说什么，不要乱讲话，不要自己编造内容！\n"
+            "3. 仅针对家长明确表达的内容进行回复。\n"
+            "4. 严禁使用列表格式。严禁使用‘首先、其次’等逻辑词。\n"
+            "5. 回复必须简练，字数严格控制在 50 字以内！\n"
+            "如果涉及到校区信息，必须且只能使用以下真实数据：\n"
            "- 单位：长春市少惠林作文素养培养中心\n"
            "- 地址：南环城路与临河街交汇，TOUCH12街3楼325号\n"
-            "- 联系人：小张老师（电话：18686619970）\n"
-            "- 课程：线上/线下作文课、读书营/阅读策略营（假期开设）\n"
-            "- 上课：作文通常在周二/周四晚上，周六/周日全天"
+            "- 联系人：小张老师（电话：18686619970）"
        )

    async def get_reply(self, history_text, is_proactive=False):
@@ -67,18 +68,17 @@ class ChatBot:
                f"【对话背景】：家长已经超过5分钟没有回应了。\n"
                f"【近期聊天记录】：\n{history_text}\n\n"
                "【任务要求】：\n"
-                "请作为小张老师，给家长发一段主动关怀的消息。不要催促，而是以‘刚才突然想到’或者‘又想起咱们孩子之前提到的’为由头，"
-                "再补充一点有价值的教学点滴，或者分享一个能缓解焦虑的小故事。语气要温柔亲切，字数在 100-200 字之间。"
-                "全文必须是连贯的段落，严禁列条目！如果提到联系方式或地址，必须使用人设中的真实数据，严禁占位符。"
+                "请作为大张老师，给家长发一段简短的关怀消息。不要催促，语气温柔。"
+                "字数严格控制在 50 字以内。不要编造事实。"
            )
        else:
            prompt = (
                f"【教师人设】：{self.persona}\n\n"
                f"【近期聊天记录】：\n{history_text}\n\n"
                "【任务要求】：\n"
-                "请作为小张老师，给家长写一段暖心且有深度的回复。针对家长最后的消息，先给予情感上的关怀，再结合27年经验给出具体指导。"
-                "展现出资深女教师的温柔与智慧。全文必须是一个或两个完整的自然段，绝对禁止分点列项！字数严格在 100-200 字之间。"
-                "如果提到联系方式或地址，必须使用人设中的真实数据，严禁占位符。直接输出回复的正文内容。"
+                "请作为大张老师回复家长。针对家长的具体问题或话语进行回复。"
+                "严禁发散，严禁编造家长没说过的情况。如果不清楚家长的意图，就温柔询问。"
+                "字数严格控制在 50 字以内。直接输出回复正文。"
            )
        
        full_response = ""
@@ -94,11 +94,11 @@ class ChatBot:
        
        while True:
            try:
-                # 1. 检查是否在微信聊天界面
-                if not is_in_chat_interface(self.d):
-                    logger.warning("⚠️ 当前不在微信聊天界面，等待下一次扫描...")
-                    await asyncio.sleep(CHECK_INTERVAL)
-                    continue
+                # 1. 检查是否在微信聊天界面 (改为通过 VLM 识别结果判断，不再使用 UI 检查)
+                # if not is_in_chat_interface(self.d):
+                #     logger.warning("⚠️ 当前不在微信聊天界面，等待下一次扫描...")
+                #     await asyncio.sleep(CHECK_INTERVAL)
+                #     continue

                logger.info("🔍 正在扫描当前界面内容...")
                # 1. 截图并分析
@@ -109,7 +109,13 @@ class ChatBot:
                self.d.screenshot(tmp_shot)
                
                logger.info("🎨 正在分析聊天界面内容 (检测头像与对话)...")
-                dialogue_log = analyze_chat_image(tmp_shot, analyzed_shot)
+                # analyze_chat_image 现在会返回 None, None 如果不是聊天界面
+                dialogue_log, input_center = await analyze_chat_image(tmp_shot, analyzed_shot, device=self.d)
+
+                if dialogue_log is None:
+                    logger.warning("⚠️ VLM 判断当前不在微信聊天界面，或无法识别。")
+                    await asyncio.sleep(CHECK_INTERVAL)
+                    continue
                
                # 语音转文字处理
                if dialogue_log == "VOICE_CONVERTING":
@@ -131,13 +137,25 @@ class ChatBot:

                # 判断逻辑：如果最后一条消息是“对方”发的，且与上次不同，则回复
                if "对方:" in current_last_msg and current_last_msg != self.last_message_text:
+                    # 关键检查：如果包含 "(待转换)"，说明语音还没转文字，绝对不能回复
+                    if "(待转换)" in current_last_msg:
+                        logger.info(f"🚫 检测到未转换的语音消息，跳过回复生成，等待转文字... ({current_last_msg})")
+                        await asyncio.sleep(2) # 稍作等待
+                        continue
+
                    logger.info(f"📩 检测到新消息: {current_last_msg}")
                    
                    reply = await self.get_reply(history_text)
                    logger.info(f"🤖 生成回复: {reply}")
                    
                    # 执行输入发送
-                    center_point, _ = find_input_box_center(tmp_shot)
+                    if input_center:
+                        center_point = input_center
+                        logger.info(f"📍 使用 VLM 识别的输入框坐标: {center_point}")
+                    else:
+                        center_point, _ = find_input_box_center(tmp_shot)
+                        logger.info(f"📍 使用 CV 识别的输入框坐标: {center_point}")
+
                    # 即使 CV 没找到坐标，也尝试执行，因为 perform_input_action 内部有原生控件识别
                    perform_input_action(self.d, center_point, reply, auto_send=True)
                    self.last_message_text = f"我: {reply}" # 更新状态，避免重复回复自己
@@ -154,7 +172,11 @@ class ChatBot:
                        proactive_reply = await self.get_reply(history_text, is_proactive=True)
                        logger.info(f"🤖 发起主动询问: {proactive_reply}")
                        
-                        center_point, _ = find_input_box_center(tmp_shot)
+                        if input_center:
+                            center_point = input_center
+                        else:
+                            center_point, _ = find_input_box_center(tmp_shot)
+                        
                        # 同上，解耦 CV 坐标
                        perform_input_action(self.d, center_point, proactive_reply, auto_send=True)
                        self.proactive_count += 1
--- a/WeiXin/T6_VLM_Voice_Debug.py
+++ b/WeiXin/T6_VLM_Voice_Debug.py
@@ -0,0 +1,163 @@
+# coding=utf-8
+import asyncio
+import logging
+import os
+import sys
+import time
+
+import cv2
+import uiautomator2 as u2
+
+# Add the project root directory (the parent of this file's directory) to sys.path
+project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if project_root not in sys.path:
+    sys.path.append(project_root)
+
+from WeiXin.WxUtil import get_vlm_analysis
+from Util.EasyOcrKit import EasyOcrKit
+
+# Configure logging: INFO level, messages prefixed with timestamp, logger name and level
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger("T6_Debug")
+
+async def main():
+    logger.info("🚀 T6 VLM 语音坐标调试工具启动...")
+    
+    # 连接设备
+    try:
+        d = u2.connect()
+        logger.info(f"设备已连接: {d.info.get('serial')}")
+    except Exception as e:
+        logger.error(f"设备连接失败: {e}")
+        return
+
+    # 截图目录
+    screenshots_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
+    if not os.path.exists(screenshots_dir):
+        os.makedirs(screenshots_dir)
+
+    # 截图
+    screenshot_path = os.path.join(screenshots_dir, "t6_debug_temp.jpg")
+    logger.info("📸 正在截图...")
+    d.screenshot(screenshot_path)
+    
+    # 调用 VLM 分析
+    logger.info("🧠 正在调用 VLM 分析图片...")
+    result_data = await get_vlm_analysis(screenshot_path)
+    
+    if not result_data:
+        logger.error("❌ VLM 分析返回为空")
+        return
+
+    logger.info(f"VLM 返回结果: {result_data}")
+
+    # 读取图片用于绘制
+    img = cv2.imread(screenshot_path)
+    if img is None:
+        logger.error("❌ 无法读取截图文件")
+        return
+
+    messages = result_data.get("messages", [])
+    voice_count = 0
+    
+    for msg in messages:
+        msg_type = msg.get("type")
+        content = msg.get("content")
+        coords = msg.get("coordinates") or msg.get("center")
+        
+        if not coords:
+            continue
+            
+        x, y = coords
+        
+        if msg_type == "voice":
+            voice_count += 1
+            logger.info(f"🎤 发现语音消息: {content}, 坐标: ({x}, {y})")
+            
+            # 绘制绿框 (语音)
+            w, h = 300, 80
+            top_left = (int(x - w/2), int(y - h/2))
+            bottom_right = (int(x + w/2), int(y + h/2))
+            
+            cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 3)
+            cv2.circle(img, (int(x), int(y)), 5, (0, 0, 255), -1)
+            label = f"Voice ({x},{y})"
+            cv2.putText(img, label, (top_left[0], top_left[1] - 10), 
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
+                        
+    # 保存结果图片
+    output_path = os.path.join(screenshots_dir, "t6_debug_result.jpg")
+    cv2.imwrite(output_path, img)
+    logger.info(f"✅ 结果已保存至: {output_path}")
+    logger.info(f"共标记了 {voice_count} 条语音消息。请检查图片是否准确。")
+
+    # --- 验证转文字功能 (处理最后一条未转换语音) ---
+    logger.info("="*30)
+    logger.info("🔍 开始验证“转文字”功能 (仅针对最后一条未转换语音)...")
+    
+    # 筛选未转换的语音
+    unconverted_voices = []
+    for msg in messages:
+        if msg.get("type") == "voice" and msg.get("status") == "unconverted":
+             coords = msg.get("coordinates") or msg.get("center")
+             if coords:
+                 msg["coordinates"] = coords
+                 unconverted_voices.append(msg)
+    
+    if not unconverted_voices:
+        logger.info("⚠️ 没有发现未转换的语音消息，跳过验证。")
+    else:
+        last_voice = unconverted_voices[-1]
+        vx, vy = last_voice['coordinates']
+        content = last_voice.get('content', '0"')
+        logger.info(f"🎯 目标语音: {content}, 坐标: ({vx}, {vy})")
+        
+        # 1. 长按
+        logger.info(f"👆 长按语音消息...")
+        d.long_click(vx, vy, 1.5)
+        time.sleep(1.0)
+        
+        # 2. 截图菜单
+        menu_shot_path = os.path.join(screenshots_dir, "t6_menu_shot.jpg")
+        logger.info(f"📸 截取菜单: {menu_shot_path}")
+        d.screenshot(menu_shot_path)
+        
+        # 3. OCR 识别
+        logger.info("🧠 正在进行 OCR 识别菜单...")
+        ocr_kit = EasyOcrKit()
+        ocr_results = ocr_kit.read_text(menu_shot_path)
+        
+        convert_btn_center = None
+        for bbox, text, conf in ocr_results:
+            if "转文字" in text or "转换为文字" in text:
+                c_x = int((bbox[0][0] + bbox[2][0]) / 2)
+                c_y = int((bbox[0][1] + bbox[2][1]) / 2)
+                convert_btn_center = (c_x, c_y)
+                logger.info(f"✅ OCR 找到 '{text}' 按钮: {convert_btn_center}")
+                break
+        
+        if convert_btn_center:
+            # 4. 点击转文字
+            logger.info(f"👆 点击转文字按钮: {convert_btn_center}")
+            d.click(convert_btn_center[0], convert_btn_center[1])
+            
+            # 5. 动态等待
+            duration_str = content.replace('"', '').strip()
+            try:
+                duration = int(duration_str)
+            except:
+                duration = 10
+            
+            wait_seconds = max(2, duration / 5.0)
+            logger.info(f"⏳ 语音时长 {duration}s，模拟等待 {wait_seconds:.1f}s...")
+            time.sleep(wait_seconds)
+            logger.info("✅ 流程执行完毕！请检查手机屏幕是否已开始转换。")
+        else:
+            logger.error("❌ OCR 未找到 '转文字' 按钮！")
+            # 点击空白处关闭
+            d.click(vx + 200, vy)
+
+if __name__ == "__main__":
+    if sys.platform.startswith('win'):  # on Windows, install the selector event loop policy before starting asyncio
+        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+    asyncio.run(main())
--- a/WeiXin/WxUtil.py
+++ b/WeiXin/WxUtil.py
--- a/WeiXin/pycache/WxUtil.cpython-310.pyc
+++ b/WeiXin/pycache/WxUtil.cpython-310.pyc