# coding=utf-8
"""
T3_Voice.py - Voice Interaction Test

[Core functionality]
- Dynamic interaction: runs the full loop "screenshot -> recognize voice
  message -> tap 'convert to text' -> screenshot again -> extract content".
- Action verification: exercises the long-press action, menu recognition and
  the wait logic for voice-to-text conversion.
- Async logic: verifies that the asynchronous OCR task correctly captures the
  content produced by the conversion.

[Use cases]
- Check whether the "convert to text" button moved after a WeChat update.
- Debug voice messages that "cannot be opened" or that "have no content after
  conversion".
- Test the anti-infinite-loop mechanism of voice-to-text conversion.
"""
import os
import sys
import logging
import asyncio

import cv2  # NOTE(review): not referenced in this script; kept as in the original.

# Add the project root to sys.path so the project-local `WeiXin` package can be
# imported when this script is run directly from its own directory.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
    sys.path.append(project_root)

from WeiXin import WxUtil

# Script-specific logger.
logger = logging.getLogger("T3_Voice")

# Configure the root logger so that every record (including those emitted by
# WxUtil) ends up in the log file.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

# Use WxUtil.LOG_DIR when the module defines it; otherwise fall back to
# <project_root>/Logs.
log_dir = getattr(WxUtil, "LOG_DIR", os.path.join(project_root, "Logs"))
# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs(log_dir, exist_ok=True)

# File handler: the log file is truncated on every run (mode='w').
log_file = os.path.join(log_dir, "T3_Voice.log")
fh = logging.FileHandler(log_file, mode='w', encoding='utf-8')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
root_logger.addHandler(fh)

# Console handler (only if one is not already installed on the root logger).
# logging.FileHandler is a subclass of logging.StreamHandler, so file handlers
# -- including the one added just above -- must be excluded from the check;
# otherwise the test is always true and no console handler is ever added.
has_console_handler = any(
    isinstance(h, logging.StreamHandler) and not isinstance(h, logging.FileHandler)
    for h in root_logger.handlers
)
if not has_console_handler:
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    root_logger.addHandler(ch)


async def main() -> None:
    """Run the voice-processing test once and log every voice message found.

    Connects to the device through ``WxUtil.connect_device()``, saves a
    screenshot into ``WxUtil.OUTPUT_DIR``, passes it to
    ``WxUtil.analyze_chat_image`` with ``process_strategy="ALL"`` and logs the
    sender, conversion flag and content of each message whose ``type`` is
    ``'voice'``. Returns early (after logging an error) when no device is
    available.
    """
    logger.info("--- T3: Voice Processing Test ---")

    # 1. Connect
    d = WxUtil.connect_device()
    if not d:
        logger.error("Device connection failed.")
        return

    # 2. Screenshot
    screenshot_path = os.path.join(WxUtil.OUTPUT_DIR, "T3_Screenshot.jpg")
    debug_path = os.path.join(WxUtil.OUTPUT_DIR, "T3_Debug.jpg")

    logger.info("Taking screenshot...")
    d.screenshot(screenshot_path)
    logger.info("Initial screenshot taken.")

    # 3. Use analyze_chat_image with strategy='ALL' to test conversion.
    # This simulates the real bot behavior for voice messages.
    logger.info("Running WxUtil.analyze_chat_image (Strategy=ALL)...")

    # Per the original author's note, this function handles the
    # click -> convert -> rescan loop internally. Only the first element of
    # the returned pair is used here.
    final_messages, _ = await WxUtil.analyze_chat_image(
        screenshot_path,
        debug_path,
        device=d,
        process_strategy="ALL",
    )

    # 4. Output result
    logger.info("--- Final Voice Results ---")
    # .get() keeps one malformed message dict (missing 'type') from aborting
    # the whole report with a KeyError.
    voice_msgs = [m for m in final_messages if m.get('type') == 'voice']

    if not voice_msgs:
        logger.info("No voice messages found.")
        return

    for i, msg in enumerate(voice_msgs):
        content = msg.get('content')
        converted_status = "YES" if msg.get('is_converted') else "NO"
        logger.info(
            "Voice [%s] Sender: %s | Converted: %s | Content: %s",
            i,
            msg.get('sender'),
            converted_status,
            content,
        )


if __name__ == "__main__":
    asyncio.run(main())