# aiData/WeiXin/T3_Voice_ActionTest.py

# coding=utf-8
"""
T3_Voice.py - 语音交互流程测试 (Voice Interaction Test)

【核心功能】
- 动态交互：执行“截图 -> 识别语音 -> 点击转文字 -> 再次截图 -> 提取内容”的完整闭环。
- 动作验证：测试长按操作、菜单识别、转文字等待逻辑。
- 异步逻辑：验证异步 OCR 任务是否能正确捕获转文字后的内容。

【使用场景】
- 验证微信版本更新后“转文字”按钮位置是否变化。
- 调试语音消息“点不开”或“转完没内容”的问题。
- 测试语音转文字的防死循环机制。
"""
import os
import sys
import logging
import asyncio
import cv2

# Put the directory one level above this script's folder on sys.path so the
# `from WeiXin import ...` statement below resolves when run as a script.
_script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(_script_dir)
if not sys.path.count(project_root):
    sys.path.append(project_root)

from WeiXin import WxUtil

# Setup Logger
logger = logging.getLogger("T3_Voice")

# Configure the root logger so records from every module (including WxUtil)
# reach the handlers attached below.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

# Resolve the log directory: use WxUtil.LOG_DIR when the module defines it,
# otherwise fall back to a "Logs" folder one level above this script's folder.
log_dir = getattr(
    WxUtil,
    'LOG_DIR',
    os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "Logs"),
)

# exist_ok=True replaces the separate exists() check and its
# check-then-create race.
os.makedirs(log_dir, exist_ok=True)

# File Handler (mode='w' truncates the log file on every run)
log_file = os.path.join(log_dir, "T3_Voice.log")
fh = logging.FileHandler(log_file, mode='w', encoding='utf-8')
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
root_logger.addHandler(fh)

# Console Handler (if not already added by WxUtil).
# logging.FileHandler is a subclass of logging.StreamHandler, so a plain
# isinstance(h, StreamHandler) test always matches the file handler added
# above and the console handler would never be installed. File handlers are
# therefore excluded from the check.
has_console_handler = any(
    isinstance(h, logging.StreamHandler) and not isinstance(h, logging.FileHandler)
    for h in root_logger.handlers
)
if not has_console_handler:
    ch = logging.StreamHandler()
    ch.setFormatter(formatter)
    root_logger.addHandler(ch)

async def main():
    """Run the voice-message test flow once and log the results.

    Steps: connect to the device via ``WxUtil.connect_device``, save a
    screenshot under ``WxUtil.OUTPUT_DIR``, pass it to
    ``WxUtil.analyze_chat_image`` with ``process_strategy="ALL"``, then log
    one line for every returned message whose ``type`` is ``'voice'``.

    Returns:
        None. Returns early, after logging an error, when no device is
        available.
    """
    logger.info("--- T3: Voice Processing Test ---")

    # 1. Connect
    d = WxUtil.connect_device()
    if not d:
        logger.error("Device connection failed.")
        return

    # 2. Screenshot
    screenshot_path = os.path.join(WxUtil.OUTPUT_DIR, "T3_Screenshot.jpg")
    debug_path = os.path.join(WxUtil.OUTPUT_DIR, "T3_Debug.jpg")

    logger.info("Taking screenshot...")
    d.screenshot(screenshot_path)
    logger.info("Initial screenshot taken.")

    # 3. Use analyze_chat_image with strategy='ALL' to test conversion
    # This simulates the real bot behavior for voice messages
    logger.info("Running WxUtil.analyze_chat_image (Strategy=ALL)...")

    # This function handles the click -> convert -> rescan loop internally.
    # Only the first element of the returned pair (the message list) is used.
    final_messages, _ = await WxUtil.analyze_chat_image(
        screenshot_path,
        debug_path,
        device=d,
        process_strategy="ALL"
    )

    # 4. Output Result
    logger.info("--- Final Voice Results ---")
    # Read every field with .get(): a message dict that lacks 'type' or
    # 'sender' must not abort the report with KeyError, matching how
    # 'content' and 'is_converted' are read. A None message list is
    # treated as empty.
    voice_msgs = [m for m in (final_messages or []) if m.get('type') == 'voice']
    if not voice_msgs:
        logger.info("No voice messages found.")
        return

    for i, msg in enumerate(voice_msgs):
        content = msg.get('content')
        converted_status = "YES" if msg.get('is_converted') else "NO"
        logger.info(f"Voice [{i}] Sender: {msg.get('sender')} | Converted: {converted_status} | Content: {content}")

# Script entry point: when the file is executed directly (not imported),
# run the main() coroutine to completion with asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())