Files
aiData/WeiXin/T6_VLM_Voice_Debug.py

164 lines
5.7 KiB
Python
Raw Normal View History

2026-01-25 12:52:52 +08:00
# coding=utf-8
import asyncio
import logging
import os
import sys
import time
import cv2
import uiautomator2 as u2
# Put the project root (the directory one level above this script's own
# directory) on sys.path so the package-style imports below resolve when the
# script is run directly.
_script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(_script_dir)
if project_root not in sys.path:
    sys.path.append(project_root)
from WeiXin.WxUtil import get_vlm_analysis
from Util.EasyOcrKit import EasyOcrKit
# Configure root logging at import time: INFO level, with each record showing
# timestamp, logger name and severity. All output of this tool goes through
# the named logger below.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("T6_Debug")
def _message_point(msg):
    """Return the ``(x, y)`` point carried by a VLM message, or ``None``.

    Reads ``msg["coordinates"]`` and falls back to ``msg["center"]``. The
    value comes straight from the VLM response, so anything that is not a
    pair of numbers is reported with a warning and treated as missing
    instead of raising.
    """
    coords = msg.get("coordinates") or msg.get("center")
    if not coords:
        return None
    try:
        x, y = coords
    except (TypeError, ValueError):
        logger.warning(f"⚠️ 忽略坐标格式异常的消息: {coords!r}")
        return None
    if not (isinstance(x, (int, float)) and isinstance(y, (int, float))):
        logger.warning(f"⚠️ 忽略坐标格式异常的消息: {coords!r}")
        return None
    return x, y


def _parse_duration_seconds(content, default=10):
    """Parse a voice-bubble label into whole seconds.

    Double quotes and surrounding whitespace are stripped before conversion
    (presumably the label looks like ``12"`` -- confirm against real VLM
    output). ``default`` is returned when the label is ``None`` or is not a
    plain integer.
    """
    if content is None:
        return default
    try:
        return int(str(content).replace('"', '').strip())
    except ValueError:
        return default


async def main():
    """Run the voice-coordinate debug flow against the connected device.

    Steps:
      1. Connect to the device via uiautomator2 and take a screenshot.
      2. Send the screenshot to ``get_vlm_analysis`` and read its
         ``"messages"`` list.
      3. Draw a box, centre dot and label for every message of type
         ``"voice"`` and save the annotated image next to the screenshot.
      4. For the last voice message whose status is ``"unconverted"``:
         long-press it, screenshot the menu, locate the convert-to-text entry
         with OCR, tap it and wait a time derived from the voice duration.

    Returns:
        None. Progress and failures are reported through ``logger``; the
        function returns early when the device, the VLM result or the
        screenshot file is unavailable.
    """
    logger.info("🚀 T6 VLM 语音坐标调试工具启动...")

    # Connect to the device; without one there is nothing to debug.
    try:
        d = u2.connect()
        logger.info(f"设备已连接: {d.info.get('serial')}")
    except Exception as e:
        logger.error(f"设备连接失败: {e}")
        return

    # Screenshots live in a "Screenshots" folder beside this script.
    screenshots_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(screenshots_dir, exist_ok=True)

    # Capture the current screen.
    screenshot_path = os.path.join(screenshots_dir, "t6_debug_temp.jpg")
    logger.info("📸 正在截图...")
    d.screenshot(screenshot_path)

    # Ask the VLM to analyse the screenshot.
    logger.info("🧠 正在调用 VLM 分析图片...")
    result_data = await get_vlm_analysis(screenshot_path)
    if not result_data:
        logger.error("❌ VLM 分析返回为空")
        return
    logger.info(f"VLM 返回结果: {result_data}")

    # Load the screenshot so the detections can be drawn onto it.
    img = cv2.imread(screenshot_path)
    if img is None:
        logger.error("❌ 无法读取截图文件")
        return

    # "messages" may be present but null in the response; treat that as empty.
    messages = result_data.get("messages") or []

    # Collect each voice message once, together with its validated point, so
    # both the drawing pass and the conversion check reuse the same data and
    # the VLM dicts are never mutated.
    voices = []
    for msg in messages:
        if msg.get("type") != "voice":
            continue
        point = _message_point(msg)
        if point is not None:
            voices.append((msg, point))

    for msg, (x, y) in voices:
        logger.info(f"🎤 发现语音消息: {msg.get('content')}, 坐标: ({x}, {y})")
        # Green box of fixed size centred on the reported point (voice).
        w, h = 300, 80
        top_left = (int(x - w/2), int(y - h/2))
        bottom_right = (int(x + w/2), int(y + h/2))
        cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 3)
        # Filled red dot on the exact point that would be tapped.
        cv2.circle(img, (int(x), int(y)), 5, (0, 0, 255), -1)
        label = f"Voice ({x},{y})"
        cv2.putText(img, label, (top_left[0], top_left[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    voice_count = len(voices)

    # Save the annotated image; imwrite signals failure via its return value.
    output_path = os.path.join(screenshots_dir, "t6_debug_result.jpg")
    if cv2.imwrite(output_path, img):
        logger.info(f"✅ 结果已保存至: {output_path}")
    else:
        logger.error(f"❌ 结果图片保存失败: {output_path}")
    logger.info(f"共标记了 {voice_count} 条语音消息。请检查图片是否准确。")

    # --- Verify the convert-to-text flow (last unconverted voice only) ---
    logger.info("="*30)
    logger.info("🔍 开始验证“转文字”功能 (仅针对最后一条未转换语音)...")

    # Keep only voice messages the VLM flagged as not yet converted.
    unconverted_voices = [
        (msg, point) for msg, point in voices
        if msg.get("status") == "unconverted"
    ]
    if not unconverted_voices:
        logger.info("⚠️ 没有发现未转换的语音消息,跳过验证。")
        return

    last_voice, (vx, vy) = unconverted_voices[-1]
    content = last_voice.get('content', '0"')
    logger.info(f"🎯 目标语音: {content}, 坐标: ({vx}, {vy})")

    # 1. Long-press the bubble to open its context menu.
    logger.info("👆 长按语音消息...")
    d.long_click(vx, vy, 1.5)
    time.sleep(1.0)

    # 2. Screenshot the menu.
    menu_shot_path = os.path.join(screenshots_dir, "t6_menu_shot.jpg")
    logger.info(f"📸 截取菜单: {menu_shot_path}")
    d.screenshot(menu_shot_path)

    # 3. OCR the menu and look for the convert-to-text entry.
    logger.info("🧠 正在进行 OCR 识别菜单...")
    ocr_kit = EasyOcrKit()
    ocr_results = ocr_kit.read_text(menu_shot_path)
    convert_btn_center = None
    for bbox, text, _conf in ocr_results or []:
        if "转文字" in text or "转换为文字" in text:
            # Centre of the box from two opposite corners (bbox[0], bbox[2]).
            c_x = int((bbox[0][0] + bbox[2][0]) / 2)
            c_y = int((bbox[0][1] + bbox[2][1]) / 2)
            convert_btn_center = (c_x, c_y)
            logger.info(f"✅ OCR 找到 '{text}' 按钮: {convert_btn_center}")
            break

    if convert_btn_center:
        # 4. Tap the convert-to-text entry.
        logger.info(f"👆 点击转文字按钮: {convert_btn_center}")
        d.click(convert_btn_center[0], convert_btn_center[1])
        # 5. Wait in proportion to the voice length, at least 2 seconds.
        duration = _parse_duration_seconds(content)
        wait_seconds = max(2, duration / 5.0)
        logger.info(f"⏳ 语音时长 {duration}s模拟等待 {wait_seconds:.1f}s...")
        time.sleep(wait_seconds)
        logger.info("✅ 流程执行完毕!请检查手机屏幕是否已开始转换。")
    else:
        logger.error("❌ OCR 未找到 '转文字' 按钮!")
        # Nothing was tapped, so dismiss the menu by tapping to the right of
        # the bubble.
        # NOTE(review): nesting was reconstructed from a de-indented copy;
        # confirm this tap belongs only to the failure branch.
        d.click(vx + 200, vy)
if __name__ == "__main__":
    # On Windows, switch asyncio to the selector event loop policy before the
    # loop is created; other platforms keep their default policy.
    running_on_windows = sys.platform.startswith('win')
    if running_on_windows:
        selector_policy = asyncio.WindowsSelectorEventLoopPolicy()
        asyncio.set_event_loop_policy(selector_policy)
    # Drive the async entry point to completion.
    asyncio.run(main())