164 lines
5.7 KiB
Python
164 lines
5.7 KiB
Python
# coding=utf-8
|
||
import asyncio
|
||
import logging
|
||
import os
|
||
import sys
|
||
import time
|
||
|
||
import cv2
|
||
import uiautomator2 as u2
|
||
|
||
# Add the project root (the parent of this file's directory) to sys.path.
# Presumably this lets the project-local imports that follow resolve when the
# script is run directly -- confirm against the repository layout.
_this_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(_this_dir)
if project_root not in sys.path:
    sys.path.append(project_root)
|
||
|
||
from WeiXin.WxUtil import get_vlm_analysis
|
||
from Util.EasyOcrKit import EasyOcrKit
|
||
|
||
# Configure root logging (INFO level, timestamped records) and create the
# logger used by this script.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
logger = logging.getLogger("T6_Debug")
|
||
|
||
def _as_point(coords):
    """Return ``coords`` as an ``(x, y)`` tuple, or ``None`` if it is unusable.

    Only a two-element list/tuple of real numbers is accepted; anything else
    (wrong length, strings, dicts, booleans) yields ``None`` so the caller can
    skip the message instead of crashing on unpacking or arithmetic.
    """
    if not isinstance(coords, (list, tuple)) or len(coords) != 2:
        return None
    for value in coords:
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return None
    return coords[0], coords[1]


def _draw_voice_marker(img, x, y, box_size=(300, 80)):
    """Draw a labelled box and a centre dot for one voice message onto ``img``.

    The box is ``box_size`` (width, height) pixels, centred on ``(x, y)``.
    ``img`` is modified in place.
    """
    w, h = box_size
    top_left = (int(x - w / 2), int(y - h / 2))
    bottom_right = (int(x + w / 2), int(y + h / 2))

    # Green box around the voice bubble, filled dot on the reported centre.
    cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 3)
    cv2.circle(img, (int(x), int(y)), 5, (0, 0, 255), -1)
    label = f"Voice ({x},{y})"
    cv2.putText(img, label, (top_left[0], top_left[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)


def _find_convert_button(ocr_results, labels=("转文字", "转换为文字")):
    """Return ``(center, text)`` for the first OCR hit containing a label.

    Each OCR result is unpacked as ``(bbox, text, conf)``. The centre is the
    midpoint of ``bbox[0]`` and ``bbox[2]``.
    NOTE(review): assumes these two points are opposite corners of the text
    box -- confirm against ``EasyOcrKit.read_text``.
    Returns ``None`` when no result matches.
    """
    for bbox, text, _conf in ocr_results:
        if any(label in text for label in labels):
            c_x = int((bbox[0][0] + bbox[2][0]) / 2)
            c_y = int((bbox[0][1] + bbox[2][1]) / 2)
            return (c_x, c_y), text
    return None


def _parse_voice_duration(content, default=10):
    """Parse a voice-length label such as ``5"`` into whole seconds.

    ``content`` may be ``None`` or a non-string; it is converted to text
    first. Returns ``default`` when the remaining text is not an integer.
    """
    text = "" if content is None else str(content)
    try:
        return int(text.replace('"', '').strip())
    except ValueError:
        return default


async def main():
    """Run the T6 voice-coordinate debugging flow against a connected device.

    Steps, in order:
      1. Connect with ``u2.connect()`` and save a screenshot under the
         ``Screenshots`` directory next to this file.
      2. Pass the screenshot to ``get_vlm_analysis`` and read its
         ``"messages"`` list.
      3. Draw a marker for every voice message that has usable coordinates
         and save the annotated image as ``t6_debug_result.jpg``.
      4. For the last voice message whose status is ``"unconverted"``:
         long-press it, screenshot the menu, locate the convert-to-text
         button via OCR, tap it and wait a time derived from the voice length.

    Returns ``None``. Connection failures, an empty VLM result and an
    unreadable screenshot are logged and end the run early.
    """
    logger.info("🚀 T6 VLM 语音坐标调试工具启动...")

    # Connect to the device.
    try:
        d = u2.connect()
        logger.info(f"设备已连接: {d.info.get('serial')}")
    except Exception as e:
        logger.error(f"设备连接失败: {e}")
        return

    # Screenshot directory; exist_ok avoids the exists()/makedirs() race.
    screenshots_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
    os.makedirs(screenshots_dir, exist_ok=True)

    # Take the screenshot.
    screenshot_path = os.path.join(screenshots_dir, "t6_debug_temp.jpg")
    logger.info("📸 正在截图...")
    d.screenshot(screenshot_path)

    # Ask the VLM to analyse it.
    logger.info("🧠 正在调用 VLM 分析图片...")
    result_data = await get_vlm_analysis(screenshot_path)
    if not result_data:
        logger.error("❌ VLM 分析返回为空")
        return
    logger.info(f"VLM 返回结果: {result_data}")

    # Load the screenshot for drawing.
    img = cv2.imread(screenshot_path)
    if img is None:
        logger.error("❌ 无法读取截图文件")
        return

    # "messages" may be present but null, so fall back to an empty list.
    messages = result_data.get("messages") or []
    voice_count = 0
    unconverted_voices = []  # (message, (x, y)) pairs, in on-screen order

    for msg in messages:
        if msg.get("type") != "voice":
            continue
        coords = msg.get("coordinates") or msg.get("center")
        if not coords:
            continue
        point = _as_point(coords)
        if point is None:
            # Malformed VLM output: skip this message rather than abort.
            logger.warning(f"⚠️ 跳过坐标格式异常的语音消息: {coords!r}")
            continue

        x, y = point
        voice_count += 1
        logger.info(f"🎤 发现语音消息: {msg.get('content')}, 坐标: ({x}, {y})")
        _draw_voice_marker(img, x, y)

        if msg.get("status") == "unconverted":
            unconverted_voices.append((msg, point))

    # Save the annotated image; imwrite reports failure via its return value.
    output_path = os.path.join(screenshots_dir, "t6_debug_result.jpg")
    if cv2.imwrite(output_path, img):
        logger.info(f"✅ 结果已保存至: {output_path}")
    else:
        logger.error(f"❌ 结果图片保存失败: {output_path}")
    logger.info(f"共标记了 {voice_count} 条语音消息。请检查图片是否准确。")

    # --- Verify the convert-to-text flow (last unconverted voice only) ---
    logger.info("=" * 30)
    logger.info("🔍 开始验证“转文字”功能 (仅针对最后一条未转换语音)...")

    if not unconverted_voices:
        logger.info("⚠️ 没有发现未转换的语音消息,跳过验证。")
        return

    last_voice, (vx, vy) = unconverted_voices[-1]
    content = last_voice.get('content', '0"')
    logger.info(f"🎯 目标语音: {content}, 坐标: ({vx}, {vy})")

    # 1. Long-press the voice bubble, then give the menu time to appear.
    logger.info("👆 长按语音消息...")
    d.long_click(vx, vy, 1.5)
    await asyncio.sleep(1.0)

    # 2. Screenshot the menu.
    menu_shot_path = os.path.join(screenshots_dir, "t6_menu_shot.jpg")
    logger.info(f"📸 截取菜单: {menu_shot_path}")
    d.screenshot(menu_shot_path)

    # 3. OCR the menu and look for the convert-to-text entry.
    logger.info("🧠 正在进行 OCR 识别菜单...")
    ocr_kit = EasyOcrKit()
    ocr_results = ocr_kit.read_text(menu_shot_path)
    match = _find_convert_button(ocr_results)

    if match is None:
        logger.error("❌ OCR 未找到 '转文字' 按钮!")
        # Tap to the right of the bubble to dismiss the menu.
        d.click(vx + 200, vy)
        return

    convert_btn_center, text = match
    logger.info(f"✅ OCR 找到 '{text}' 按钮: {convert_btn_center}")

    # 4. Tap the convert-to-text button.
    logger.info(f"👆 点击转文字按钮: {convert_btn_center}")
    d.click(convert_btn_center[0], convert_btn_center[1])

    # 5. Wait in proportion to the voice length (at least 2 seconds).
    duration = _parse_voice_duration(content)
    wait_seconds = max(2, duration / 5.0)
    logger.info(f"⏳ 语音时长 {duration}s,模拟等待 {wait_seconds:.1f}s...")
    await asyncio.sleep(wait_seconds)
    logger.info("✅ 流程执行完毕!请检查手机屏幕是否已开始转换。")
|
||
|
||
if __name__ == "__main__":
    # On Windows, install the selector event loop policy before the event
    # loop is created; other platforms keep the default policy.
    running_on_windows = sys.platform.startswith('win')
    if running_on_windows:
        selector_policy = asyncio.WindowsSelectorEventLoopPolicy()
        asyncio.set_event_loop_policy(selector_policy)
    asyncio.run(main())
|