164 lines
5.7 KiB
Python
164 lines
5.7 KiB
Python
|
|
# coding=utf-8
|
|||
|
|
import asyncio
|
|||
|
|
import logging
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
import time
|
|||
|
|
|
|||
|
|
import cv2
|
|||
|
|
import uiautomator2 as u2
|
|||
|
|
|
|||
|
|
# Put the project root (the directory one level above this file's own
# directory) on sys.path so the project-local packages imported below resolve
# when this file is executed directly as a script.
project_root = os.path.dirname(
    os.path.dirname(
        os.path.abspath(__file__)
    )
)
if project_root not in sys.path:
    sys.path.append(project_root)
|
|||
|
|
|
|||
|
|
from WeiXin.WxUtil import get_vlm_analysis
|
|||
|
|
from Util.EasyOcrKit import EasyOcrKit
|
|||
|
|
|
|||
|
|
# Logging setup: INFO-level records formatted as
# "timestamp - logger name - level - message", plus the logger used by
# everything below in this script.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("T6_Debug")
|
|||
|
|
|
|||
|
|
def _message_point(msg):
    """Return the ``(x, y)`` point stored on a VLM message, or ``None``.

    The point is read from the ``"coordinates"`` key, falling back to
    ``"center"``.  A value that is missing, or that is not a pair of numbers,
    yields ``None`` so that one malformed entry cannot abort the whole run.
    """
    coords = msg.get("coordinates") or msg.get("center")
    if not coords:
        return None
    try:
        x, y = coords
    except (TypeError, ValueError):
        x = y = None
    if not all(isinstance(v, (int, float)) for v in (x, y)):
        logger.warning(f"⚠️ 坐标格式无效，已跳过: {coords!r}")
        return None
    return x, y


def _draw_voice_marker(img, x, y):
    """Draw a fixed-size labelled box and a centre dot on *img* at (x, y)."""
    # Fixed 300x80 box centred on the reported point; the VLM result only
    # provides a point, not the real extent of the message bubble.
    w, h = 300, 80
    top_left = (int(x - w / 2), int(y - h / 2))
    bottom_right = (int(x + w / 2), int(y + h / 2))

    # Green rectangle, filled red dot on the exact point (OpenCV uses BGR).
    cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 3)
    cv2.circle(img, (int(x), int(y)), 5, (0, 0, 255), -1)
    cv2.putText(img, f"Voice ({x},{y})", (top_left[0], top_left[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)


def _find_convert_button(ocr_results):
    """Return ``(text, (x, y))`` for the first "convert to text" OCR hit.

    Each OCR result is unpacked as ``(bbox, text, confidence)``.  The centre
    is the midpoint of ``bbox[0]`` and ``bbox[2]``.  Returns ``None`` when no
    entry contains either of the expected menu captions.
    """
    for bbox, text, _conf in ocr_results:
        if "转文字" in text or "转换为文字" in text:
            center_x = int((bbox[0][0] + bbox[2][0]) / 2)
            center_y = int((bbox[0][1] + bbox[2][1]) / 2)
            return text, (center_x, center_y)
    return None


def _voice_seconds(content, default=10):
    """Parse a voice-duration label such as ``12"`` into whole seconds.

    Returns *default* when the label is missing or is not a plain integer
    once the double-quote marks are stripped.
    """
    text = "" if content is None else str(content)
    try:
        return int(text.replace('"', '').strip())
    except ValueError:
        return default


async def main():
    """Run the voice-coordinate debug flow against the connected device.

    Steps:
      1. Connect to the device through uiautomator2 and take a screenshot
         into the ``Screenshots`` directory next to this file.
      2. Send the screenshot to ``get_vlm_analysis`` and read its
         ``"messages"`` list.
      3. Draw a marker for every message of type ``"voice"`` that carries a
         usable point and save the annotated image.
      4. For the last voice message whose status is ``"unconverted"``:
         long-press it, screenshot the popup menu, locate the
         "convert to text" entry via OCR, tap it and wait a time scaled to
         the voice duration.  If the entry is not found, tap beside the
         message to dismiss the menu.

    Returns ``None``.  Failures are logged and end the run early rather than
    being raised.
    """
    logger.info("🚀 T6 VLM 语音坐标调试工具启动...")

    # Connect to the device.
    try:
        d = u2.connect()
        logger.info(f"设备已连接: {d.info.get('serial')}")
    except Exception as e:
        logger.error(f"设备连接失败: {e}")
        return

    # Screenshot directory; exist_ok avoids the check-then-create race.
    screenshots_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
    os.makedirs(screenshots_dir, exist_ok=True)

    # Capture the current screen.
    screenshot_path = os.path.join(screenshots_dir, "t6_debug_temp.jpg")
    logger.info("📸 正在截图...")
    d.screenshot(screenshot_path)

    # Ask the VLM to analyse the screenshot.
    logger.info("🧠 正在调用 VLM 分析图片...")
    result_data = await get_vlm_analysis(screenshot_path)

    if not result_data:
        logger.error("❌ VLM 分析返回为空")
        return

    logger.info(f"VLM 返回结果: {result_data}")

    # Load the screenshot so the markers can be drawn onto it.
    img = cv2.imread(screenshot_path)
    if img is None:
        logger.error("❌ 无法读取截图文件")
        return

    messages = result_data.get("messages") or []

    # Resolve every voice message's point once; both the drawing pass and the
    # conversion check below reuse this list, and the input dicts are left
    # unmodified.
    voices = []
    for msg in messages:
        if not isinstance(msg, dict) or msg.get("type") != "voice":
            continue
        point = _message_point(msg)
        if point is not None:
            voices.append((msg, point))

    for msg, (x, y) in voices:
        logger.info(f"🎤 发现语音消息: {msg.get('content')}, 坐标: ({x}, {y})")
        _draw_voice_marker(img, x, y)
    voice_count = len(voices)

    # Save the annotated image; imwrite reports failure via its return value.
    output_path = os.path.join(screenshots_dir, "t6_debug_result.jpg")
    if cv2.imwrite(output_path, img):
        logger.info(f"✅ 结果已保存至: {output_path}")
    else:
        logger.error(f"❌ 结果图片保存失败: {output_path}")
    logger.info(f"共标记了 {voice_count} 条语音消息。请检查图片是否准确。")

    # --- Verify the "convert to text" flow on the last unconverted voice ---
    logger.info("="*30)
    logger.info("🔍 开始验证“转文字”功能 (仅针对最后一条未转换语音)...")

    unconverted_voices = [
        (msg, point) for msg, point in voices
        if msg.get("status") == "unconverted"
    ]

    if not unconverted_voices:
        logger.info("⚠️ 没有发现未转换的语音消息，跳过验证。")
        return

    last_voice, (vx, vy) = unconverted_voices[-1]
    content = last_voice.get('content', '0"')
    logger.info(f"🎯 目标语音: {content}, 坐标: ({vx}, {vy})")

    # 1. Long-press the voice message to open its context menu.
    logger.info("👆 长按语音消息...")
    d.long_click(vx, vy, 1.5)
    # Non-blocking pause so the event loop stays responsive.
    await asyncio.sleep(1.0)

    # 2. Screenshot the menu.
    menu_shot_path = os.path.join(screenshots_dir, "t6_menu_shot.jpg")
    logger.info(f"📸 截取菜单: {menu_shot_path}")
    d.screenshot(menu_shot_path)

    # 3. OCR the menu and look for the convert entry.
    logger.info("🧠 正在进行 OCR 识别菜单...")
    ocr_kit = EasyOcrKit()
    ocr_results = ocr_kit.read_text(menu_shot_path)

    hit = _find_convert_button(ocr_results)
    if hit is None:
        logger.error("❌ OCR 未找到 '转文字' 按钮！")
        # Tap to the right of the message to dismiss the menu.
        d.click(vx + 200, vy)
        return

    text, convert_btn_center = hit
    logger.info(f"✅ OCR 找到 '{text}' 按钮: {convert_btn_center}")

    # 4. Tap the convert entry.
    logger.info(f"👆 点击转文字按钮: {convert_btn_center}")
    d.click(convert_btn_center[0], convert_btn_center[1])

    # 5. Wait a fifth of the voice duration, but at least two seconds.
    duration = _voice_seconds(content)
    wait_seconds = max(2, duration / 5.0)
    logger.info(f"⏳ 语音时长 {duration}s，模拟等待 {wait_seconds:.1f}s...")
    await asyncio.sleep(wait_seconds)
    logger.info("✅ 流程执行完毕！请检查手机屏幕是否已开始转换。")
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # Script entry point. On Windows the selector-based event loop policy is
    # installed before the coroutine is started.
    # NOTE(review): the reason for preferring the selector loop is not visible
    # in this file - presumably a dependency needs it; confirm.
    if sys.platform[:3] == 'win':
        policy = asyncio.WindowsSelectorEventLoopPolicy()
        asyncio.set_event_loop_policy(policy)
    asyncio.run(main())
|