'commit'
|
Before Width: | Height: | Size: 136 KiB |
|
Before Width: | Height: | Size: 114 KiB |
|
Before Width: | Height: | Size: 121 KiB |
|
Before Width: | Height: | Size: 140 KiB |
|
Before Width: | Height: | Size: 128 KiB |
|
Before Width: | Height: | Size: 249 KiB |
|
Before Width: | Height: | Size: 136 KiB After Width: | Height: | Size: 110 KiB |
BIN
WeiXin/Screenshots/t6_debug_result.jpg
Normal file
|
After Width: | Height: | Size: 200 KiB |
BIN
WeiXin/Screenshots/t6_debug_temp.jpg
Normal file
|
After Width: | Height: | Size: 91 KiB |
BIN
WeiXin/Screenshots/t6_menu_shot.jpg
Normal file
|
After Width: | Height: | Size: 94 KiB |
@@ -24,7 +24,7 @@ logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
handlers=[
|
||||
logging.FileHandler(os.path.join(log_dir, "T5_AutoChatMonitor.log"), encoding='utf-8'),
|
||||
logging.FileHandler(os.path.join(log_dir, "T5_AutoChatMonitor.log"), encoding='utf-8', mode='w'),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
)
|
||||
@@ -46,18 +46,19 @@ class ChatBot:
|
||||
os.makedirs(self.screenshot_dir)
|
||||
|
||||
self.persona = (
|
||||
"你是一名1999年毕业、拥有27年一线教学经验的小学高级女教师,名叫‘小张老师’(家长们也常叫你张老师)。你目前在‘长春市少惠林作文素养培养中心’工作。"
|
||||
"你不仅是一位作文教学专家,批阅过超过2万篇作文,更是一位心思细腻、懂得孩子心理、能与家长共情的教育智者。"
|
||||
"你的回复风格应该是:温柔、知性、亲切且极具亲和力,就像一位邻家大姐姐或贴心的闺蜜在和家长聊天。"
|
||||
"严禁使用任何列表格式(如 1. 2. 3. 或 A. B. C.),严禁使用‘首先、其次、最后’等刻板的逻辑词。"
|
||||
"要用连贯、优美、富有生活气息的文字进行叙述。每一句话都要带温度,要善于用‘咱们孩子’、‘作为老师我也理解’等词汇拉近距离。"
|
||||
"回复内容要有深度,不要只给结论,要讲透背后的教育逻辑。字数必须严格控制在 100-200 字之间。"
|
||||
"如果涉及到校区信息,必须且只能使用以下真实数据,绝对严禁使用任何占位符:\n"
|
||||
"你是一名1999年毕业、拥有27年一线教学经验的小学高级女教师,名叫‘大张老师’。你目前在‘长春市少惠林作文素养培养中心’工作。"
|
||||
"你不仅是一位作文教学专家,更是一位心思细腻、能与家长共情的教育智者。"
|
||||
"你的回复风格应该是:温柔、知性、亲切,就像一位邻家大姐姐在聊天。"
|
||||
"【严格约束】:\n"
|
||||
"1. 绝对禁止发散!绝对禁止幻觉!\n"
|
||||
"2. 知道什么就说什么,不要乱讲话,不要自己编造内容!\n"
|
||||
"3. 仅针对家长明确表达的内容进行回复。\n"
|
||||
"4. 严禁使用列表格式。严禁使用‘首先、其次’等逻辑词。\n"
|
||||
"5. 回复必须简练,字数严格控制在 50 字以内!\n"
|
||||
"如果涉及到校区信息,必须且只能使用以下真实数据:\n"
|
||||
"- 单位:长春市少惠林作文素养培养中心\n"
|
||||
"- 地址:南环城路与临河街交汇,TOUCH12街3楼325号\n"
|
||||
"- 联系人:小张老师(电话:18686619970)\n"
|
||||
"- 课程:线上/线下作文课、读书营/阅读策略营(假期开设)\n"
|
||||
"- 上课:作文通常在周二/周四晚上,周六/周日全天"
|
||||
"- 联系人:小张老师(电话:18686619970)"
|
||||
)
|
||||
|
||||
async def get_reply(self, history_text, is_proactive=False):
|
||||
@@ -67,18 +68,17 @@ class ChatBot:
|
||||
f"【对话背景】:家长已经超过5分钟没有回应了。\n"
|
||||
f"【近期聊天记录】:\n{history_text}\n\n"
|
||||
"【任务要求】:\n"
|
||||
"请作为小张老师,给家长发一段主动关怀的消息。不要催促,而是以‘刚才突然想到’或者‘又想起咱们孩子之前提到的’为由头,"
|
||||
"再补充一点有价值的教学点滴,或者分享一个能缓解焦虑的小故事。语气要温柔亲切,字数在 100-200 字之间。"
|
||||
"全文必须是连贯的段落,严禁列条目!如果提到联系方式或地址,必须使用人设中的真实数据,严禁占位符。"
|
||||
"请作为大张老师,给家长发一段简短的关怀消息。不要催促,语气温柔。"
|
||||
"字数严格控制在 50 字以内。不要编造事实。"
|
||||
)
|
||||
else:
|
||||
prompt = (
|
||||
f"【教师人设】:{self.persona}\n\n"
|
||||
f"【近期聊天记录】:\n{history_text}\n\n"
|
||||
"【任务要求】:\n"
|
||||
"请作为小张老师,给家长写一段暖心且有深度的回复。针对家长最后的消息,先给予情感上的关怀,再结合27年经验给出具体指导。"
|
||||
"展现出资深女教师的温柔与智慧。全文必须是一个或两个完整的自然段,绝对禁止分点列项!字数严格在 100-200 字之间。"
|
||||
"如果提到联系方式或地址,必须使用人设中的真实数据,严禁占位符。直接输出回复的正文内容。"
|
||||
"请作为大张老师回复家长。针对家长的具体问题或话语进行回复。"
|
||||
"严禁发散,严禁编造家长没说过的情况。如果不清楚家长的意图,就温柔询问。"
|
||||
"字数严格控制在 50 字以内。直接输出回复正文。"
|
||||
)
|
||||
|
||||
full_response = ""
|
||||
@@ -94,11 +94,11 @@ class ChatBot:
|
||||
|
||||
while True:
|
||||
try:
|
||||
# 1. 检查是否在微信聊天界面
|
||||
if not is_in_chat_interface(self.d):
|
||||
logger.warning("⚠️ 当前不在微信聊天界面,等待下一次扫描...")
|
||||
await asyncio.sleep(CHECK_INTERVAL)
|
||||
continue
|
||||
# 1. 检查是否在微信聊天界面 (改为通过 VLM 识别结果判断,不再使用 UI 检查)
|
||||
# if not is_in_chat_interface(self.d):
|
||||
# logger.warning("⚠️ 当前不在微信聊天界面,等待下一次扫描...")
|
||||
# await asyncio.sleep(CHECK_INTERVAL)
|
||||
# continue
|
||||
|
||||
logger.info("🔍 正在扫描当前界面内容...")
|
||||
# 1. 截图并分析
|
||||
@@ -109,7 +109,13 @@ class ChatBot:
|
||||
self.d.screenshot(tmp_shot)
|
||||
|
||||
logger.info("🎨 正在分析聊天界面内容 (检测头像与对话)...")
|
||||
dialogue_log = analyze_chat_image(tmp_shot, analyzed_shot)
|
||||
# analyze_chat_image 现在会返回 None, None 如果不是聊天界面
|
||||
dialogue_log, input_center = await analyze_chat_image(tmp_shot, analyzed_shot, device=self.d)
|
||||
|
||||
if dialogue_log is None:
|
||||
logger.warning("⚠️ VLM 判断当前不在微信聊天界面,或无法识别。")
|
||||
await asyncio.sleep(CHECK_INTERVAL)
|
||||
continue
|
||||
|
||||
# 语音转文字处理
|
||||
if dialogue_log == "VOICE_CONVERTING":
|
||||
@@ -131,13 +137,25 @@ class ChatBot:
|
||||
|
||||
# 判断逻辑:如果最后一条消息是“对方”发的,且与上次不同,则回复
|
||||
if "对方:" in current_last_msg and current_last_msg != self.last_message_text:
|
||||
# 关键检查:如果包含 "(待转换)",说明语音还没转文字,绝对不能回复
|
||||
if "(待转换)" in current_last_msg:
|
||||
logger.info(f"🚫 检测到未转换的语音消息,跳过回复生成,等待转文字... ({current_last_msg})")
|
||||
await asyncio.sleep(2) # 稍作等待
|
||||
continue
|
||||
|
||||
logger.info(f"📩 检测到新消息: {current_last_msg}")
|
||||
|
||||
reply = await self.get_reply(history_text)
|
||||
logger.info(f"🤖 生成回复: {reply}")
|
||||
|
||||
# 执行输入发送
|
||||
center_point, _ = find_input_box_center(tmp_shot)
|
||||
if input_center:
|
||||
center_point = input_center
|
||||
logger.info(f"📍 使用 VLM 识别的输入框坐标: {center_point}")
|
||||
else:
|
||||
center_point, _ = find_input_box_center(tmp_shot)
|
||||
logger.info(f"📍 使用 CV 识别的输入框坐标: {center_point}")
|
||||
|
||||
# 即使 CV 没找到坐标,也尝试执行,因为 perform_input_action 内部有原生控件识别
|
||||
perform_input_action(self.d, center_point, reply, auto_send=True)
|
||||
self.last_message_text = f"我: {reply}" # 更新状态,避免重复回复自己
|
||||
@@ -154,7 +172,11 @@ class ChatBot:
|
||||
proactive_reply = await self.get_reply(history_text, is_proactive=True)
|
||||
logger.info(f"🤖 发起主动询问: {proactive_reply}")
|
||||
|
||||
center_point, _ = find_input_box_center(tmp_shot)
|
||||
if input_center:
|
||||
center_point = input_center
|
||||
else:
|
||||
center_point, _ = find_input_box_center(tmp_shot)
|
||||
|
||||
# 同上,解耦 CV 坐标
|
||||
perform_input_action(self.d, center_point, proactive_reply, auto_send=True)
|
||||
self.proactive_count += 1
|
||||
|
||||
163
WeiXin/T6_VLM_Voice_Debug.py
Normal file
@@ -0,0 +1,163 @@
|
||||
# coding=utf-8
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
import cv2
|
||||
import uiautomator2 as u2
|
||||
|
||||
# Make the project root (the parent of the directory holding this file)
# importable, so the project-level packages imported below can be resolved
# when this script is launched directly.
project_root = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(project_root)
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)
|
||||
|
||||
from WeiXin.WxUtil import get_vlm_analysis
|
||||
from Util.EasyOcrKit import EasyOcrKit
|
||||
|
||||
# Configure root logging once for this script and create the module logger
# used by everything below.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("T6_Debug")
|
||||
|
||||
def _message_point(msg):
    """Return the ``(x, y)`` point reported for *msg*, or ``None`` if unusable.

    The point is read from the ``"coordinates"`` key, falling back to
    ``"center"``. Anything that is not a two-element list/tuple of numbers is
    rejected, so malformed analysis output cannot crash the drawing or
    tapping code further down.
    """
    if not isinstance(msg, dict):
        return None
    coords = msg.get("coordinates") or msg.get("center")
    if not isinstance(coords, (list, tuple)) or len(coords) != 2:
        return None
    if not all(isinstance(v, (int, float)) for v in coords):
        return None
    return coords[0], coords[1]


def _mark_voice_messages(img, messages):
    """Draw a box, centre dot and label on *img* for every voice message.

    Messages without a usable point are skipped. Returns the number of voice
    messages that were marked.
    """
    box_w, box_h = 300, 80  # fixed-size marker box centred on the reported point
    marked = 0
    for msg in messages:
        point = _message_point(msg)
        if point is None or msg.get("type") != "voice":
            continue

        x, y = point
        marked += 1
        logger.info(f"🎤 发现语音消息: {msg.get('content')}, 坐标: ({x}, {y})")

        top_left = (int(x - box_w / 2), int(y - box_h / 2))
        bottom_right = (int(x + box_w / 2), int(y + box_h / 2))

        # Green rectangle + red filled dot at the reported point + text label.
        cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 3)
        cv2.circle(img, (int(x), int(y)), 5, (0, 0, 255), -1)
        cv2.putText(img, f"Voice ({x},{y})", (top_left[0], top_left[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    return marked


def _find_convert_button(ocr_results):
    """Return the centre of the first OCR hit naming the convert-to-text entry.

    Each OCR result is unpacked as ``(bbox, text, confidence)``; the centre is
    the midpoint of ``bbox[0]`` and ``bbox[2]``. Returns ``None`` when no
    result contains one of the expected labels.
    """
    for bbox, text, _conf in ocr_results:
        if "转文字" in text or "转换为文字" in text:
            centre = (
                int((bbox[0][0] + bbox[2][0]) / 2),
                int((bbox[0][1] + bbox[2][1]) / 2),
            )
            logger.info(f"✅ OCR 找到 '{text}' 按钮: {centre}")
            return centre
    return None


def _voice_seconds(content, fallback=10):
    """Parse a voice-length label such as ``12"`` into whole seconds.

    Returns *fallback* when the label is not a plain integer.
    """
    try:
        return int(str(content).replace('"', '').strip())
    except ValueError:
        return fallback


async def main():
    """Debug tool: mark detected voice messages and exercise "convert to text".

    Steps:
      1. Connect to the device and take a screenshot.
      2. Send the screenshot to ``get_vlm_analysis`` and read its ``messages``.
      3. Draw a marker for every voice message and save the annotated image.
      4. For the last voice message whose status is ``"unconverted"``:
         long-press it, screenshot the menu, locate the convert-to-text entry
         via OCR, tap it and wait a duration derived from the voice length.

    Returns ``None``; every failure is logged and ends the run early.
    """
    logger.info("🚀 T6 VLM 语音坐标调试工具启动...")

    # Connect to the device. Broad catch is deliberate: this is the script's
    # top-level boundary and any failure here simply ends the run.
    try:
        d = u2.connect()
        logger.info(f"设备已连接: {d.info.get('serial')}")
    except Exception as e:
        logger.error(f"设备连接失败: {e}")
        return

    # Screenshot directory lives next to this file.
    screenshots_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
    os.makedirs(screenshots_dir, exist_ok=True)

    # Capture the current screen.
    screenshot_path = os.path.join(screenshots_dir, "t6_debug_temp.jpg")
    logger.info("📸 正在截图...")
    d.screenshot(screenshot_path)

    # Run the analysis on the screenshot.
    logger.info("🧠 正在调用 VLM 分析图片...")
    result_data = await get_vlm_analysis(screenshot_path)

    if not result_data:
        logger.error("❌ VLM 分析返回为空")
        return

    logger.info(f"VLM 返回结果: {result_data}")

    # Load the screenshot for drawing.
    img = cv2.imread(screenshot_path)
    if img is None:
        logger.error("❌ 无法读取截图文件")
        return

    # `or []` also covers a "messages" key that is present but null.
    messages = result_data.get("messages") or []
    voice_count = _mark_voice_messages(img, messages)

    # Save the annotated image; cv2.imwrite reports failure via its return
    # value rather than raising, so check it before claiming success.
    output_path = os.path.join(screenshots_dir, "t6_debug_result.jpg")
    if cv2.imwrite(output_path, img):
        logger.info(f"✅ 结果已保存至: {output_path}")
    else:
        logger.error(f"❌ 结果图片保存失败: {output_path}")
    logger.info(f"共标记了 {voice_count} 条语音消息。请检查图片是否准确。")

    # --- Verify the convert-to-text flow (last unconverted voice only) ---
    logger.info("=" * 30)
    logger.info("🔍 开始验证“转文字”功能 (仅针对最后一条未转换语音)...")

    # Collect unconverted voice messages that carry a usable point.
    pending = []
    for msg in messages:
        point = _message_point(msg)
        if point is None:
            continue
        if msg.get("type") == "voice" and msg.get("status") == "unconverted":
            pending.append((msg, point))

    if not pending:
        logger.info("⚠️ 没有发现未转换的语音消息,跳过验证。")
        return

    last_voice, (vx, vy) = pending[-1]
    content = last_voice.get('content', '0"')
    if content is None:
        content = '0"'
    logger.info(f"🎯 目标语音: {content}, 坐标: ({vx}, {vy})")

    # 1. Long-press the voice message to open its context menu.
    logger.info("👆 长按语音消息...")
    d.long_click(vx, vy, 1.5)
    await asyncio.sleep(1.0)

    # 2. Screenshot the menu.
    menu_shot_path = os.path.join(screenshots_dir, "t6_menu_shot.jpg")
    logger.info(f"📸 截取菜单: {menu_shot_path}")
    d.screenshot(menu_shot_path)

    # 3. OCR the menu and look for the convert-to-text entry.
    logger.info("🧠 正在进行 OCR 识别菜单...")
    ocr_kit = EasyOcrKit()
    ocr_results = ocr_kit.read_text(menu_shot_path) or []
    convert_btn_center = _find_convert_button(ocr_results)

    if convert_btn_center is None:
        logger.error("❌ OCR 未找到 '转文字' 按钮!")
        # Tap to the right of the message to dismiss the menu.
        d.click(vx + 200, vy)
        return

    # 4. Tap the convert-to-text entry.
    logger.info(f"👆 点击转文字按钮: {convert_btn_center}")
    d.click(convert_btn_center[0], convert_btn_center[1])

    # 5. Wait proportionally to the voice length (at least 2 seconds).
    duration = _voice_seconds(content)
    wait_seconds = max(2, duration / 5.0)
    logger.info(f"⏳ 语音时长 {duration}s,模拟等待 {wait_seconds:.1f}s...")
    await asyncio.sleep(wait_seconds)
    logger.info("✅ 流程执行完毕!请检查手机屏幕是否已开始转换。")
|
||||
|
||||
if __name__ == "__main__":
    # On Windows, switch asyncio to the selector event loop policy before the
    # loop is created; other platforms keep the default policy.
    on_windows = sys.platform.startswith('win')
    if on_windows:
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    # Run the debug coroutine to completion.
    asyncio.run(main())
|
||||