This commit is contained in:
HuangHai
2026-01-25 12:52:52 +08:00
parent 59dc95cd9d
commit bf485d10f1
30 changed files with 523 additions and 688 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 165 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 167 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 79 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 114 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 121 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 140 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 128 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 249 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 136 KiB

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 200 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

View File

@@ -24,7 +24,7 @@ logging.basicConfig(
level=logging.INFO, level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[ handlers=[
logging.FileHandler(os.path.join(log_dir, "T5_AutoChatMonitor.log"), encoding='utf-8'), logging.FileHandler(os.path.join(log_dir, "T5_AutoChatMonitor.log"), encoding='utf-8', mode='w'),
logging.StreamHandler() logging.StreamHandler()
] ]
) )
@@ -46,18 +46,19 @@ class ChatBot:
os.makedirs(self.screenshot_dir) os.makedirs(self.screenshot_dir)
self.persona = ( self.persona = (
"你是一名1999年毕业、拥有27年一线教学经验的小学高级女教师名叫张老师’(家长们也常叫你张老师)。你目前在‘长春市少惠林作文素养培养中心’工作。" "你是一名1999年毕业、拥有27年一线教学经验的小学高级女教师名叫张老师’。你目前在‘长春市少惠林作文素养培养中心’工作。"
"你不仅是一位作文教学专家,批阅过超过2万篇作文更是一位心思细腻、懂得孩子心理、能与家长共情的教育智者。" "你不仅是一位作文教学专家,更是一位心思细腻、能与家长共情的教育智者。"
"你的回复风格应该是:温柔、知性、亲切且极具亲和力,就像一位邻家大姐姐或贴心的闺蜜在和家长聊天。" "你的回复风格应该是:温柔、知性、亲切,就像一位邻家大姐姐聊天。"
"严禁使用任何列表格式(如 1. 2. 3. 或 A. B. C.),严禁使用‘首先、其次、最后’等刻板的逻辑词。" "【严格约束】:\n"
"要用连贯、优美、富有生活气息的文字进行叙述。每一句话都要带温度,要善于用‘咱们孩子’、‘作为老师我也理解’等词汇拉近距离。" "1. 绝对禁止发散!绝对禁止幻觉!\n"
"回复内容要有深度,不要只给结论,要讲透背后的教育逻辑。字数必须严格控制在 100-200 字之间。" "2. 知道什么就说什么,不要乱讲话,不要自己编造内容!\n"
"如果涉及到校区信息,必须且只能使用以下真实数据,绝对严禁使用任何占位符:\n" "3. 仅针对家长明确表达的内容进行回复。\n"
"4. 严禁使用列表格式。严禁使用‘首先、其次’等逻辑词。\n"
"5. 回复必须简练,字数严格控制在 50 字以内!\n"
"如果涉及到校区信息,必须且只能使用以下真实数据:\n"
"- 单位:长春市少惠林作文素养培养中心\n" "- 单位:长春市少惠林作文素养培养中心\n"
"- 地址南环城路与临河街交汇TOUCH12街3楼325号\n" "- 地址南环城路与临河街交汇TOUCH12街3楼325号\n"
"- 联系人小张老师电话18686619970\n" "- 联系人小张老师电话18686619970"
"- 课程:线上/线下作文课、读书营/阅读策略营(假期开设)\n"
"- 上课:作文通常在周二/周四晚上,周六/周日全天"
) )
async def get_reply(self, history_text, is_proactive=False): async def get_reply(self, history_text, is_proactive=False):
@@ -67,18 +68,17 @@ class ChatBot:
f"【对话背景】家长已经超过5分钟没有回应了。\n" f"【对话背景】家长已经超过5分钟没有回应了。\n"
f"【近期聊天记录】:\n{history_text}\n\n" f"【近期聊天记录】:\n{history_text}\n\n"
"【任务要求】:\n" "【任务要求】:\n"
"请作为张老师,给家长发一段主动关怀消息。不要催促,而是以‘刚才突然想到’或者‘又想起咱们孩子之前提到的’为由头," "请作为张老师,给家长发一段简短的关怀消息。不要催促,语气温柔。"
"再补充一点有价值的教学点滴,或者分享一个能缓解焦虑的小故事。语气要温柔亲切,字数在 100-200 字之间" "字数严格控制在 50 字以内。不要编造事实"
"全文必须是连贯的段落,严禁列条目!如果提到联系方式或地址,必须使用人设中的真实数据,严禁占位符。"
) )
else: else:
prompt = ( prompt = (
f"【教师人设】:{self.persona}\n\n" f"【教师人设】:{self.persona}\n\n"
f"【近期聊天记录】:\n{history_text}\n\n" f"【近期聊天记录】:\n{history_text}\n\n"
"【任务要求】:\n" "【任务要求】:\n"
"请作为张老师给家长写一段暖心且有深度的回复。针对家长最后的消息先给予情感上的关怀再结合27年经验给出具体指导" "请作为张老师回复家长。针对家长的具体问题或话语进行回复"
"展现出资深女教师的温柔与智慧。全文必须是一个或两个完整的自然段,绝对禁止分点列项!字数严格在 100-200 字之间" "严禁发散,严禁编造家长没说过的情况。如果不清楚家长的意图,就温柔询问"
"如果提到联系方式或地址,必须使用人设中的真实数据,严禁占位符。直接输出回复正文内容" "字数严格控制在 50 字以内。直接输出回复正文。"
) )
full_response = "" full_response = ""
@@ -94,11 +94,11 @@ class ChatBot:
while True: while True:
try: try:
# 1. 检查是否在微信聊天界面 # 1. 检查是否在微信聊天界面 (改为通过 VLM 识别结果判断,不再使用 UI 检查)
if not is_in_chat_interface(self.d): # if not is_in_chat_interface(self.d):
logger.warning("⚠️ 当前不在微信聊天界面,等待下一次扫描...") # logger.warning("⚠️ 当前不在微信聊天界面,等待下一次扫描...")
await asyncio.sleep(CHECK_INTERVAL) # await asyncio.sleep(CHECK_INTERVAL)
continue # continue
logger.info("🔍 正在扫描当前界面内容...") logger.info("🔍 正在扫描当前界面内容...")
# 1. 截图并分析 # 1. 截图并分析
@@ -109,7 +109,13 @@ class ChatBot:
self.d.screenshot(tmp_shot) self.d.screenshot(tmp_shot)
logger.info("🎨 正在分析聊天界面内容 (检测头像与对话)...") logger.info("🎨 正在分析聊天界面内容 (检测头像与对话)...")
dialogue_log = analyze_chat_image(tmp_shot, analyzed_shot) # analyze_chat_image 现在会返回 None, None 如果不是聊天界面
dialogue_log, input_center = await analyze_chat_image(tmp_shot, analyzed_shot, device=self.d)
if dialogue_log is None:
logger.warning("⚠️ VLM 判断当前不在微信聊天界面,或无法识别。")
await asyncio.sleep(CHECK_INTERVAL)
continue
# 语音转文字处理 # 语音转文字处理
if dialogue_log == "VOICE_CONVERTING": if dialogue_log == "VOICE_CONVERTING":
@@ -131,13 +137,25 @@ class ChatBot:
# 判断逻辑:如果最后一条消息是“对方”发的,且与上次不同,则回复 # 判断逻辑:如果最后一条消息是“对方”发的,且与上次不同,则回复
if "对方:" in current_last_msg and current_last_msg != self.last_message_text: if "对方:" in current_last_msg and current_last_msg != self.last_message_text:
# 关键检查:如果包含 "(待转换)",说明语音还没转文字,绝对不能回复
if "(待转换)" in current_last_msg:
logger.info(f"🚫 检测到未转换的语音消息,跳过回复生成,等待转文字... ({current_last_msg})")
await asyncio.sleep(2) # 稍作等待
continue
logger.info(f"📩 检测到新消息: {current_last_msg}") logger.info(f"📩 检测到新消息: {current_last_msg}")
reply = await self.get_reply(history_text) reply = await self.get_reply(history_text)
logger.info(f"🤖 生成回复: {reply}") logger.info(f"🤖 生成回复: {reply}")
# 执行输入发送 # 执行输入发送
center_point, _ = find_input_box_center(tmp_shot) if input_center:
center_point = input_center
logger.info(f"📍 使用 VLM 识别的输入框坐标: {center_point}")
else:
center_point, _ = find_input_box_center(tmp_shot)
logger.info(f"📍 使用 CV 识别的输入框坐标: {center_point}")
# 即使 CV 没找到坐标,也尝试执行,因为 perform_input_action 内部有原生控件识别 # 即使 CV 没找到坐标,也尝试执行,因为 perform_input_action 内部有原生控件识别
perform_input_action(self.d, center_point, reply, auto_send=True) perform_input_action(self.d, center_point, reply, auto_send=True)
self.last_message_text = f"我: {reply}" # 更新状态,避免重复回复自己 self.last_message_text = f"我: {reply}" # 更新状态,避免重复回复自己
@@ -154,7 +172,11 @@ class ChatBot:
proactive_reply = await self.get_reply(history_text, is_proactive=True) proactive_reply = await self.get_reply(history_text, is_proactive=True)
logger.info(f"🤖 发起主动询问: {proactive_reply}") logger.info(f"🤖 发起主动询问: {proactive_reply}")
center_point, _ = find_input_box_center(tmp_shot) if input_center:
center_point = input_center
else:
center_point, _ = find_input_box_center(tmp_shot)
# 同上,解耦 CV 坐标 # 同上,解耦 CV 坐标
perform_input_action(self.d, center_point, proactive_reply, auto_send=True) perform_input_action(self.d, center_point, proactive_reply, auto_send=True)
self.proactive_count += 1 self.proactive_count += 1

View File

@@ -0,0 +1,163 @@
# coding=utf-8
import asyncio
import logging
import os
import sys
import time
import cv2
import uiautomator2 as u2
# Make the project root (the parent of this script's directory) importable so
# that the project-local imports that follow resolve when the script is run
# directly.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
    sys.path.append(project_root)
from WeiXin.WxUtil import get_vlm_analysis
from Util.EasyOcrKit import EasyOcrKit
# Configure root logging (INFO and above, timestamped records) and create the
# logger used by this debug tool.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger("T6_Debug")
def _message_center(msg):
    """Return the ``(x, y)`` point stored on a VLM message, or ``None``.

    The point is read from the ``"coordinates"`` key, falling back to
    ``"center"``. A missing value, or one that cannot be unpacked into exactly
    two items, yields ``None`` so the caller can skip the message.
    """
    coords = msg.get("coordinates") or msg.get("center")
    if not coords:
        return None
    try:
        x, y = coords
    except (TypeError, ValueError):
        logger.warning(f"⚠️ 坐标格式异常,已跳过: {coords!r}")
        return None
    return x, y


def _parse_voice_duration(content, default=10):
    """Parse a voice label such as ``12"`` into whole seconds.

    Returns ``default`` when the label, with double quotes and surrounding
    whitespace removed, is not an integer.
    """
    try:
        return int(str(content).replace('"', '').strip())
    except ValueError:
        return default


async def main():
    """Debug the voice-message coordinates reported by the VLM.

    Steps performed:

    1. Connect to the device and save a screenshot under ``Screenshots``.
    2. Run ``get_vlm_analysis`` on the screenshot and draw a labelled box
       around every message of type ``"voice"`` that carries a usable point,
       saving the annotated image as ``t6_debug_result.jpg``.
    3. For the last voice message whose status is ``"unconverted"``:
       long-press it, screenshot the menu, locate the "转文字" entry via OCR,
       tap it and wait for a time derived from the voice duration.

    Returns ``None``. The function returns early when the device cannot be
    connected, the VLM result is empty, or the screenshot cannot be read.
    """
    logger.info("🚀 T6 VLM 语音坐标调试工具启动...")

    # Connect to the device.
    try:
        d = u2.connect()
        logger.info(f"设备已连接: {d.info.get('serial')}")
    except Exception as e:
        logger.error(f"设备连接失败: {e}")
        return

    # Screenshot directory (created on demand next to this script).
    screenshots_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
    os.makedirs(screenshots_dir, exist_ok=True)

    # Take the screenshot.
    screenshot_path = os.path.join(screenshots_dir, "t6_debug_temp.jpg")
    logger.info("📸 正在截图...")
    d.screenshot(screenshot_path)

    # Ask the VLM to analyse the screenshot.
    logger.info("🧠 正在调用 VLM 分析图片...")
    result_data = await get_vlm_analysis(screenshot_path)
    if not result_data:
        logger.error("❌ VLM 分析返回为空")
        return
    logger.info(f"VLM 返回结果: {result_data}")

    # Load the image that will be annotated.
    img = cv2.imread(screenshot_path)
    if img is None:
        logger.error("❌ 无法读取截图文件")
        return

    # ``or []`` also covers a result whose "messages" value is null.
    messages = result_data.get("messages") or []
    voice_count = 0
    # (message, (x, y)) pairs for unconverted voice messages, in screen order.
    unconverted_voices = []
    for msg in messages:
        center = _message_center(msg)
        if center is None or msg.get("type") != "voice":
            continue
        x, y = center
        content = msg.get("content")
        voice_count += 1
        logger.info(f"🎤 发现语音消息: {content}, 坐标: ({x}, {y})")

        # Draw a green box (voice) of fixed size centred on the point, plus a
        # red dot on the point itself and a text label above the box.
        w, h = 300, 80
        top_left = (int(x - w/2), int(y - h/2))
        bottom_right = (int(x + w/2), int(y + h/2))
        cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 3)
        cv2.circle(img, (int(x), int(y)), 5, (0, 0, 255), -1)
        label = f"Voice ({x},{y})"
        cv2.putText(img, label, (top_left[0], top_left[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

        if msg.get("status") == "unconverted":
            unconverted_voices.append((msg, center))

    # Save the annotated image; cv2.imwrite reports failure via its return
    # value rather than raising.
    output_path = os.path.join(screenshots_dir, "t6_debug_result.jpg")
    if cv2.imwrite(output_path, img):
        logger.info(f"✅ 结果已保存至: {output_path}")
    else:
        logger.error(f"❌ 结果图片保存失败: {output_path}")
    logger.info(f"共标记了 {voice_count} 条语音消息。请检查图片是否准确。")

    # --- Verify the "convert to text" flow (last unconverted voice only) ---
    logger.info("="*30)
    logger.info("🔍 开始验证“转文字”功能 (仅针对最后一条未转换语音)...")

    if not unconverted_voices:
        logger.info("⚠️ 没有发现未转换的语音消息,跳过验证。")
        return

    last_voice, (vx, vy) = unconverted_voices[-1]
    content = last_voice.get('content', '0"')
    logger.info(f"🎯 目标语音: {content}, 坐标: ({vx}, {vy})")

    # 1. Long-press the voice bubble to open its context menu.
    logger.info("👆 长按语音消息...")
    d.long_click(vx, vy, 1.5)
    await asyncio.sleep(1.0)

    # 2. Screenshot the menu.
    menu_shot_path = os.path.join(screenshots_dir, "t6_menu_shot.jpg")
    logger.info(f"📸 截取菜单: {menu_shot_path}")
    d.screenshot(menu_shot_path)

    # 3. OCR the menu and look for the convert-to-text entry.
    logger.info("🧠 正在进行 OCR 识别菜单...")
    ocr_kit = EasyOcrKit()
    ocr_results = ocr_kit.read_text(menu_shot_path)
    convert_btn_center = None
    for bbox, text, conf in ocr_results:
        if "转文字" in text or "转换为文字" in text:
            # Midpoint of bbox points 0 and 2.
            # NOTE(review): presumably these are opposite corners of the OCR
            # box — confirm against EasyOcrKit.read_text.
            c_x = int((bbox[0][0] + bbox[2][0]) / 2)
            c_y = int((bbox[0][1] + bbox[2][1]) / 2)
            convert_btn_center = (c_x, c_y)
            logger.info(f"✅ OCR 找到 '{text}' 按钮: {convert_btn_center}")
            break

    if convert_btn_center:
        # 4. Tap the convert-to-text entry.
        logger.info(f"👆 点击转文字按钮: {convert_btn_center}")
        d.click(convert_btn_center[0], convert_btn_center[1])

        # 5. Wait proportionally to the voice length (at least 2 seconds);
        # an unparsable label falls back to a 10 second duration.
        duration = _parse_voice_duration(content)
        wait_seconds = max(2, duration / 5.0)
        logger.info(f"⏳ 语音时长 {duration}s模拟等待 {wait_seconds:.1f}s...")
        await asyncio.sleep(wait_seconds)
        logger.info("✅ 流程执行完毕!请检查手机屏幕是否已开始转换。")
    else:
        logger.error("❌ OCR 未找到 '转文字' 按钮!")
        # Tap a blank area to dismiss the menu.
        # NOTE(review): the flattened source does not show whether this tap
        # belongs to the OCR-miss branch only or runs after both branches;
        # it is placed in the OCR-miss branch here — confirm.
        d.click(vx + 200, vy)
if __name__ == "__main__":
    # On Windows, switch asyncio to the selector event loop policy before
    # running the coroutine.
    if sys.platform.startswith('win'):
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    asyncio.run(main())

File diff suppressed because it is too large Load Diff