2026-01-25 10:28:31 +08:00
|
|
|
|
# coding=utf-8
|
2026-01-25 17:08:40 +08:00
|
|
|
|
import asyncio
|
2026-01-25 10:28:31 +08:00
|
|
|
|
import logging
|
|
|
|
|
|
import os
|
2026-01-25 17:08:40 +08:00
|
|
|
|
import sys
|
|
|
|
|
|
import time
|
2026-01-25 10:28:31 +08:00
|
|
|
|
from datetime import datetime
|
|
|
|
|
|
|
2026-01-25 17:08:40 +08:00
|
|
|
|
import uiautomator2 as u2
|
|
|
|
|
|
|
2026-01-25 10:28:31 +08:00
|
|
|
|
# Make the project root (the directory two levels above this file) importable
# so the project-local packages imported below can be resolved.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)
|
|
|
|
|
|
|
2026-01-25 15:06:17 +08:00
|
|
|
|
from Util import Win32Patch
|
|
|
|
|
|
|
2026-01-25 18:17:37 +08:00
|
|
|
|
from WeiXin.WxUtil import perform_input_action, clean_screenshots_dir, find_template_match, find_all_template_matches
|
2026-01-25 10:28:31 +08:00
|
|
|
|
from Util.LlmUtil import get_llm_response
|
2026-01-25 13:42:17 +08:00
|
|
|
|
from Util.EasyOcrKit import EasyOcrKit
|
2026-01-25 10:28:31 +08:00
|
|
|
|
|
|
|
|
|
|
# Logging setup: messages go to "Logs/T6_AutoChatMonitor.log" next to this
# file and are mirrored to the console.
log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Logs")
# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs(log_dir, exist_ok=True)

logger = logging.getLogger("T6_AutoChatMonitor")
logger.setLevel(logging.INFO)

# Remove handlers left over from an earlier configuration of this logger so
# output is not duplicated. Only this logger's own handlers are inspected
# (hasHandlers() would also report ancestor handlers), and each one is closed
# so a previously opened log file is released instead of leaked.
for _stale_handler in list(logger.handlers):
    logger.removeHandler(_stale_handler)
    _stale_handler.close()

_log_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# File handler: mode='w' starts a fresh log file on every run.
log_file_path = os.path.join(log_dir, "T6_AutoChatMonitor.log")
file_handler = logging.FileHandler(log_file_path, encoding='utf-8', mode='w')
file_handler.setFormatter(_log_formatter)
logger.addHandler(file_handler)

# Console handler.
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(_log_formatter)
logger.addHandler(stream_handler)

# Do not propagate to the root logger, which would print every record twice.
logger.propagate = False

# Log the file location so it is easy to confirm where output is written.
logger.info(f"日志文件路径: {log_file_path}")
|
2026-01-25 10:28:31 +08:00
|
|
|
|
|
|
|
|
|
|
# 配置参数
|
2026-01-25 15:06:17 +08:00
|
|
|
|
CHECK_INTERVAL = 5 # Pause between two screen scans of the main loop, in seconds
|
2026-01-25 10:28:31 +08:00
|
|
|
|
|
|
|
|
|
|
class ChatBot:
    """Screen-driven WeChat auto-responder.

    Each cycle of :meth:`run` takes a screenshot of the connected device,
    locates voice bubbles and text blocks with template matching and OCR,
    transcribes pending voice messages through the app's own "转文字" menu
    entry, and, when the last message on screen comes from the other party and
    has not been answered yet, asks the LLM for a reply and types it in.
    """

    # Directory that holds the template images used for CV matching.
    TEMPLATE_DIR = r"d:\dsWork\aiData\WeiXin\Templates"
    # Vertical band of the screenshot treated as the chat area; anything above
    # is the title bar and anything below is the input area.
    CHAT_TOP_Y = 300
    CHAT_BOTTOM_Y = 1800
    # A voice icon left of this x coordinate is attributed to the other party.
    PEER_VOICE_MAX_X = 500
    # Point tapped to dismiss a context menu without hitting the back button.
    MENU_DISMISS_POINT = (500, 500)
    # Debug-overlay colours per sender, as passed to cv2 drawing calls.
    _SENDER_COLORS = {"对方": (0, 255, 0), "我": (255, 0, 0)}

    def __init__(self):
        """Connect to the device and set up OCR, the screenshot folder and the persona."""
        self.d = u2.connect()
        self.last_message_text = ""
        # Content of the last message from the other party that was already handled.
        self.last_processed_msg = None
        self.screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
        os.makedirs(self.screenshot_dir, exist_ok=True)

        self.ocr_kit = EasyOcrKit(gpu=True)

        # While True, voice messages are processed even without an unread marker.
        self.is_first_run = True

        # System persona sent to the LLM with every request (runtime text, kept verbatim).
        self.persona = (
            "你是一名1999年毕业、拥有27年一线教学经验的小学高级女教师,名叫‘大张老师’。你目前在‘长春市少惠林作文素养培养中心’工作。"
            "你不仅是一位作文教学专家,更是一位心思细腻、能与家长共情的教育智者。"
            "你的回复风格应该是:温柔、知性、亲切,就像一位邻家大姐姐在聊天。"
            "【严格约束】:\n"
            "1. 绝对禁止发散!绝对禁止幻觉!\n"
            "2. 知道什么就说什么,不要乱讲话,不要自己编造内容!\n"
            "3. 仅针对家长明确表达的内容进行回复。\n"
            "4. 严禁使用列表格式。严禁使用‘首先、其次’等逻辑词。\n"
            "5. 回复必须简练,字数严格控制在 50 字以内!\n"
            "如果涉及到校区信息,必须且只能使用以下真实数据:\n"
            "- 单位:长春市少惠林作文素养培养中心\n"
            "- 地址:南环城路与临河街交汇,TOUCH12街3楼325号\n"
            "- 联系人:小张老师(电话:18686619970)\n"
            "- 每学期开学招收小学三年级至六年级,初中七年级的学生入学,其它年段不招生。\n"
        )

    # ------------------------------------------------------------------
    # Pure helpers
    # ------------------------------------------------------------------
    def _template(self, filename):
        """Return the full path of a template image inside ``TEMPLATE_DIR``."""
        return os.path.join(self.TEMPLATE_DIR, filename)

    @staticmethod
    def _bbox_center(bbox):
        """Return ``(x, y)`` midway between corner 0 and corner 2 of an OCR box."""
        return (bbox[0][0] + bbox[2][0]) / 2, (bbox[0][1] + bbox[2][1]) / 2

    @staticmethod
    def _clean_reply(raw):
        """Trim whitespace and any surrounding ASCII/CJK double quotes from LLM output.

        All three quote characters are stripped in one pass so nested or mixed
        quotes (e.g. ``“"text"”``) are removed too, then whitespace that was
        inside the quotes is trimmed.
        """
        return raw.strip().strip('"“”').strip()

    @staticmethod
    def _parse_voice_duration(content):
        """Parse a duration label such as ``12"`` into seconds; 10 when not numeric."""
        try:
            return int(content.replace('"', '').strip())
        except ValueError:
            return 10

    @staticmethod
    def _classify_text_sender(x_min, x_max):
        """Attribute a text block to "对方", "我" or "系统" from its horizontal extent."""
        if x_min < 250 and x_max < 700:
            return "对方"
        if x_max > 800 and x_min > 300:
            return "我"
        return "系统"

    @staticmethod
    def _last_peer_message(dialogue_log):
        """Return the content of the last entry if the other party sent it, else None."""
        if not dialogue_log:
            return None
        last_log = dialogue_log[-1]["text"]
        if not last_log.startswith("对方"):
            return None
        _sender, separator, body = last_log.partition(":")
        return body.strip() if separator else last_log

    @staticmethod
    def _extract_transcript(ocr_results, vy, next_msg=None, input_box_y=None):
        """Join the OCR lines that belong to the transcript shown under a voice bubble.

        Blocks are visited top to bottom. Blocks at or above the bubble
        (``vy + 25``) and blocks at or below the input box are skipped; reading
        stops at the next message's text/duration or 600 px below the bubble.
        """
        text_boundary = None
        voice_boundary = None
        if next_msg:
            boundary = next_msg.get("content", "").strip()
            if boundary and next_msg.get("type") == "text":
                # The first 8 characters act as a fingerprint of the next message.
                text_boundary = boundary[:8]
            elif boundary and next_msg.get("type") == "voice":
                voice_boundary = boundary

        lines = []
        for bbox, text, _conf in sorted(ocr_results, key=lambda r: (r[0][0][1] + r[0][2][1]) / 2):
            c_y = (bbox[0][1] + bbox[2][1]) / 2
            # 1. Ignore the voice bubble itself and everything above it.
            if c_y <= vy + 25:
                continue
            # 2. Ignore the input box area and everything below it.
            if input_box_y and c_y >= input_box_y - 30:
                continue
            # 3. Stop at the text of the next message.
            if text_boundary and text_boundary in text:
                logger.info(f"🛑 遇到下一条消息内容 '{text}',停止 OCR 录入。")
                break
            # 4. Stop at the (short) duration label of the next voice message.
            if voice_boundary and voice_boundary in text and len(text) < 10:
                logger.info(f"🛑 遇到下一条语音时长 '{text}',停止 OCR 录入。")
                break
            # 5. Safety net: never read further than 600 px below the bubble.
            if c_y > vy + 600:
                break
            lines.append(text)
        return " ".join(lines)

    # ------------------------------------------------------------------
    # LLM
    # ------------------------------------------------------------------
    async def get_reply(self, history_text):
        """Ask the LLM for a reply to the last message of ``history_text``.

        Args:
            history_text: The on-screen dialogue, one "sender: text" entry per line.

        Returns:
            The reply text with surrounding whitespace and quotes removed.
        """
        prompt = (
            f"【教师人设】:{self.persona}\n\n"
            f"【近期聊天记录】:\n{history_text}\n\n"
            "【任务要求】:\n"
            "请作为大张老师回复家长。**必须且只能针对聊天记录中的最后一条消息进行回复!**\n"
            "之前的聊天记录仅供参考上下文,如果之前的问题已经回答过,绝对不要重复回答。\n"
            "严禁发散,严禁编造家长没说过的情况。如果不清楚家长的意图,就温柔询问。\n"
            "字数严格控制在 50 字以内。直接输出回复正文。"
        )

        chunks = [chunk async for chunk in get_llm_response(prompt, stream=False)]
        return self._clean_reply("".join(chunks))

    # ------------------------------------------------------------------
    # Voice transcription
    # ------------------------------------------------------------------
    async def process_single_voice(self, voice_msg, next_msg=None, input_box_y=None):
        """Transcribe one voice message via the app UI and restore the original view.

        Flow: long-press -> find "转文字" (CV, then OCR fallback) -> tap -> wait
        -> screenshot + OCR -> long-press again -> find "取消转文字" -> tap.

        Args:
            voice_msg: Dict with ``coordinates`` ([x, y] of the bubble) and
                ``content`` (the duration label).
            next_msg: Optional following message used as a stop boundary for OCR.
            input_box_y: Optional y coordinate of the input box; OCR blocks at
                or below it are ignored.

        Returns:
            The transcribed text (possibly empty), or None when the conversion
            could not be started or an error occurred before OCR finished.
        """
        vx, vy = voice_msg['coordinates']
        content = voice_msg.get('content', '0"')
        logger.info(f"🎤 开始处理语音消息: {content}, 坐标: ({vx}, {vy})")

        try:
            # 1. Long-press the voice bubble to open its context menu.
            logger.info("👆 正在长按语音消息...")
            self.d.long_click(vx, vy, 0.6)
            logger.info("✅ 长按完成,等待菜单...")
            # asyncio.sleep keeps the event loop responsive (time.sleep would block it).
            await asyncio.sleep(0.3)

            # 2. Locate the "转文字" entry by template matching.
            menu_shot_path = os.path.join(self.screenshot_dir, "t6_menu_shot_convert.jpg")
            logger.info(f"📸 截取菜单图: {menu_shot_path}")
            self.d.screenshot(menu_shot_path)

            convert_template = self._template("zhun_wen_zi.jpg")
            logger.info(f"🔍 寻找模板: {convert_template}")
            convert_btn = find_template_match(menu_shot_path, convert_template, threshold=0.6)

            if not convert_btn:
                # Fallback: OCR the menu screenshot and look for the label text.
                logger.warning("❌ CV 未找到 '转文字' 按钮,尝试小范围 OCR 兜底...")
                for bbox, text, _conf in self.ocr_kit.read_text(menu_shot_path):
                    if "转文" in text or "文字" in text:
                        convert_btn = self._bbox_center(bbox)
                        logger.info(f"✅ OCR 兜底找到 '转文字' 按钮: {convert_btn}")
                        break

            if not convert_btn:
                logger.warning("❌ CV 和 OCR 均未找到 '转文字' 按钮,取消操作。")
                # Tap a blank spot mid-screen to close the menu.
                self.d.click(*self.MENU_DISMISS_POINT)
                return None

            logger.info(f"✅ 最终找到 '转文字' 按钮坐标: {convert_btn}")
            self.d.click(convert_btn[0], convert_btn[1])

            # 3. Wait for the conversion; longer clips get a longer wait (min 2 s).
            duration = self._parse_voice_duration(content)
            wait_seconds = max(2, duration / 5.0)
            logger.info(f"⏳ 语音时长 {duration}s,等待转换 {wait_seconds:.1f}s...")
            await asyncio.sleep(wait_seconds)

            # 4. Screenshot the result and OCR the whole screen.
            ocr_shot_path = os.path.join(self.screenshot_dir, "t6_ocr_shot.jpg")
            logger.info(f"📸 截取 OCR 识别图: {ocr_shot_path}")
            self.d.screenshot(ocr_shot_path)

            logger.info("📖 开始 OCR 识别...")
            ocr_results = self.ocr_kit.read_text(ocr_shot_path)
            logger.info(f"✅ OCR 识别完成,获取 {len(ocr_results)} 个文本块")
        except Exception as e:
            logger.error(f"❌ process_single_voice 发生异常: {e}", exc_info=True)
            return None

        # Keep only the text between this bubble and the next message / input box.
        full_text = self._extract_transcript(ocr_results, vy, next_msg, input_box_y)
        logger.info(f"📝 OCR 识别结果: {full_text}")

        # 5./6. Restore the original view: long-press the same spot again and tap
        # "取消转文字". This assumes the bubble has not moved after the
        # transcript appeared. A failure here must not discard the transcript
        # that was already read, so it is logged and swallowed.
        try:
            self.d.long_click(vx, vy, 0.6)
            await asyncio.sleep(0.3)

            menu_shot_path_cancel = os.path.join(self.screenshot_dir, "t6_menu_shot_cancel.jpg")
            self.d.screenshot(menu_shot_path_cancel)

            cancel_template = self._template("cancel_zhuan_wen_zi.jpg")
            cancel_btn = find_template_match(menu_shot_path_cancel, cancel_template, threshold=0.6)

            if cancel_btn:
                logger.info(f"✅ CV 找到 '取消转文字' 按钮: {cancel_btn}")
                self.d.click(cancel_btn[0], cancel_btn[1])
            else:
                logger.warning("❌ CV 未找到 '取消转文字' 按钮,点击中心区域关闭菜单。")
                self.d.click(*self.MENU_DISMISS_POINT)
        except Exception as e:
            logger.error(f"❌ 取消转文字时发生异常: {e}", exc_info=True)

        return full_text

    # ------------------------------------------------------------------
    # Screen analysis
    # ------------------------------------------------------------------
    def _scan_screen(self, shot_path):
        """Analyse a screenshot and return ``(dialogue_log, voice_messages)``.

        ``dialogue_log`` holds one dict per message, sorted top to bottom;
        ``voice_messages`` holds the voice bubbles attributed to the other party.
        An annotated copy of the screenshot is saved as "t6_debug_view.jpg".
        """
        import cv2

        logger.info("👁️ 正在进行本地视觉扫描...")

        # A. Voice icons and unread markers (red dots) via template matching.
        audio_matches = find_all_template_matches(shot_path, self._template("audio.jpg"), threshold=0.8)
        red_points = find_all_template_matches(shot_path, self._template("red_point.jpg"), threshold=0.8)

        # B. Full-screen OCR, ordered top to bottom.
        ocr_results = sorted(self.ocr_kit.read_text(shot_path), key=lambda r: self._bbox_center(r[0])[1])

        dialogue_log = []
        voice_messages = []
        debug_img = cv2.imread(shot_path)
        # Indices of OCR blocks consumed as voice duration labels.
        matched_ocr_indices = set()

        for ax, ay in audio_matches or []:
            if ay < self.CHAT_TOP_Y or ay > self.CHAT_BOTTOM_Y:
                logger.info(f"⏭️ 忽略区域外语音图标: ({ax}, {ay})")
                continue

            sender = "对方" if ax < self.PEER_VOICE_MAX_X else "我"
            logger.info(f"🎙️ 发现语音图标: x={ax}, y={ay}, 发送者={sender}")

            # A red dot to the right of the icon on roughly the same row marks it unread.
            is_unread = False
            for rx, ry in red_points or []:
                if abs(ry - ay) < 50 and rx > ax:
                    is_unread = True
                    cv2.circle(debug_img, (int(rx), int(ry)), 12, (0, 0, 255), -1)
                    break

            # The duration label is the nearby OCR block that looks like `12"`.
            duration_text = "语音"
            for idx, (bbox, text, _conf) in enumerate(ocr_results):
                c_x, c_y = self._bbox_center(bbox)
                if abs(c_y - ay) < 40 and abs(c_x - ax) < 300 and ('"' in text or text.isdigit()):
                    duration_text = text
                    matched_ocr_indices.add(idx)
                    break

            # Debug overlay: green box around the icon, red dot on the tap position
            # (the tap position is the icon centre).
            cv2.rectangle(debug_img, (int(ax - 30), int(ay - 30)), (int(ax + 30), int(ay + 30)), (0, 255, 0), 3)
            cv2.circle(debug_img, (int(ax), int(ay)), 15, (0, 0, 255), -1)

            voice_id = f"voice_{ax}_{ay}"
            if sender == "对方":
                voice_messages.append({
                    "type": "voice",
                    "content": duration_text,
                    "coordinates": [ax, ay],
                    "sender": sender,
                    "is_unread": is_unread,
                    "id": voice_id,
                })
            dialogue_log.append({
                "y": ay,
                "text": f"{sender}: [语音] {duration_text}",
                "is_voice": True,
                "id": voice_id,
            })

        # Remaining OCR blocks are ordinary text messages.
        for idx, (bbox, text, _conf) in enumerate(ocr_results):
            if idx in matched_ocr_indices:
                continue
            x_min, x_max = bbox[0][0], bbox[2][0]
            y_min, y_max = bbox[0][1], bbox[2][1]
            c_x, c_y = (x_min + x_max) / 2, (y_min + y_max) / 2
            if c_y < self.CHAT_TOP_Y or c_y > self.CHAT_BOTTOM_Y:
                continue

            sender = self._classify_text_sender(x_min, x_max)
            if sender == "系统":
                # Blocks attributed to neither side are left out of the dialogue.
                continue
            logger.info(f"💬 发现文本消息: x={c_x}, y={c_y}, 发送者={sender}, 内容={text}")
            cv2.rectangle(debug_img, (int(x_min), int(y_min)), (int(x_max), int(y_max)),
                          self._SENDER_COLORS[sender], 1)
            dialogue_log.append({
                "y": c_y,
                "text": f"{sender}: {text}",
                "is_voice": False,
            })

        # Voice and text entries were collected separately; restore screen order.
        dialogue_log.sort(key=lambda entry: entry['y'])

        debug_shot_path = os.path.join(self.screenshot_dir, "t6_debug_view.jpg")
        cv2.imwrite(debug_shot_path, debug_img)
        logger.info(f"🎨 已保存视觉调试图: {debug_shot_path}")

        return dialogue_log, voice_messages

    def _locate_input_box(self, shot_path):
        """Return the input box centre: template match, else 88 % down the screen."""
        # NOTE: the original code only assumes that the input_box.jpg template exists.
        center = find_template_match(shot_path, self._template("input_box.jpg"), threshold=0.6)
        if center:
            return center

        from PIL import Image
        with Image.open(shot_path) as img:
            width, height = img.size
        center = [width // 2, int(height * 0.88)]
        logger.info(f"📍 使用几何兜底输入框坐标: {center}")
        return center

    async def _transcribe_pending_voices(self, voice_messages, dialogue_log, input_y):
        """Transcribe unread voice messages (all of them on the first cycle) in place."""
        for v_msg in voice_messages:
            if not (v_msg.get("is_unread") or self.is_first_run):
                continue
            logger.info(f"🔴 发现未读/待处理语音: {v_msg['content']}")
            text = await self.process_single_voice(v_msg, None, input_y)
            if not text:
                continue
            # Match by id rather than by duration text: two voice messages with
            # the same duration would otherwise update the wrong entry.
            for item in dialogue_log:
                if item.get("id") == v_msg["id"]:
                    item["text"] = item["text"].replace("[语音]", f"[语音转文字: {text}]", 1)
                    break

        self.is_first_run = False

    async def _maybe_reply(self, dialogue_log, input_center):
        """Reply when the last on-screen message is a new one from the other party."""
        if not dialogue_log:
            return

        current_last_content = self._last_peer_message(dialogue_log)
        if current_last_content is None:
            # The last message is not from the other party: nothing to answer,
            # and the handled-message marker is cleared.
            self.last_processed_msg = None
            return
        if current_last_content == self.last_processed_msg:
            return

        logger.info(f"💡 发现新消息,准备回复。内容: {current_last_content}")
        logger.info("🤖 准备调用 LLM 生成回复...")

        history_text = "\n".join(item['text'] for item in dialogue_log)
        # Mark the message as handled up front, but roll back if the LLM call
        # fails so a transient error does not drop the message for good.
        previous_msg = self.last_processed_msg
        self.last_processed_msg = current_last_content
        try:
            reply = await self.get_reply(history_text)
        except Exception:
            self.last_processed_msg = previous_msg
            raise

        if not reply:
            logger.warning("⚠️ LLM 未生成有效回复。")
            return

        logger.info(f"💡 LLM 回复: {reply}")
        perform_input_action(self.d, input_center, reply)
        # Give the sent bubble time to render before the next scan.
        await asyncio.sleep(1)

    # ------------------------------------------------------------------
    # Main loop
    # ------------------------------------------------------------------
    async def run(self):
        """Run the monitor loop forever: scan, transcribe, reply, sleep.

        A cycle is skipped when the screenshot is byte-identical to the last
        one. Any exception inside a cycle is logged and the loop continues.
        """
        import hashlib

        logger.info("🚀 大张老师自动巡课系统启动...")

        # 0. Remove screenshots from earlier runs.
        clean_screenshots_dir()

        last_screen_md5 = None
        tmp_shot = os.path.join(self.screenshot_dir, "t6_monitor_temp.jpg")

        while True:
            try:
                logger.info("🔍 正在扫描当前界面内容...")

                # 1. Screenshot.
                logger.info(f"📸 正在截取屏幕... ({datetime.now().strftime('%H:%M:%S')})")
                self.d.screenshot(tmp_shot)

                # Skip the cycle when the screen has not changed (MD5 is used
                # only as a cheap fingerprint, not for security).
                with open(tmp_shot, 'rb') as f:
                    current_md5 = hashlib.md5(f.read()).hexdigest()
                if current_md5 == last_screen_md5:
                    logger.info("😴 屏幕内容未变,跳过本次循环。")
                    await asyncio.sleep(CHECK_INTERVAL)
                    continue
                last_screen_md5 = current_md5

                # 2. Local visual analysis (template matching + OCR).
                dialogue_log, voice_messages = self._scan_screen(tmp_shot)

                # 3. Input box position.
                input_center = self._locate_input_box(tmp_shot)

                # 4. Voice messages.
                await self._transcribe_pending_voices(voice_messages, dialogue_log, input_center[1])

                # 5. LLM reply.
                await self._maybe_reply(dialogue_log, input_center)

                await asyncio.sleep(CHECK_INTERVAL)
            except Exception as e:
                logger.error(f"❌ 主循环发生错误: {e}", exc_info=True)
                # Forget the fingerprint so the same screen is analysed again
                # instead of being skipped as "unchanged" after a failed cycle.
                last_screen_md5 = None
                await asyncio.sleep(CHECK_INTERVAL)
|
|
|
|
|
|
|
2026-01-25 10:28:31 +08:00
|
|
|
|
if __name__ == "__main__":
    # Win32Patch comes from the project's Util package; it is applied before
    # the bot (and with it the device connection) is created.
    Win32Patch.patch()
    bot = ChatBot()
    try:
        main_loop = bot.run()
        asyncio.run(main_loop)
    except KeyboardInterrupt:
        # Ctrl+C is the expected way to stop the endless monitor loop.
        logger.info("🛑 用户手动停止程序。")
    except Exception as exc:
        # logger.exception logs at ERROR level with the traceback attached.
        logger.exception(f"❌ 程序异常退出: {exc}")
|