This commit is contained in:
HuangHai
2026-01-31 08:07:58 +08:00
parent cb6ddbb76d
commit a21676bacf
2 changed files with 54 additions and 62 deletions

View File

@@ -222,7 +222,8 @@ class ChatMonitorBot:
self.last_screen_hash = current_screen_hash
logger.info("📸 屏幕发生变化,正在分析...")
# C. 分析最新图片
# C. 分析最新图片:识别发送者、消息类型及内容
logger.info("正在分析聊天界面...")
dialogue_log, input_pos = await WxUtil.analyze_chat_image(
self.screenshot_path,
self.debug_view_path,
@@ -232,114 +233,105 @@ class ChatMonitorBot:
)
if not dialogue_log:
logger.info("😴 未识别到有效消息")
# logger.info("未检测到有效对话内容")
await asyncio.sleep(self.check_interval)
continue
logger.info(f"📊 当前识别到 {len(dialogue_log)} 条消息,最后一条: {dialogue_log[-1]}")
# 更新当前对话日志(可用于上下文参考)
# 更新当前对话日志
self.dialogue_log = dialogue_log
self.input_pos = input_pos
# D. 只关注最后一条消息
# D. 提取最新消息并检查是否需要回复
last_msg = dialogue_log[-1]
# 计算稳定哈希(忽略坐标变化)
current_msg_hash = self.get_stable_message_hash(last_msg)
# E. 判断是否需要回复 (对方发送且非重复消息)
sender = last_msg.get('sender', '')
# Check if hash is already processed (in-memory only)
# 检查该消息是否已经处理过 (通过内容哈希)
is_processed = current_msg_hash in self.processed_hashes
# Log only if it changed from last *in-memory* check to avoid spam
if is_processed and current_msg_hash != self.last_processed_msg_hash:
# logger.info(f"🚫 [监控] 消息哈希已存在于历史记录中,跳过回复 (Hash: {current_msg_hash})")
self.last_processed_msg_hash = current_msg_hash
if not is_processed and current_msg_hash != self.last_processed_msg_hash:
if sender != "":
event_shot = WxUtil.get_next_debug_path("event_new_msg")
self.device.screenshot(event_shot)
logger.info(f"💡 [监控] 发现新消息: {last_msg},保存现场截图: {event_shot}")
logger.info(f"💡 发现新消息 [{last_msg.get('type')}]: {last_msg.get('content')}")
# 获取上下文文本 (格式化为 Sender: Content)
# 记录发现新消息的现场截图
msg_shot_path = os.path.join(WxUtil.OUTPUT_DIR, f"NewMsg_{int(time.time())}.jpg")
self.device.screenshot(msg_shot_path)
logger.info(f"已保存新消息现场截图: {msg_shot_path}")
# 获取上下文文本
context_text = "\n".join([f"{m.get('time_display', '') + ' ' if m.get('time_display') else ''}{m.get('sender')}: {m.get('content')}" for m in dialogue_log[:-1]])
last_content = last_msg.get('content') or ""
# 兜底逻辑:如果最后一条是语音且内容为空(可能因无红点未被 UNREAD 策略处理),尝试强制转换
# 兜底逻辑:语音消息若无文字内容,尝试强制触发重试
if last_msg.get('type') == 'voice' and not last_content.strip():
logger.info("⚠️ [监控] 最后一条语音消息未获取到内容(可能已读无红点),尝试强制转换...")
# 强制使用 LAST 策略重试,并且不还原状态(保持文字展开),防止下次循环因无内容再次触发重试
logger.info("检测到未成功转换的语音消息,尝试强制重试 OCR 转换...")
dialogue_log_retry, _ = await WxUtil.analyze_chat_image(
self.screenshot_path,
self.debug_view_path,
self.screenshot_path,
self.debug_view_path,
device=self.device,
process_strategy="LAST",
restore_processed_voice=False
process_strategy="LAST",
restore_processed_voice=False
)
if dialogue_log_retry:
# 更新引用
self.dialogue_log = dialogue_log_retry
dialogue_log = dialogue_log_retry
last_msg = dialogue_log[-1]
last_msg = dialogue_log_retry[-1]
last_content = last_msg.get('content') or ""
logger.info(f"🔄 [重试] 强制转换后内容: {last_content}")
# 重新构建哈希
current_msg_hash = self.get_stable_message_hash(last_msg)
# 再次检查是否已处理 (因为内容变了,哈希变了)
if current_msg_hash in self.processed_hashes:
logger.info(f"🚫 [重试] 转换后发现该消息已处理,跳过。")
self.last_processed_msg_hash = current_msg_hash
# 跳过本次循环的剩余部分
continue
# 生成回复
# 语音消息若重试后仍无内容,暂不回复
if last_msg.get('type') == 'voice' and not last_content.strip():
logger.warning("语音消息内容为空,暂不生成回复")
await asyncio.sleep(self.check_interval)
continue
# E. 生成回复
reply = await self.get_reply(last_content, context_text)
if reply:
logger.info(f"🤖 [监控] LLM 建议回复: {reply}")
logger.info(f"LLM 建议回复: {reply}")
if self.input_pos:
logger.info(f"⚡ [监控] 执行自动回复...")
# input_pos 是 ((x,y), box) 格式,取第一个元素坐标点
target_pos = self.input_pos[0] if isinstance(self.input_pos, (list, tuple)) and len(self.input_pos) == 2 and isinstance(self.input_pos[0], (list, tuple)) else self.input_pos
# 简单兼容处理:如果 input_pos[0] 是 tuple/list 且 input_pos[1] 是 None/box,则取 input_pos[0]
if isinstance(self.input_pos, (list, tuple)) and len(self.input_pos) == 2 and isinstance(self.input_pos[0], (list, tuple)):
target_pos = self.input_pos[0]
# [User Request] 语音转文字未关闭 -> 顺序不对
# 在回复发送前,尝试关闭语音转文字(如果最后一条是语音)
# 这样截图看起来更干净,且符合用户期望的顺序
# 如果触发回复的消息是语音,先点击语音条以清理状态
if last_msg.get('type') == 'voice':
logger.info("🧹 [监控] 尝试关闭语音转文字显示...")
logger.info("回复前点击语音消息中心以关闭转文字遮罩")
try:
# 点击语音消息中心,关闭文字
cx, cy = last_msg['center']
cx, cy = last_msg.get('center', (0, 0))
WxUtil.safe_device_click(self.device, cx, cy)
# 稍微等待界面刷新
await asyncio.sleep(1)
await asyncio.sleep(1.5)
self.device.screenshot(self.screenshot_path)
except Exception as e:
logger.warning(f"关闭语音转文字失败: {e}")
logger.warning(f"清理语音状态失败: {e}")
perform_input_action(self.device, target_pos, reply)
# 确定输入框位置
target_pos = self.input_pos[0] if isinstance(self.input_pos, (list, tuple)) and len(self.input_pos) == 2 else self.input_pos
# 发送后截图留存
reply_sent_shot = WxUtil.get_next_debug_path("event_reply_sent")
self.device.screenshot(reply_sent_shot)
logger.info(f"✅ [监控] 回复已发送,保存发送后截图: {reply_sent_shot}")
# 执行输入和发送动作,并保存过程截图
success = perform_input_action(
self.device,
target_pos,
reply,
auto_send=True,
debug_prefix=f"Reply_{int(time.time())}"
)
self._record_processed_hash(last_msg, current_msg_hash)
self.last_processed_msg_hash = current_msg_hash
if success:
logger.info(">>> 回复发送成功 <<<")
self._record_processed_hash(last_msg, current_msg_hash)
self.last_processed_msg_hash = current_msg_hash
else:
logger.error("回复动作执行失败")
else:
logger.error("❌ 未找到输入框位置,无法发送回复")
logger.error("无法定位输入框坐标,放弃本次回复")
else:
logger.info("⚪ [监控] LLM 认为无需回复")
logger.info("LLM 认为无需回复")
self._record_processed_hash(last_msg, current_msg_hash)
self.last_processed_msg_hash = current_msg_hash
else:
# 是我发的消息,更新哈希,不再处理
self.last_processed_msg_hash = current_msg_hash
await asyncio.sleep(self.check_interval)

View File

@@ -194,8 +194,8 @@ def setup_script_environment():
# 重置调试计数器
global _debug_counter
_debug_counter = 0
# 排除当前正在使用的日志文件
clear_directory(LOG_DIR, exclude_files=["T2_ChatMonitor.log", "WxUtil.log"])
# 清理所有日志和图片,确保新一轮运行有干净的环境
clear_directory(LOG_DIR)
clear_directory(OUTPUT_DIR)
def connect_device():