This commit is contained in:
HuangHai
2026-01-26 19:57:19 +08:00
parent 236171e015
commit cb72f02030
3 changed files with 108 additions and 35 deletions

View File

@@ -144,6 +144,16 @@ class ChatMonitorBot:
"""主运行循环"""
logger.info("🚀 大张老师自动巡课系统启动 (T2 增强版)...")
# 定义 JSON 序列化辅助函数
def numpy_serializer(obj):
    """Convert NumPy values into JSON-serializable built-in Python objects.

    Intended to be passed as the ``default=`` hook of ``json.dumps``, which
    only calls it for objects the encoder cannot serialize natively.

    Args:
        obj: The object ``json`` could not encode.

    Returns:
        ``bool`` for NumPy booleans, ``int`` for NumPy integers, ``float``
        for NumPy floats, or a (nested) ``list`` for ``np.ndarray``.

    Raises:
        TypeError: If ``obj`` is none of the supported NumPy types, which is
            the signal ``json`` expects for unsupported values.
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch;
    # otherwise a NumPy boolean inside a message would abort serialization.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        # tolist() also converts the contained scalars to built-in types.
        return obj.tolist()
    raise TypeError(f"Type {type(obj)} not serializable")
# 1. 环境准备
if not self.step_1_prepare_env(): return
if not self.step_2_connect_device(): return
@@ -171,22 +181,18 @@ class ChatMonitorBot:
# 格式化输出:[发送者] 内容 (类型)
sender = msg.get('sender', '未知')
content = msg.get('content', '')
msg_type = msg.get('type', 'unknown')
logger.info(f"[{sender}] {content} ({msg_type})")
msg_type = "语音" if msg.get('type') == 'voice' else "文字"
# 按照用户要求的格式输出
logger.info(f"说话人: {sender}")
logger.info(f"消息类型: {msg_type}")
logger.info(f"消息内容: {content}")
logger.info("-" * 20)
logger.info("="*50 + "\n")
# 初始化最后处理的消息哈希,避免重复回复第一条
last_msg = self.dialogue_log[-1]
# last_msg 是字典,需要转字符串再 encode
def numpy_serializer(obj):
    """Convert NumPy values into JSON-serializable built-in Python objects.

    Intended to be passed as the ``default=`` hook of ``json.dumps``, which
    only calls it for objects the encoder cannot serialize natively.

    Args:
        obj: The object ``json`` could not encode.

    Returns:
        ``bool`` for NumPy booleans, ``int`` for NumPy integers, ``float``
        for NumPy floats, or a (nested) ``list`` for ``np.ndarray``.

    Raises:
        TypeError: If ``obj`` is none of the supported NumPy types, which is
            the signal ``json`` expects for unsupported values.
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch;
    # otherwise a NumPy boolean inside a message would abort serialization.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        # tolist() also converts the contained scalars to built-in types.
        return obj.tolist()
    raise TypeError(f"Type {type(obj)} not serializable")
msg_str = json.dumps(last_msg, sort_keys=True, ensure_ascii=False, default=numpy_serializer)
self.last_processed_msg_hash = hashlib.md5(msg_str.encode('utf-8')).hexdigest()
self.last_screen_hash = self.get_image_hash(self.screenshot_path)
@@ -233,10 +239,13 @@ class ChatMonitorBot:
# D. 只关注最后一条消息
last_msg = dialogue_log[-1]
current_msg_hash = hashlib.md5(last_msg.encode('utf-8')).hexdigest()
# last_msg 是字典,需要序列化
msg_str = json.dumps(last_msg, sort_keys=True, ensure_ascii=False, default=numpy_serializer)
current_msg_hash = hashlib.md5(msg_str.encode('utf-8')).hexdigest()
# E. 判断是否需要回复 (对方发送且非重复消息)
if "对方:" in last_msg:
sender = last_msg.get('sender', '')
if sender == "对方":
if current_msg_hash != self.last_processed_msg_hash:
event_shot = WxUtil.get_next_debug_path("event_new_msg")
self.device.screenshot(event_shot)

View File

@@ -173,10 +173,35 @@ def _scan_chat_messages(image_path):
logger.info("正在执行 OCR 识别...")
ocr_results = ocr_kit.read_text(image_path)
# 4.5 尝试提取聊天标题 (对方昵称)
chat_title = "对方"
potential_titles = []
for bbox, text, conf in ocr_results:
c_y = int((bbox[0][1] + bbox[2][1]) / 2)
c_x = int((bbox[0][0] + bbox[2][0]) / 2)
# 标题区域通常在顶部 (状态栏下方,消息列表上方)
if 60 < c_y < 140:
clean = text.strip()
# 排除时间、信号、返回按钮等
if re.match(r'^\d{1,2}:\d{2}$', clean): continue
if "微信" in clean or "WeChat" in clean: continue
if clean in ["<", "返回", "消息", "(", ")"]: continue
if re.match(r'^\d+$', clean): continue # 排除纯数字(如未读数)
if len(clean) > 0:
potential_titles.append((c_x, clean))
if potential_titles:
# 优先取最接近水平中心的文本作为标题
potential_titles.sort(key=lambda x: abs(x[0] - w/2))
chat_title = potential_titles[0][1]
# 去除可能包含的括号(比如备注名后的群聊人数,虽然后面会被截断)
chat_title = re.sub(r'\(\d+\)$', '', chat_title).strip()
logger.info(f"识别到聊天标题/对方昵称: {chat_title}")
# 微信菜单关键字(用于排除干扰)
MENU_KEYWORDS = ["听筒播放", "收藏", "背景播放", "删除", "多选", "取消转文字", "转文字", "引用", "提醒"]
# 忽略的系统消息内容
IGNORE_CONTENT = ["撤回了一条消息", "打招呼的消息", "拍了拍", "你撤回了一条消息", "引用"]
IGNORE_CONTENT = ["撤回了一条消息", "打招呼的消息", "拍了拍", "你撤回了一条消息", "引用", "Clear Text", "Switch IME", "Done"]
# 5. 整合所有消息
messages = []
@@ -188,6 +213,8 @@ def _scan_chat_messages(image_path):
cv2.putText(debug_img, "TOP_FILTER", (10, 140), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
cv2.putText(debug_img, "BOTTOM_FILTER", (10, h - 110), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
claimed_ocr_indices = set()
# A. 添加语音消息
for ax, ay in audio_matches:
# 标记所有找到的语音图标 (用于调试)
@@ -211,12 +238,18 @@ def _scan_chat_messages(image_path):
# 改进:判断是否已转文字
is_converted = False
converted_trigger_text = ""
for bbox, text, conf in ocr_results:
associated_texts = [] # 存储关联的多行文本 [(y, x, text)]
for i, (bbox, text, conf) in enumerate(ocr_results):
if i in claimed_ocr_indices: continue
c_x = int((bbox[0][0] + bbox[2][0]) / 2)
c_y = int((bbox[0][1] + bbox[2][1]) / 2)
# 判定逻辑:文本在语音下方且水平偏移不大
if 30 < c_y - ay < 600 and abs(c_x - ax) < 600:
# 判定逻辑:文本在语音下方且水平偏移不大 (放宽 Y 轴限制以包含侧边的时长文本)
# 2025-01-26: 增加 X 轴范围到 900 以适配超长语音条的右侧时长/文本
# 增加 Y 轴范围到 800 以适配多行转文字内容
if -50 < c_y - ay < 800 and abs(c_x - ax) < 900:
# 检查中间是否有其他语音图标
has_intermediate_audio = False
for other_ax, other_ay in audio_matches:
@@ -236,24 +269,35 @@ def _scan_chat_messages(image_path):
# 判定是否为系统消息
is_ignored = any(k in clean_text for k in IGNORE_CONTENT)
if not is_duration and not is_timestamp and clean_text not in MENU_KEYWORDS and not is_ignored:
# 噪音判定 (例如 "少3"")
is_noise = "" in clean_text and len(clean_text) < 8 and re.search(r'\d', clean_text)
if not is_duration and not is_timestamp and clean_text not in MENU_KEYWORDS and not is_ignored and not is_noise:
is_converted = True
# 针对 "少3"" 这种特殊噪点进行过滤,但仍标记为已转换
# 如果包含 "少" 且长度短且包含数字,视为噪点 (例如 "少3"")
if "" in clean_text and len(clean_text) < 6 and re.search(r'\d', clean_text):
logger.info(f"语音({ax},{ay}) 判定为已转换,但内容判定为噪点('{clean_text}'),置为空")
converted_trigger_text = ""
else:
converted_trigger_text = clean_text
logger.info(f"语音({ax},{ay}) 判定为已转换,关联到有效文本: '{clean_text}'")
break
associated_texts.append((c_y, c_x, clean_text))
claimed_ocr_indices.add(i)
# 不再 break继续寻找后续文本行
else:
# 这些文本虽然不作为内容,但它们属于语音消息的附属信息,标记为已处理
claimed_ocr_indices.add(i)
if is_timestamp:
logger.info(f"语音({ax},{ay}) 忽略下方时间戳文本: '{clean_text}'")
elif is_duration:
logger.info(f"语音({ax},{ay}) 忽略时长文本: '{clean_text}'")
elif is_noise:
logger.info(f"语音({ax},{ay}) 忽略噪音文本: '{clean_text}'")
elif is_ignored:
logger.info(f"语音({ax},{ay}) 忽略系统消息文本: '{clean_text}'")
else:
logger.info(f"语音({ax},{ay}) 忽略其他文本(可能是菜单): '{clean_text}'")
# 整合所有关联文本
if associated_texts:
# 按 Y 轴排序,如果 Y 接近则按 X 轴排序
associated_texts.sort(key=lambda x: (x[0], x[1]))
converted_trigger_text = "".join([t[2] for t in associated_texts])
logger.info(f"语音({ax},{ay}) 判定为已转换,最终合并文本: '{converted_trigger_text}'")
if is_converted:
logger.info(f"语音消息 ({ax}, {ay}) 已有转换文字: '{converted_trigger_text}',跳过")
@@ -275,7 +319,8 @@ def _scan_chat_messages(image_path):
})
# B. 添加文本消息
for bbox, text, conf in ocr_results:
for i, (bbox, text, conf) in enumerate(ocr_results):
if i in claimed_ocr_indices: continue
c_x = int((bbox[0][0] + bbox[2][0]) / 2)
c_y = int((bbox[0][1] + bbox[2][1]) / 2)
@@ -284,12 +329,25 @@ def _scan_chat_messages(image_path):
time_pattern = r'(\d{4}年|\d{1,2}月|\d{1,2}日|\d{1,2}:\d{2}|昨天|今天|星期|上午|下午|晚上)'
if len(text) < 20 and (re.search(time_pattern, text) or re.match(r'^[0-9:\s日年月\-]+$', text)):
logger.info(f"忽略时间戳/日期文本: '{text}'")
continue
clean_text = text.strip()
if re.match(r'^.?[0-9]{1,2}"?$', clean_text): continue
if clean_text in MENU_KEYWORDS: continue
if any(k in clean_text for k in IGNORE_CONTENT): continue
if re.match(r'^.?[0-9]{1,2}"?$', clean_text):
logger.info(f"忽略疑似时长文本: '{clean_text}'")
continue
# 噪音判定 (例如 "少3"")
if "" in clean_text and len(clean_text) < 8 and re.search(r'\d', clean_text):
logger.info(f"忽略噪音文本: '{clean_text}'")
continue
if clean_text in MENU_KEYWORDS:
logger.info(f"忽略菜单关键词: '{clean_text}'")
continue
if any(k in clean_text for k in IGNORE_CONTENT):
logger.info(f"忽略系统消息内容: '{clean_text}'")
continue
left_x = bbox[0][0]
sender = "对方" if left_x < w * 0.5 else ""
@@ -304,7 +362,7 @@ def _scan_chat_messages(image_path):
# 6. 排序
messages.sort(key=lambda x: x['y'])
return messages, debug_img
return messages, debug_img, chat_title
async def analyze_chat_image(image_path, output_path, device=None, target_name="对方", process_strategy="ALL"):
"""
@@ -339,10 +397,16 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
logger.info(f"--- 分析循环 第 {loop_count} 次 ---")
# 1. 扫描当前屏幕
messages, debug_img = _scan_chat_messages(current_image_path)
messages, debug_img, chat_title = _scan_chat_messages(current_image_path)
if messages is None: # 读取失败
return [], None
# 更新消息发送者名称 (将 "对方" 替换为 实际标题)
if chat_title and chat_title != "对方":
for m in messages:
if m['sender'] == "对方":
m['sender'] = chat_title
# 保存当前状态的调试图
if current_output_path:
cv2.imwrite(current_output_path, debug_img)
@@ -432,7 +496,7 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
peek_shot = get_next_debug_path("step_peek_content")
d.screenshot(peek_shot)
logger.info("正在读取转换后的语音内容...")
peek_messages, _ = _scan_chat_messages(peek_shot)
peek_messages, _, _ = _scan_chat_messages(peek_shot)
# 2. 查找并保存内容
found_content = None