'commit'

2026-01-26 19:57:19 +08:00
parent 236171e015
commit cb72f02030
3 changed files with 108 additions and 35 deletions
--- a/WeiXin/T2_ChatMonitor.py
+++ b/WeiXin/T2_ChatMonitor.py
@@ -144,6 +144,16 @@ class ChatMonitorBot:
        """主运行循环"""
        logger.info("🚀 大张老师自动巡课系统启动 (T2 增强版)...")
        
+        # 定义 JSON 序列化辅助函数
+        def numpy_serializer(obj):
+            if isinstance(obj, np.integer):
+                return int(obj)
+            if isinstance(obj, np.floating):
+                return float(obj)
+            if isinstance(obj, np.ndarray):
+                return obj.tolist()
+            raise TypeError(f"Type {type(obj)} not serializable")
+        
        # 1. 环境准备
        if not self.step_1_prepare_env(): return
        if not self.step_2_connect_device(): return
@@ -171,22 +181,18 @@ class ChatMonitorBot:
                # 格式化输出：[发送者] 内容 (类型)
                sender = msg.get('sender', '未知')
                content = msg.get('content', '')
-                msg_type = msg.get('type', 'unknown')
-                logger.info(f"[{sender}] {content} ({msg_type})")
+                msg_type = "语音" if msg.get('type') == 'voice' else "文字"
+                
+                # 按照用户要求的格式输出
+                logger.info(f"说话人: {sender}")
+                logger.info(f"消息类型: {msg_type}")
+                logger.info(f"消息内容: {content}")
+                logger.info("-" * 20)
            logger.info("="*50 + "\n")
            
            # 初始化最后处理的消息哈希，避免重复回复第一条
            last_msg = self.dialogue_log[-1]
            # last_msg 是字典，需要转字符串再 encode
-            def numpy_serializer(obj):
-                if isinstance(obj, np.integer):
-                    return int(obj)
-                if isinstance(obj, np.floating):
-                    return float(obj)
-                if isinstance(obj, np.ndarray):
-                    return obj.tolist()
-                raise TypeError(f"Type {type(obj)} not serializable")
-
            msg_str = json.dumps(last_msg, sort_keys=True, ensure_ascii=False, default=numpy_serializer)
            self.last_processed_msg_hash = hashlib.md5(msg_str.encode('utf-8')).hexdigest()
            self.last_screen_hash = self.get_image_hash(self.screenshot_path)
@@ -233,10 +239,13 @@ class ChatMonitorBot:
                
                # D. 只关注最后一条消息
                last_msg = dialogue_log[-1]
-                current_msg_hash = hashlib.md5(last_msg.encode('utf-8')).hexdigest()
+                # last_msg 是字典，需要序列化
+                msg_str = json.dumps(last_msg, sort_keys=True, ensure_ascii=False, default=numpy_serializer)
+                current_msg_hash = hashlib.md5(msg_str.encode('utf-8')).hexdigest()
                
                # E. 判断是否需要回复 (对方发送且非重复消息)
-                if "对方:" in last_msg:
+                sender = last_msg.get('sender', '')
+                if sender == "对方":
                    if current_msg_hash != self.last_processed_msg_hash:
                        event_shot = WxUtil.get_next_debug_path("event_new_msg")
                        self.device.screenshot(event_shot)
--- a/WeiXin/WxUtil.py
+++ b/WeiXin/WxUtil.py
@@ -173,10 +173,35 @@ def _scan_chat_messages(image_path):
    logger.info("正在执行 OCR 识别...")
    ocr_results = ocr_kit.read_text(image_path)
    
+    # 4.5 尝试提取聊天标题 (对方昵称)
+    chat_title = "对方"
+    potential_titles = []
+    for bbox, text, conf in ocr_results:
+        c_y = int((bbox[0][1] + bbox[2][1]) / 2)
+        c_x = int((bbox[0][0] + bbox[2][0]) / 2)
+        # 标题区域通常在顶部 (状态栏下方，消息列表上方)
+        if 60 < c_y < 140:
+            clean = text.strip()
+            # 排除时间、信号、返回按钮等
+            if re.match(r'^\d{1,2}:\d{2}$', clean): continue
+            if "微信" in clean or "WeChat" in clean: continue
+            if clean in ["<", "返回", "消息", "(", ")"]: continue
+            if re.match(r'^\d+$', clean): continue # 排除纯数字(如未读数)
+            if len(clean) > 0:
+                potential_titles.append((c_x, clean))
+    
+    if potential_titles:
+        # 优先取最接近水平中心的文本作为标题
+        potential_titles.sort(key=lambda x: abs(x[0] - w/2))
+        chat_title = potential_titles[0][1]
+        # 去除可能包含的括号(比如备注名后的群聊人数，虽然后面会被截断)
+        chat_title = re.sub(r'\(\d+\)$', '', chat_title).strip()
+        logger.info(f"识别到聊天标题/对方昵称: {chat_title}")
+
    # 微信菜单关键字（用于排除干扰）
    MENU_KEYWORDS = ["听筒播放", "收藏", "背景播放", "删除", "多选", "取消转文字", "转文字", "引用", "提醒"]
    # 忽略的系统消息内容
-    IGNORE_CONTENT = ["撤回了一条消息", "打招呼的消息", "拍了拍", "你撤回了一条消息", "引用"]
+    IGNORE_CONTENT = ["撤回了一条消息", "打招呼的消息", "拍了拍", "你撤回了一条消息", "引用", "Clear Text", "Switch IME", "Done"]

    # 5. 整合所有消息
    messages = []
@@ -188,6 +213,8 @@ def _scan_chat_messages(image_path):
    cv2.putText(debug_img, "TOP_FILTER", (10, 140), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
    cv2.putText(debug_img, "BOTTOM_FILTER", (10, h - 110), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)

+    claimed_ocr_indices = set()
+
    # A. 添加语音消息
    for ax, ay in audio_matches:
        # 标记所有找到的语音图标 (用于调试)
@@ -211,12 +238,18 @@ def _scan_chat_messages(image_path):
        # 改进：判断是否已转文字
        is_converted = False
        converted_trigger_text = ""
-        for bbox, text, conf in ocr_results:
+        associated_texts = [] # 存储关联的多行文本 [(y, x, text)]
+        
+        for i, (bbox, text, conf) in enumerate(ocr_results):
+            if i in claimed_ocr_indices: continue
+
            c_x = int((bbox[0][0] + bbox[2][0]) / 2)
            c_y = int((bbox[0][1] + bbox[2][1]) / 2)
            
-            # 判定逻辑：文本在语音下方且水平偏移不大
-            if 30 < c_y - ay < 600 and abs(c_x - ax) < 600:
+            # 判定逻辑：文本在语音下方且水平偏移不大 (放宽 Y 轴限制以包含侧边的时长文本)
+            # 2026-01-26: 增加 X 轴范围到 900 以适配超长语音条的右侧时长/文本
+            # 增加 Y 轴范围到 800 以适配多行转文字内容
+            if -50 < c_y - ay < 800 and abs(c_x - ax) < 900:
                # 检查中间是否有其他语音图标
                has_intermediate_audio = False
                for other_ax, other_ay in audio_matches:
@@ -236,24 +269,35 @@ def _scan_chat_messages(image_path):
                # 判定是否为系统消息
                is_ignored = any(k in clean_text for k in IGNORE_CONTENT)
                
-                if not is_duration and not is_timestamp and clean_text not in MENU_KEYWORDS and not is_ignored:
+                # 噪音判定 (例如 "少3"")
+                is_noise = "少" in clean_text and len(clean_text) < 8 and re.search(r'\d', clean_text)
+
+                if not is_duration and not is_timestamp and clean_text not in MENU_KEYWORDS and not is_ignored and not is_noise:
                    is_converted = True
-                    # 针对 "少3"" 这种特殊噪点进行过滤，但仍标记为已转换
-                    # 如果包含 "少" 且长度短且包含数字，视为噪点 (例如 "少3"")
-                    if "少" in clean_text and len(clean_text) < 6 and re.search(r'\d', clean_text):
-                        logger.info(f"语音({ax},{ay}) 判定为已转换，但内容判定为噪点('{clean_text}')，置为空")
-                        converted_trigger_text = ""
-                    else:
-                        converted_trigger_text = clean_text
-                        logger.info(f"语音({ax},{ay}) 判定为已转换，关联到有效文本: '{clean_text}'")
-                    break
+                    associated_texts.append((c_y, c_x, clean_text))
+                    claimed_ocr_indices.add(i)
+                    # 不再 break，继续寻找后续文本行
                else:
+                    # 这些文本虽然不作为内容，但它们属于语音消息的附属信息，标记为已处理
+                    claimed_ocr_indices.add(i)
+                    
                    if is_timestamp:
                        logger.info(f"语音({ax},{ay}) 忽略下方时间戳文本: '{clean_text}'")
                    elif is_duration:
                        logger.info(f"语音({ax},{ay}) 忽略时长文本: '{clean_text}'")
+                    elif is_noise:
+                        logger.info(f"语音({ax},{ay}) 忽略噪音文本: '{clean_text}'")
                    elif is_ignored:
                        logger.info(f"语音({ax},{ay}) 忽略系统消息文本: '{clean_text}'")
+                    else:
+                         logger.info(f"语音({ax},{ay}) 忽略其他文本(可能是菜单): '{clean_text}'")
+        
+        # 整合所有关联文本
+        if associated_texts:
+            # 按 Y 轴排序，如果 Y 接近则按 X 轴排序
+            associated_texts.sort(key=lambda x: (x[0], x[1]))
+            converted_trigger_text = "".join([t[2] for t in associated_texts])
+            logger.info(f"语音({ax},{ay}) 判定为已转换，最终合并文本: '{converted_trigger_text}'")
        
        if is_converted:
            logger.info(f"语音消息 ({ax}, {ay}) 已有转换文字: '{converted_trigger_text}'，跳过")
@@ -275,7 +319,8 @@ def _scan_chat_messages(image_path):
        })
        
    # B. 添加文本消息
-    for bbox, text, conf in ocr_results:
+    for i, (bbox, text, conf) in enumerate(ocr_results):
+        if i in claimed_ocr_indices: continue
        c_x = int((bbox[0][0] + bbox[2][0]) / 2)
        c_y = int((bbox[0][1] + bbox[2][1]) / 2)
        
@@ -284,12 +329,25 @@ def _scan_chat_messages(image_path):
        
        time_pattern = r'(\d{4}年|\d{1,2}月|\d{1,2}日|\d{1,2}:\d{2}|昨天|今天|星期|上午|下午|晚上)'
        if len(text) < 20 and (re.search(time_pattern, text) or re.match(r'^[0-9:\s日年月\-]+$', text)):
+            logger.info(f"忽略时间戳/日期文本: '{text}'")
            continue
        
        clean_text = text.strip()
-        if re.match(r'^.?[0-9]{1,2}"?$', clean_text): continue
-        if clean_text in MENU_KEYWORDS: continue
-        if any(k in clean_text for k in IGNORE_CONTENT): continue
+        if re.match(r'^.?[0-9]{1,2}"?$', clean_text): 
+            logger.info(f"忽略疑似时长文本: '{clean_text}'")
+            continue
+            
+        # 噪音判定 (例如 "少3"")
+        if "少" in clean_text and len(clean_text) < 8 and re.search(r'\d', clean_text):
+             logger.info(f"忽略噪音文本: '{clean_text}'")
+             continue
+
+        if clean_text in MENU_KEYWORDS: 
+            logger.info(f"忽略菜单关键词: '{clean_text}'")
+            continue
+        if any(k in clean_text for k in IGNORE_CONTENT): 
+            logger.info(f"忽略系统消息内容: '{clean_text}'")
+            continue
            
        left_x = bbox[0][0]
        sender = "对方" if left_x < w * 0.5 else "我"
@@ -304,7 +362,7 @@ def _scan_chat_messages(image_path):
        
    # 6. 排序
    messages.sort(key=lambda x: x['y'])
-    return messages, debug_img
+    return messages, debug_img, chat_title

 async def analyze_chat_image(image_path, output_path, device=None, target_name="对方", process_strategy="ALL"):
    """
@@ -339,10 +397,16 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
            logger.info(f"--- 分析循环 第 {loop_count} 次 ---")
            
            # 1. 扫描当前屏幕
-            messages, debug_img = _scan_chat_messages(current_image_path)
+            messages, debug_img, chat_title = _scan_chat_messages(current_image_path)
            if messages is None: # 读取失败
                return [], None
-                
+            
+            # 更新消息发送者名称 (将 "对方" 替换为 实际标题)
+            if chat_title and chat_title != "对方":
+                for m in messages:
+                    if m['sender'] == "对方":
+                        m['sender'] = chat_title
+
            # 保存当前状态的调试图
            if current_output_path:
                cv2.imwrite(current_output_path, debug_img)
@@ -432,7 +496,7 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
                peek_shot = get_next_debug_path("step_peek_content")
                d.screenshot(peek_shot)
                logger.info("正在读取转换后的语音内容...")
-                peek_messages, _ = _scan_chat_messages(peek_shot)
+                peek_messages, _, _ = _scan_chat_messages(peek_shot)
                
                # 2. 查找并保存内容
                found_content = None
--- a/WeiXin/pycache/WxUtil.cpython-310.pyc
+++ b/WeiXin/pycache/WxUtil.cpython-310.pyc