'commit'

2026-01-21 14:13:26 +08:00
parent 701cdb1dd0
commit af46512212
7 changed files with 721 additions and 34 deletions
--- a/Test/Screenshots/chat_result_20260121_113553.jpg
+++ b/Test/Screenshots/chat_result_20260121_113553.jpg
--- a/Test/Screenshots/chat_result_analyzed.jpg
+++ b/Test/Screenshots/chat_result_analyzed.jpg
--- a/Test/T1_StartWeiXin.py
+++ b/Test/T1_StartWeiXin.py
@@ -6,19 +6,400 @@ import sys
 import os
 import cv2
 import numpy as np
+import re
+
+# 添加项目根目录到 sys.path 以便导入 Util
+project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if project_root not in sys.path:
+    sys.path.append(project_root)
+
+from Util.EasyOcrKit import get_easyocr_reader

 # 配置日志
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger("TestWeChat")

-def analyze_chat_image(image_path, output_path):
+def find_input_box_center(image_path):
    """
-    识别微信聊天截图中的头像并画框
+    Locate the message input box at the bottom of a chat screenshot.
+
+    Two contour-based detectors are tried in order (Canny edges, then an
+    adaptive threshold). If neither yields a plausible box, a fixed default
+    position at 94% of the image height is returned instead, so a decodable
+    image always produces coordinates.
+
+    Args:
+        image_path: Path of the screenshot to analyse.
+
+    Returns:
+        A pair ``(center, rect)`` where ``center`` is ``(center_x, center_y)``
+        and ``rect`` is ``(x, y, w, h)``, both in full-image coordinates;
+        ``rect`` is intended for visualisation. ``(None, None)`` is returned
+        when the image cannot be decoded or any step raises.
+    """
+    try:
+        # Decode from a byte buffer, as the other image loads in this file do.
+        img_data = np.fromfile(image_path, dtype=np.uint8)
+        img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
+        if img is None:
+            return None, None
+
+        height, width = img.shape[:2]
+
+        # Only the bottom 12% of the screenshot is searched.
+        bottom_h = int(height * 0.12)
+        crop_y_start = height - bottom_h
+        gray = cv2.cvtColor(img[crop_y_start:height, 0:width], cv2.COLOR_BGR2GRAY)
+
+        # Strategy 1: Canny edges, dilated once to reconnect broken outlines.
+        # Accept boxes 50%-98% of the screen width and 30px-90% of the crop height.
+        edges = cv2.Canny(gray, 50, 150)
+        dilated = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
+        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        strategy = "CV-Canny"
+        box = _pick_largest_box(contours, crop_y_start, height,
+                                (width * 0.5, width * 0.98), (30, bottom_h * 0.9))
+
+        if box is None:
+            # Strategy 2 (backup): inverted adaptive threshold with looser limits
+            # (40%-95% of the width, 20px-80% of the crop height).
+            thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                           cv2.THRESH_BINARY_INV, 11, 2)
+            contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+            strategy = "CV-Adaptive"
+            box = _pick_largest_box(contours, crop_y_start, height,
+                                    (width * 0.4, width * 0.95), (20, bottom_h * 0.8))
+
+        if box is not None:
+            x, y, w, h = box
+            center_x = x + w // 2
+            # Box coordinates are relative to the crop; shift back to the full image.
+            center_y = crop_y_start + y + h // 2
+            logger.info(f"找到输入框({strategy}): ({center_x}, {center_y}), 尺寸: {w}x{h}")
+            return (center_x, center_y), (x, crop_y_start + y, w, h)
+
+        # Last resort: a fixed point at 94% of the height (the earlier 90.5%
+        # default was reported as too high).
+        logger.warning("未找到明显输入框轮廓，使用更靠下的默认坐标 (94%)")
+        default_y = int(height * 0.94)
+        center_x = width // 2
+
+        # Synthetic rectangle (70% wide, 8% tall) centred on the default point,
+        # so callers still have something to draw.
+        fake_w = int(width * 0.7)
+        fake_h = int(height * 0.08)
+        fake_x = (width - fake_w) // 2
+        fake_y = default_y - fake_h // 2
+
+        return (center_x, default_y), (fake_x, fake_y, fake_w, fake_h)
+
+    except Exception as e:
+        logger.error(f"查找输入框失败: {e}")
+        return None, None
+
+
+def _pick_largest_box(contours, crop_y_start, img_height, w_bounds, h_bounds):
+    """Return the largest-area bounding box ``(x, y, w, h)`` whose width and
+    height lie strictly inside ``w_bounds`` / ``h_bounds``, or None.
+
+    Boxes whose centre falls in the last 2% of the full image are skipped
+    (navigation bar / gesture strip area).
+    """
+    min_w, max_w = w_bounds
+    min_h, max_h = h_bounds
+    best_box = None
+    best_area = 0
+    for cnt in contours:
+        x, y, w, h = cv2.boundingRect(cnt)
+        if not (min_w < w < max_w and min_h < h < max_h):
+            continue
+        if crop_y_start + y + h // 2 > img_height * 0.98:
+            continue
+        if w * h > best_area:
+            best_area = w * h
+            best_box = (x, y, w, h)
+    return best_box
+
+def find_send_button(d):
+    """
+    Take a screenshot and look for the green send button by colour.
+
+    The screenshot is saved under ``Screenshots`` next to this file for
+    debugging. Only the bottom 60% / right-most 30% of the screen is searched,
+    so the button is still found when the keyboard pushes it upwards.
+
+    Args:
+        d: Connected uiautomator2 device; only ``d.screenshot(path)`` is used.
+
+    Returns:
+        ``(cx, cy)`` centre of the chosen green region in full-screen
+        coordinates, or None when nothing suitable is found or a step raises.
+    """
+    try:
+        screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
+        # exist_ok avoids the check-then-create race of exists() + makedirs().
+        os.makedirs(screenshot_dir, exist_ok=True)
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        debug_shot_path = os.path.join(screenshot_dir, f"debug_send_check_{timestamp}.jpg")
+
+        d.screenshot(debug_shot_path)
+        logger.info(f"发送按钮查找调试截图已保存: {debug_shot_path}")
+
+        # Decode from a byte buffer like the other image loads in this file,
+        # so a non-ASCII project path does not make the read fail.
+        img = cv2.imdecode(np.fromfile(debug_shot_path, dtype=np.uint8), cv2.IMREAD_COLOR)
+        if img is None:
+            return None
+
+        h, w = img.shape[:2]
+
+        # Region of interest: bottom 60% of the height, right-most 30% of the width.
+        y_start = h - int(h * 0.6)
+        x_start = w - int(w * 0.3)
+        roi = img[y_start:h, x_start:w]
+
+        # Mask pixels inside the green HSV range used for the WeChat button.
+        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
+        mask = cv2.inRange(hsv, np.array([35, 80, 80]), np.array([90, 255, 255]))
+        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        candidates = []
+        for cnt in contours:
+            area = cv2.contourArea(cnt)
+            # Drop small noise and very large regions (e.g. a full background).
+            if not 500 < area < 30000:
+                continue
+            x, y, cw, ch = cv2.boundingRect(cnt)
+            # Keep roughly square to moderately wide shapes only.
+            if not 0.5 < cw / ch < 3.0:
+                continue
+            candidates.append((x, y, cw, ch, area))
+
+        if candidates:
+            # The send button is expected to be the bottom-most green element;
+            # ties on the top edge are broken in favour of the right-most one.
+            bx, by, bw, bh, area = max(candidates, key=lambda c: (c[1], c[0]))
+            # Convert from ROI coordinates back to full-screen coordinates.
+            cx = x_start + bx + bw // 2
+            cy = y_start + by + bh // 2
+
+            logger.info(f"通过图像识别找到发送按钮 (Bottom-Most): ({cx}, {cy}), 面积: {area}")
+            return cx, cy
+
+        logger.warning("未通过图像识别找到绿色发送按钮")
+        return None
+
+    except Exception as e:
+        logger.error(f"查找发送按钮出错: {e}")
+        return None
+
+def take_debug_screenshot(d, step_name):
+    """
+    Save a timestamped debug screenshot under ``Screenshots`` next to this file.
+
+    Args:
+        d: Connected uiautomator2 device; only ``d.screenshot(path)`` is used.
+        step_name: Label embedded in the file name (``debug_<time>_<step>.jpg``).
+
+    Returns:
+        The path of the saved screenshot, or None if any step failed (the
+        failure is logged, never raised).
+    """
+    try:
+        screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
+        # exist_ok avoids the check-then-create race of exists() + makedirs().
+        os.makedirs(screenshot_dir, exist_ok=True)
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        filename = f"debug_{timestamp}_{step_name}.jpg"
+        save_path = os.path.join(screenshot_dir, filename)
+        d.screenshot(save_path)
+        logger.info(f"📸 [调试截图] {step_name} 已保存: {filename}")
+        return save_path
+    except Exception as e:
+        logger.error(f"截图失败 ({step_name}): {e}")
+        return None
+
+def perform_input_action(coords, text):
+    """
+    Type ``text`` into the chat input box and attempt to send it.
+
+    The native ``android.widget.EditText`` widget is tried first through
+    uiautomator2. If it is missing, or any native step raises, the function
+    falls back to tapping ``coords`` and typing with ``send_keys``. Debug
+    screenshots are taken around every step.
+
+    Args:
+        coords: ``(x, y)`` screen position of the input box; used only by the
+            coordinate fallback. A falsy value disables the fallback.
+        text: Message to type.
+
+    Returns:
+        None. Failures are logged rather than raised.
+    """
+    d = None
+
+    # Preferred path: drive the native EditText widget.
+    try:
+        d = u2.connect()
+        input_elem = d(className="android.widget.EditText")
+
+        if input_elem.exists:
+            logger.info("发现原生输入框控件，尝试点击...")
+            take_debug_screenshot(d, "native_01_before_click")
+
+            # Tap twice, then give the keyboard time to appear.
+            input_elem.click()
+            time.sleep(0.5)
+            input_elem.click()
+            time.sleep(1)
+            take_debug_screenshot(d, "native_02_after_click_keyboard")
+
+            logger.info(f"输入文本: {text}")
+            try:
+                input_elem.set_text(text)
+            except Exception as e:
+                # Best effort: the check below falls back to send_keys.
+                logger.warning(f"set_text 失败: {e}")
+
+            time.sleep(0.5)
+
+            # Verify the text arrived; type it with send_keys when it did not
+            # or when the widget cannot be read back.
+            needs_send_keys = True
+            try:
+                current_text = input_elem.get_text()
+            except Exception as e:
+                logger.warning(f"get_text 失败，改用 send_keys: {e}")
+            else:
+                needs_send_keys = not current_text or current_text != text
+                if needs_send_keys:
+                    logger.warning(f"set_text 似乎未生效 (当前: {current_text})，尝试 send_keys...")
+            if needs_send_keys:
+                d.send_keys(text)
+
+            take_debug_screenshot(d, "native_03_after_text_input")
+
+            time.sleep(0.5)
+            d.press("enter")
+
+            _click_send_button(
+                d,
+                "native_04_after_send_click_text",
+                "native_04_after_send_click_image",
+                "native_04_after_send_click_fallback",
+            )
+
+            logger.info("输入完成 (Native)")
+            return
+
+        logger.warning("未找到输入框元素 (Native)，转入坐标点击模式...")
+
+    except Exception as e:
+        logger.warning(f"原生控件操作失败，降级为坐标点击: {e}")
+
+    # Fallback path: tap the detected coordinates.
+    if not coords:
+        logger.error("坐标无效，无法执行点击输入")
+        return
+
+    x, y = coords
+
+    if d is None:
+        # The first connect() failed; retry once and report instead of raising.
+        try:
+            d = u2.connect()
+        except Exception as e:
+            logger.error(f"自动化操作失败: {e}")
+            return
+
+    try:
+        width, height = d.window_size()
+        # Only taps landing in the bottom 1% are pulled up (to 97%); a more
+        # aggressive correction would hit the area above the input box.
+        if y > height * 0.99:
+            corrected_y = int(height * 0.97)
+            logger.warning(f"检测到的输入框坐标 y={y} 过于靠底，修正为 {corrected_y}")
+            y = corrected_y
+    except Exception as e:
+        logger.warning(f"获取屏幕尺寸失败，跳过坐标修正: {e}")
+
+    try:
+        logger.info(f"设备连接成功: {d.info.get('serial')}")
+        take_debug_screenshot(d, "coord_01_before_click_input")
+
+        # Tap twice, then wait for the keyboard.
+        logger.info(f"点击坐标: ({x}, {y})")
+        d.click(x, y)
+        time.sleep(0.5)
+        d.click(x, y)
+        time.sleep(1.5)
+        take_debug_screenshot(d, "coord_02_after_click_input_keyboard")
+
+        logger.info(f"输入文本 (SendKeys): {text}")
+        try:
+            d.send_keys(text)
+        except Exception as sk_e:
+            logger.error(f"send_keys 失败: {sk_e}")
+
+        take_debug_screenshot(d, "coord_03_after_input_text")
+
+        time.sleep(0.5)
+        d.press("enter")
+
+        _click_send_button(
+            d,
+            "coord_04_after_click_send_native",
+            "coord_04_after_click_send_image",
+            "coord_04_after_click_send_fallback",
+        )
+
+        logger.info("输入完成 (Coordinate)")
+
+    except Exception as e:
+        logger.error(f"自动化操作失败: {e}")
+
+
+def _click_send_button(d, shot_text, shot_image, shot_fallback):
+    """Click the send button: by its text label, else by image recognition,
+    else by a blind tap near the bottom-right corner.
+
+    The three ``shot_*`` arguments name the debug screenshot taken after the
+    corresponding branch. Errors are logged, never raised.
+    """
+    try:
+        send_elem = d(text="发送")
+        if send_elem.exists:
+            send_elem.click()
+            logger.info("已点击 '发送' 按钮 (Native Text)")
+            take_debug_screenshot(d, shot_text)
+            return
+
+        logger.info("未找到 '发送' 文本控件，尝试图像识别...")
+        send_btn_coords = find_send_button(d)
+        if send_btn_coords:
+            sx, sy = send_btn_coords
+            d.click(sx, sy)
+            logger.info(f"已点击 '发送' 按钮 (Image Rec): {sx}, {sy}")
+            take_debug_screenshot(d, shot_image)
+            return
+
+        # Nothing recognised: tap at 90% width / 96.5% height.
+        width, height = d.window_size()
+        fallback_x = int(width * 0.9)
+        fallback_y = int(height * 0.965)
+        logger.info(f"未识别到发送按钮，尝试盲点右下角: {fallback_x}, {fallback_y}")
+        d.click(fallback_x, fallback_y)
+        take_debug_screenshot(d, shot_fallback)
+    except Exception as e:
+        logger.error(f"点击发送按钮失败: {e}")
+
+def analyze_chat_image(image_path, output_path, target_name="对方"):
+    """
+    识别微信聊天截图中的头像并画框，识别对话内容
    """
    logger.info(f"正在分析图片: {image_path}")
    
-    # 读取图片
-    # 注意：cv2.imread 不支持中文路径，需要用 np.fromfile 读取
+    # 读取图片（支持中文路径）
    try:
        img_data = np.fromfile(image_path, dtype=np.uint8)
        img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
@@ -29,10 +410,16 @@ def analyze_chat_image(image_path, output_path):
    if img is None:
        logger.error("图片读取为空")
        return
-
+    
+    # 备份一份干净的图片用于 OCR (避免识别到画上去的框)
+    img_clean = img.copy()
+    
    height, width = img.shape[:2]
    logger.info(f"图片尺寸: {width}x{height}")

+    # 调用输入框识别，获取坐标和可视化框
+    input_center, input_rect = find_input_box_center(image_path)
+
    # 1. 预处理
    # 转为灰度图
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
@@ -49,7 +436,8 @@ def analyze_chat_image(image_path, output_path):
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    logger.info(f"检测到轮廓数量: {len(contours)}")
    
-    avatar_count = 0
+    # 收集所有符合条件的头像
+    avatars = []
    
    # 3. 筛选轮廓
    for contour in contours:
@@ -69,7 +457,7 @@ def analyze_chat_image(image_path, output_path):
        # 排除底部输入框区域 (假设底部 10% 为输入区域)
        if y > height * 0.9:
            continue
-
+        
        # 左侧头像：靠左边 (x < width * 0.18)
        # 右侧头像：靠右边 (x > width * 0.82)
        is_left = x < width * 0.18
@@ -77,22 +465,124 @@ def analyze_chat_image(image_path, output_path):
        
        if 0.8 <= aspect_ratio <= 1.2 and min_w < w < max_w:
            if is_left or is_right:
-                # 确定颜色
-                # 左侧：蓝色 (BGR: 255, 0, 0)
-                # 右侧：黄色 (BGR: 0, 255, 255)
-                color = (255, 0, 0) if is_left else (0, 255, 255)
-                
-                # 绘制矩形框，线宽为 3
-                cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)
-                
-                label = "Left" if is_left else "Right"
-                avatar_count += 1
-                logger.info(f"找到头像: 位置=({x},{y}), 尺寸={w}x{h}, 侧别={label}")
+                side = "Left" if is_left else "Right"
+                avatars.append({
+                    'x': x, 'y': y, 'w': w, 'h': h,
+                    'side': side
+                })

-    logger.info(f"共标记了 {avatar_count} 个头像")
+    # 按 y 坐标排序
+    avatars.sort(key=lambda a: a['y'])
+    
+    logger.info(f"找到有效头像数量: {len(avatars)}")

-    # 4. 保存结果
+    # 初始化 OCR
    try:
+        reader = get_easyocr_reader(gpu=True)
+        logger.info("OCR 初始化成功")
+    except Exception as e:
+        logger.error(f"OCR 初始化失败: {e}")
+        reader = None
+    
+    dialogue_log = []
+
+    # 4. 绘制对话内容框 (Green/Red Boxes)
+    if avatars:
+        i = 0
+        while i < len(avatars):
+            current_group_start = i
+            current_side = avatars[i]['side']
+            
+            # 找到当前组的结束位置 (即下一个不同侧头像的索引)
+            j = i + 1
+            while j < len(avatars) and avatars[j]['side'] == current_side:
+                j += 1
+            
+            # Start Y: 当前组第一个头像的上方 (例如 -10px)
+            start_y = max(0, avatars[i]['y'] - 10)
+            
+            # End Y
+            if j < len(avatars):
+                end_y = max(start_y + 10, avatars[j]['y'] - 30)
+            else:
+                # 最后一个框的底边，使用输入框的上沿
+                if input_rect:
+                    _, input_y, _, _ = input_rect
+                    end_y = max(start_y + 10, input_y - 10)
+                else:
+                    end_y = int(height * 0.9) # 默认
+            
+            # 绘制大框
+            # 左侧 (Left) -> 对方 -> 绿色 (0, 255, 0)
+            # 右侧 (Right) -> 我 -> 红色 (0, 0, 255)
+            box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255)
+            
+            cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5)
+            
+            logger.info(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")
+            
+            # OCR 识别区域文本
+            if reader:
+                try:
+                    safe_start_y = max(0, start_y)
+                    safe_end_y = min(height, end_y)
+                    
+                    if safe_end_y > safe_start_y:
+                        roi_img = img_clean[safe_start_y:safe_end_y, 0:width]
+                        
+                        # 识别
+                        results = reader.read_text(roi_img)
+                        
+                        # 过滤关键词
+                        ignore_keywords = ["点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手"]
+                        
+                        texts = []
+                        for res in results:
+                            text = res[1]
+                            prob = res[2]
+                            if prob > 0.3:
+                                # 1. 检查是否包含屏蔽词
+                                if any(kw in text for kw in ignore_keywords):
+                                    continue
+                                
+                                # 2. 检查是否为单行时间
+                                if re.match(r'^\s*\d{1,2}:\d{2}\s*$', text):
+                                    continue
+                                    
+                                texts.append(text)
+                                
+                        combined_text = " ".join(texts)
+                        
+                        if combined_text.strip():
+                            role = target_name if current_side == "Left" else "我"
+                            dialogue_log.append(f"{role}: {combined_text}")
+                            logger.info(f"  -> OCR结果: {combined_text}")
+                except Exception as e:
+                    logger.error(f"  -> OCR出错: {e}")
+            
+            # 移动到下一组
+            i = j
+
+    # 5. 绘制头像框 (Blue/Yellow Boxes) - 画在内容框之上
+    for av in avatars:
+        x, y, w, h = av['x'], av['y'], av['w'], av['h']
+        color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255)
+        
+        cv2.rectangle(img, (x, y), (x + w, y + h), color, 10)
+        logger.info(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}")
+
+    # 6. 保存结果
+    try:
+        # 如果有识别到输入框，画出来 (紫色)
+        if input_rect:
+            rx, ry, rw, rh = input_rect
+            cv2.rectangle(img, (rx, ry), (rx + rw, ry + rh), (255, 0, 255), 5)
+            # 画中心点
+            if input_center:
+                cx, cy = input_center
+                cv2.circle(img, (cx, cy), 10, (255, 0, 255), -1)
+            logger.info(f"已绘制输入框标记: {input_rect}")
+
        # cv2.imwrite 不支持中文路径，使用 imencode + tofile
        ext = os.path.splitext(output_path)[1]
        cv2.imencode(ext, img)[1].tofile(output_path)
@@ -100,6 +590,12 @@ def analyze_chat_image(image_path, output_path):
    except Exception as e:
        logger.error(f"保存分析图片失败: {e}")

+    logger.info("\n" + "="*30)
+    logger.info("对话内容汇总:")
+    for line in dialogue_log:
+        logger.info(line)
+    logger.info("="*30 + "\n")
+
 def main():
    logger.info("开始执行微信搜索测试...")
    
@@ -197,11 +693,25 @@ def main():
            logger.info(f"✅ 原始截图已保存: {save_path}")
            
            # 6. 分析截图并标记头像
-            logger.info("步骤 6: 自动标记头像...")
+            logger.info("步骤 6: 自动标记头像和识别内容...")
            analyzed_filename = f"chat_result_{timestamp}_analyzed.jpg"
            analyzed_path = os.path.join(screenshot_dir, analyzed_filename)
            
-            analyze_chat_image(save_path, analyzed_path)
+            analyze_chat_image(save_path, analyzed_path, target_name=target_name)
+            
+            # 7. 查找输入框并执行自动化操作
+            logger.info("步骤 7: 自动回复...")
+            # 注意：analyze_chat_image 内部已经调用了 find_input_box_center 并且画在图上了
+            # 但我们需要返回值来执行操作。analyze_chat_image 并没有返回坐标。
+            # 为了简单，再次调用一次 (或者修改 analyze_chat_image 返回坐标，但改动较大)
+            # 这里直接再次调用 find_input_box_center 获取坐标
+            coords, _ = find_input_box_center(save_path)
+            
+            if coords:
+                logger.info(f"输入框坐标: {coords}")
+                perform_input_action(coords, "AI助手我现在可以开始和你聊天了！")
+            else:
+                logger.warning("未找到输入框坐标，跳过回复")
            
        except Exception as e:
            logger.error(f"❌ 截图或分析失败: {e}")
--- a/Test/T2_testWeXinChat.py
+++ b/Test/T2_testWeXinChat.py
@@ -126,9 +126,9 @@ def analyze_chat_image(image_path, output_path):
            # 注意 OpenCV 颜色是 BGR
            box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255)
            
-            # 绘制矩形 (空心，线宽 2)
+            # 绘制矩形 (空心，线宽 5)
            # X 轴范围：0 到 width
-            cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 2)
+            cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5)
            
            logger.info(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")
            
@@ -142,7 +142,7 @@ def analyze_chat_image(image_path, output_path):
        # 右侧：黄色 (BGR: 0, 255, 255)
        color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255)
        
-        cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)
+        cv2.rectangle(img, (x, y), (x + w, y + h), color, 10)
        logger.info(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}")

    logger.info(f"共标记了 {len(avatars)} 个头像")
--- a/Test/pycache/T1_StartWeiXin.cpython-310.pyc
+++ b/Test/pycache/T1_StartWeiXin.cpython-310.pyc
--- a/Test/analyze_chat_avatars.py
+++ b/Test/analyze_chat_avatars.py
@@ -1,24 +1,126 @@
 # coding=utf-8
 import cv2
 import numpy as np
+import sys
 import os
+import logging
+import re
+import time
+try:
+    import uiautomator2 as u2
+except ImportError:
+    u2 = None

-def analyze_chat_image(image_path, output_path):
-    print(f"正在读取图片: {image_path}")
-    
-    # 读取图片
-    # 注意：cv2.imread 不支持中文路径，需要用 np.fromfile 读取
+# 添加项目根目录到 sys.path 以便导入 Util
+project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+if project_root not in sys.path:
+    sys.path.append(project_root)
+
+from Util.EasyOcrKit import get_easyocr_reader
+
+# 配置日志
+logging.basicConfig(level=logging.INFO, format='%(message)s')
+logger = logging.getLogger(__name__)
+
+def find_input_box_center(image_path):
+    """
+    Find the centre of the input box at the bottom of a chat screenshot.
+
+    The bottom 15% of the image is thresholded and the largest contour wider
+    than half the image and taller than 20px is taken as the input box.
+
+    Returns:
+        ``(center_x, center_y)`` in full-image coordinates. When no contour
+        qualifies, the default ``(width // 2, int(height * 0.95))`` is
+        returned. None is returned if the image cannot be decoded or any
+        step raises.
+    """
    try:
        img_data = np.fromfile(image_path, dtype=np.uint8)
        img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
+        if img is None:
+            return None
+            
+        height, width = img.shape[:2]
+        
+        # Crop the bottom 15% of the image
+        bottom_h = int(height * 0.15)
+        crop_y_start = height - bottom_h
+        crop = img[crop_y_start:height, 0:width]
+        
+        # Pre-processing
+        gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
+        # The input box is presumably drawn in a contrasting tone against its
+        # background (e.g. brighter, or outlined, in dark mode), so an inverted
+        # adaptive threshold is used rather than a fixed one.
+        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, 
+                                       cv2.THRESH_BINARY_INV, 11, 2)
+        
+        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        
+        best_cnt = None
+        max_area = 0
+        
+        for cnt in contours:
+            x, y, w, h = cv2.boundingRect(cnt)
+            # Filter: wider than 50% of the image and taller than 20px.
+            # NOTE(review): there is no upper bound on w or h here, so a contour
+            # spanning the whole crop would also qualify — confirm this is intended.
+            if w > width * 0.5 and h > 20:
+                if w * h > max_area:
+                    max_area = w * h
+                    best_cnt = (x, y, w, h)
+        
+        if best_cnt:
+            x, y, w, h = best_cnt
+            center_x = x + w // 2
+            # y is relative to the crop; shift it back to full-image coordinates.
+            center_y = crop_y_start + y + h // 2
+            logger.info(f"找到输入框: ({center_x}, {center_y}), 尺寸: {w}x{h}")
+            return center_x, center_y
+        else:
+            # Fallback: horizontal centre at 95% of the image height
+            logger.warning("未找到明显输入框轮廓，使用默认坐标")
+            return width // 2, int(height * 0.95)
+            
    except Exception as e:
-        print(f"读取图片失败: {e}")
+        logger.error(f"查找输入框失败: {e}")
+        return None
+
+def perform_input_action(coords, text):
+    """
+    Tap the given screen coordinates and type ``text`` through uiautomator2.
+
+    Args:
+        coords: ``(x, y)`` position to tap; a falsy value aborts with an error log.
+        text: Text passed to ``d.send_keys``.
+
+    Returns:
+        None. Nothing is done when uiautomator2 is not installed (``u2`` is
+        None); automation errors are logged and printed, not raised.
+    """
+    if u2 is None:
+        logger.error("未安装 uiautomator2 库，无法执行自动化操作")
        return

-    if img is None:
-        print("图片读取为空")
+    if not coords:
+        logger.error("坐标无效，无法执行点击输入")
        return

+    x, y = coords
+    try:
+        # Connect to the device (u2.connect() is called without arguments)
+        d = u2.connect() 
+        logger.info(f"设备连接成功: {d.info.get('serial')}")
+        
+        # Tap the input box
+        logger.info(f"点击坐标: ({x}, {y})")
+        d.click(x, y)
+        
+        # Wait for the keyboard to appear / the input box to gain focus
+        time.sleep(1)
+        
+        # Type the text
+        logger.info(f"输入文本: {text}")
+        d.send_keys(text)
+        
+        # Pressing enter to confirm the input is left disabled (case dependent)
+        # d.press("enter")
+        
+        logger.info("输入完成")
+        
+    except Exception as e:
+        # The failure is reported twice: through the logger and on stdout.
+        logger.error(f"自动化操作失败: {e}")
+        print(f"自动化操作失败: {e}")
+
+def analyze_chat_image(image_path, output_path, target_name="对方"):
+    # 读取图片（支持中文路径）
+    img_data = np.fromfile(image_path, dtype=np.uint8)
+    img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
+    
+    # 备份一份干净的图片用于 OCR (避免识别到画上去的框)
+    img_clean = img.copy()
+    
    height, width = img.shape[:2]
    print(f"图片尺寸: {width}x{height}")

@@ -79,6 +181,16 @@ def analyze_chat_image(image_path, output_path):
    
    print(f"找到有效头像数量: {len(avatars)}")

+    # 初始化 OCR
+    try:
+        reader = get_easyocr_reader(gpu=True)
+        print("OCR 初始化成功")
+    except Exception as e:
+        print(f"OCR 初始化失败: {e}")
+        reader = None
+    
+    dialogue_log = []
+
    # 4. 绘制对话内容框 (Green/Red Boxes)
    # 策略：按顺序遍历头像，如果发现同侧连续，则视为一组。
    # 从当前组的第一个头像上方开始，直到下一个不同侧的头像上方（或底部）。
@@ -122,6 +234,49 @@ def analyze_chat_image(image_path, output_path):
            
            print(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")
            
+            # OCR 识别区域文本
+            if reader:
+                try:
+                    # 从原图(img_clean)裁剪区域
+                    # 注意边界检查
+                    safe_start_y = max(0, start_y)
+                    safe_end_y = min(height, end_y)
+                    
+                    if safe_end_y > safe_start_y:
+                        roi_img = img_clean[safe_start_y:safe_end_y, 0:width]
+                        
+                        # 识别
+                        results = reader.read_text(roi_img)
+                        
+                        # 过滤关键词
+                        ignore_keywords = ["点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手"]
+                        
+                        # 过滤并拼接文本 (置信度 > 0.3)
+                        texts = []
+                        for res in results:
+                            text = res[1]
+                            prob = res[2]
+                            if prob > 0.3:
+                                # 1. 检查是否包含屏蔽词
+                                if any(kw in text for kw in ignore_keywords):
+                                    continue
+                                
+                                # 2. 检查是否为单行时间 (如 11:35, 09:00)
+                                # 正则匹配: 只有时间格式，没有其他文字
+                                if re.match(r'^\s*\d{1,2}:\d{2}\s*$', text):
+                                    continue
+                                    
+                                texts.append(text)
+                                    
+                        combined_text = " ".join(texts)
+                        
+                        if combined_text.strip():
+                            role = target_name if current_side == "Left" else "我"
+                            dialogue_log.append(f"{role}: {combined_text}")
+                            print(f"  -> OCR结果: {combined_text}")
+                except Exception as e:
+                    print(f"  -> OCR出错: {e}")
+
            # 移动到下一组
            i = j

@@ -144,10 +299,32 @@ def analyze_chat_image(image_path, output_path):
    except Exception as e:
        print(f"保存图片失败: {e}")

+    print("\n" + "="*30)
+    print("对话内容汇总:")
+    for line in dialogue_log:
+        print(line)
+    print("="*30 + "\n")
+
 if __name__ == "__main__":
    # 输入文件路径
    input_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_20260121_113553.jpg"
    # 输出文件路径
    output_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_analyzed.jpg"
    
-    analyze_chat_image(input_file, output_file)
+    # Name of the target contact (matches the search keyword); passed to
+    # analyze_chat_image as target_name
+    target_name = "糖豆爸爸"
+    
+    analyze_chat_image(input_file, output_file, target_name=target_name)
+    
+    # 2. Locate the input box and run the automated input
+    print("\n" + "="*30)
+    print("开始执行自动化输入...")
+    
+    # Note: the coordinates are derived from input_file (a saved screenshot),
+    # which only works if the layout at capture time matches the device's
+    # current screen
+    coords = find_input_box_center(input_file)
+    if coords:
+        print(f"输入框坐标: {coords}")
+        perform_input_action(coords, "AI助手我现在可以开始和你聊天了！")
+    else:
+        print("未找到输入框坐标")
--- a/Test/temp_send_check.jpg
+++ b/Test/temp_send_check.jpg