# aiData/Test/analyze_chat_avatars.py

# coding=utf-8
import cv2
import numpy as np
import sys
import os
import logging
import re
import time
try:
    import uiautomator2 as u2
except ImportError:
    u2 = None

# Add the project root (the parent of this file's directory) to sys.path so
# that the Util package can be imported. This must run before the import below.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
    sys.path.append(project_root)

from Util.EasyOcrKit import get_easyocr_reader

# Configure logging: INFO level, message-only output format.
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__name__)

def find_input_box_center(image_path):
    """
    Locate the centre of the text-input box at the bottom of a screenshot.

    The bottom 15% of the image is binarised with an adaptive threshold and
    searched for external contours. Among bounding rectangles wider than half
    the image and taller than 20 px, the one with the largest area is taken
    to be the input box.

    Args:
        image_path: Path of the screenshot file to inspect.

    Returns:
        A ``(center_x, center_y)`` tuple in full-image pixel coordinates.
        When no rectangle qualifies, a default point (horizontal centre,
        95% of the image height) is returned instead. ``None`` is returned
        when the image cannot be decoded or any exception occurs.
    """
    try:
        raw = np.fromfile(image_path, dtype=np.uint8)
        frame = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if frame is None:
            return None

        height, width = frame.shape[:2]

        # Only the bottom 15% strip of the screen is examined.
        strip_top = height - int(height * 0.15)
        strip = frame[strip_top:height, 0:width]

        # Inverted adaptive threshold so that borders/edges become foreground,
        # regardless of whether the theme is light or dark.
        binary = cv2.adaptiveThreshold(
            cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY),
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            11,
            2,
        )
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Keep rectangles that are wide (> 50% of the screen) and not too thin.
        rects = (cv2.boundingRect(c) for c in contours)
        candidates = [r for r in rects if r[2] > width * 0.5 and r[3] > 20]

        if not candidates:
            # Fallback: centre of the bottom area.
            logger.warning("未找到明显输入框轮廓，使用默认坐标")
            return width // 2, int(height * 0.95)

        # max() keeps the first rectangle among equal areas, like a strict ">" scan.
        x, y, w, h = max(candidates, key=lambda r: r[2] * r[3])
        center_x = x + w // 2
        # Translate the strip-relative y back into full-image coordinates.
        center_y = strip_top + y + h // 2
        logger.info(f"找到输入框: ({center_x}, {center_y}), 尺寸: {w}x{h}")
        return center_x, center_y

    except Exception as e:
        logger.error(f"查找输入框失败: {e}")
        return None

def perform_input_action(coords, text):
    """
    Tap a screen coordinate on the connected device and type text there.

    Args:
        coords: ``(x, y)`` pair to tap; a falsy value aborts the action.
        text: Text passed to the device's ``send_keys``.

    Returns:
        None. Problems are logged rather than raised: a missing
        ``uiautomator2`` module, invalid coordinates, or any exception raised
        while talking to the device.
    """
    # Guard: the optional uiautomator2 import failed at module load time.
    if u2 is None:
        logger.error("未安装 uiautomator2 库，无法执行自动化操作")
        return

    # Guard: nothing to tap.
    if not coords:
        logger.error("坐标无效，无法执行点击输入")
        return

    tap_x, tap_y = coords
    try:
        # connect() is called without arguments, i.e. with the library's default device selection.
        device = u2.connect()
        logger.info(f"设备连接成功: {device.info.get('serial')}")

        logger.info(f"点击坐标: ({tap_x}, {tap_y})")
        device.click(tap_x, tap_y)

        # Give the keyboard / input field a moment to become active.
        time.sleep(1)

        logger.info(f"输入文本: {text}")
        device.send_keys(text)

        # Pressing enter to confirm is intentionally not done here.

        logger.info("输入完成")

    except Exception as e:
        failure = f"自动化操作失败: {e}"
        logger.error(failure)
        print(failure)

# OCR lines containing any of these phrases are dropped from the dialogue log.
_IGNORED_OCR_KEYWORDS = ("点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手")

# Matches a line that consists solely of a clock time such as "11:35" or "09:00".
_TIME_ONLY_PATTERN = re.compile(r'^\s*\d{1,2}:\d{2}\s*$')


def _detect_avatars(img):
    """Return avatar-like square regions near the left/right edges, sorted top to bottom."""
    height, width = img.shape[:2]

    # Pre-processing: grayscale -> inverted adaptive threshold -> morphological
    # closing (3x3) to fill small holes inside shapes.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"检测到轮廓数量: {len(contours)}")

    # Avatar width is assumed to be between 6% and 15% of the screen width.
    min_w = width * 0.06
    max_w = width * 0.15

    avatars = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)

        # Skip the bottom 10% of the screen (assumed to be the input area).
        if y > height * 0.9:
            continue

        # Roughly square (aspect ratio 0.8-1.2) and of plausible avatar size.
        if not (0.8 <= w / h <= 1.2 and min_w < w < max_w):
            continue

        # Left avatars start within the first 18% of the width, right avatars
        # beyond 82%; anything in between is not an avatar.
        if x < width * 0.18:
            side = "Left"
        elif x > width * 0.82:
            side = "Right"
        else:
            continue

        avatars.append({
            'x': x, 'y': y, 'w': w, 'h': h,
            'side': side
        })

    avatars.sort(key=lambda a: a['y'])
    return avatars


def _collect_region_text(reader, roi_img):
    """Run OCR on one region and return its accepted lines joined by single spaces."""
    # NOTE(review): each result is indexed as [1] = text, [2] = confidence;
    # presumably EasyOCR-style (bbox, text, confidence) tuples - confirm
    # against Util.EasyOcrKit.
    results = reader.read_text(roi_img)

    texts = []
    for res in results:
        text = res[1]
        prob = res[2]
        # Discard low-confidence lines (confidence must exceed 0.3).
        if prob <= 0.3:
            continue
        # Discard UI boilerplate.
        if any(kw in text for kw in _IGNORED_OCR_KEYWORDS):
            continue
        # Discard lines that are nothing but a timestamp.
        if _TIME_ONLY_PATTERN.match(text):
            continue
        texts.append(text)

    return " ".join(texts)


def analyze_chat_image(image_path, output_path, target_name="对方"):
    """
    Annotate a chat screenshot and print the dialogue recovered by OCR.

    Steps:
      1. Detect avatar-like squares near the left and right screen edges.
      2. Group consecutive avatars on the same side; each group spans from
         just above its first avatar to just above the next group's first
         avatar (or to 90% of the image height for the last group).
      3. Draw a full-width box per group (green = left side, red = right
         side) and OCR the same region on an unannotated copy of the image.
      4. Draw avatar boxes (blue = left, yellow = right) on top.
      5. Write the annotated image to ``output_path`` and print the dialogue.

    Args:
        image_path: Path of the screenshot to analyse.
        output_path: Destination path of the annotated image; its extension
            selects the encoding format.
        target_name: Label used for messages from the left side. Messages
            from the right side are labelled "我".

    Returns:
        None. All results are printed; if the image cannot be decoded a
        message is printed and nothing else is done.

    Raises:
        Exceptions raised by ``np.fromfile`` (e.g. for a missing file) are
        not caught here and propagate to the caller.
    """
    # Read through NumPy + imdecode so that non-ASCII paths are supported.
    img_data = np.fromfile(image_path, dtype=np.uint8)
    img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode signals an undecodable buffer by returning None; bail out
        # instead of crashing on img.copy() below.
        print(f"无法读取或解码图片: {image_path}")
        return

    # Keep a clean copy for OCR so the boxes drawn later are not recognised.
    img_clean = img.copy()

    height, width = img.shape[:2]
    print(f"图片尺寸: {width}x{height}")

    avatars = _detect_avatars(img)
    print(f"找到有效头像数量: {len(avatars)}")

    # OCR is optional: on initialisation failure only the drawing is done.
    try:
        reader = get_easyocr_reader(gpu=True)
        print("OCR 初始化成功")
    except Exception as e:
        print(f"OCR 初始化失败: {e}")
        reader = None

    dialogue_log = []

    # Walk the avatars top to bottom, one same-side group at a time.
    group_start = 0
    while group_start < len(avatars):
        current_side = avatars[group_start]['side']

        # group_end is the index of the first avatar on the other side.
        group_end = group_start + 1
        while group_end < len(avatars) and avatars[group_end]['side'] == current_side:
            group_end += 1

        # Region starts 10 px above the group's first avatar.
        start_y = max(0, avatars[group_start]['y'] - 10)

        if group_end < len(avatars):
            # Leave a 30 px gap before the next group, but keep the region
            # at least 10 px tall.
            end_y = max(start_y + 10, avatars[group_end]['y'] - 30)
        else:
            # Last group: extend down to just above the input area.
            end_y = int(height * 0.9)

        # OpenCV colours are BGR: green for the left side, red for the right.
        box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255)
        cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5)

        print(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")

        if reader:
            try:
                # Clamp to the image before slicing the clean copy.
                safe_start_y = max(0, start_y)
                safe_end_y = min(height, end_y)

                if safe_end_y > safe_start_y:
                    roi_img = img_clean[safe_start_y:safe_end_y, 0:width]
                    combined_text = _collect_region_text(reader, roi_img)

                    if combined_text.strip():
                        role = target_name if current_side == "Left" else "我"
                        dialogue_log.append(f"{role}: {combined_text}")
                        print(f"  -> OCR结果: {combined_text}")
            except Exception as e:
                print(f"  -> OCR出错: {e}")

        group_start = group_end

    # Avatar boxes are drawn last so they sit on top of the content boxes.
    for av in avatars:
        x, y, w, h = av['x'], av['y'], av['w'], av['h']
        # BGR: blue for the left side, yellow for the right.
        color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255)

        cv2.rectangle(img, (x, y), (x + w, y + h), color, 10)
        print(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}")

    # Save via imencode + tofile (cv2.imwrite does not handle non-ASCII paths).
    try:
        ext = os.path.splitext(output_path)[1]
        encoded_ok, encoded = cv2.imencode(ext, img)
        if encoded_ok:
            encoded.tofile(output_path)
            print(f"结果已保存至: {output_path}")
        else:
            # Do not write a bogus file or claim success if encoding failed.
            print(f"保存图片失败: 无法编码为 '{ext}' 格式")
    except Exception as e:
        print(f"保存图片失败: {e}")

    print("\n" + "="*30)
    print("对话内容汇总:")
    for line in dialogue_log:
        print(line)
    print("="*30 + "\n")

if __name__ == "__main__":
    # Demo run: annotate a stored screenshot, then type a greeting into the
    # chat input box of the connected device.
    separator = "=" * 30

    # Screenshot to analyse and destination of the annotated copy.
    source_image = r"d:\dsWork\aiData\Test\Screenshots\chat_result_20260121_113553.jpg"
    annotated_image = r"d:\dsWork\aiData\Test\Screenshots\chat_result_analyzed.jpg"

    # Name of the contact, used to label messages from the left side.
    contact_name = "糖豆爸爸"

    # Step 1: detect avatars, OCR the dialogue and save the annotated image.
    analyze_chat_image(source_image, annotated_image, target_name=contact_name)

    # Step 2: locate the input box and perform the automated input.
    print("\n" + separator)
    print("开始执行自动化输入...")

    # The coordinates come from the saved screenshot, so the device must
    # currently show the same layout as when the screenshot was taken.
    box_center = find_input_box_center(source_image)
    if not box_center:
        print("未找到输入框坐标")
    else:
        print(f"输入框坐标: {box_center}")
        perform_input_action(box_center, "AI助手我现在可以开始和你聊天了！")