Files
aiData/Test/analyze_chat_avatars.py
HuangHai af46512212 'commit'
2026-01-21 14:13:26 +08:00

331 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding=utf-8
import cv2
import numpy as np
import sys
import os
import logging
import re
import time
try:
import uiautomator2 as u2
except ImportError:
u2 = None
# 添加项目根目录到 sys.path 以便导入 Util
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
sys.path.append(project_root)
from Util.EasyOcrKit import get_easyocr_reader
# 配置日志
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__name__)
def find_input_box_center(image_path):
    """Estimate the centre of the text input box at the bottom of a screenshot.

    The bottom 15% of the image is binarised with an adaptive threshold and
    its external contours are inspected. Among contours whose bounding box is
    wider than half the image and taller than 20 px, the one with the largest
    bounding-box area is taken to be the input box.

    Args:
        image_path: Path of the screenshot file.

    Returns:
        ``(center_x, center_y)`` in full-image pixel coordinates. When no
        contour qualifies, the fallback ``(width // 2, int(height * 0.95))``
        is returned. ``None`` is returned when the image cannot be decoded or
        any exception is raised along the way (the exception is logged).
    """
    try:
        # Decode from a raw byte buffer rather than handing the path to OpenCV.
        raw_bytes = np.fromfile(image_path, dtype=np.uint8)
        image = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
        if image is None:
            return None

        height, width = image.shape[:2]

        # Only the bottom 15% of the screen is searched.
        strip_top = height - int(height * 0.15)
        strip = image[strip_top:height, 0:width]

        # Inverted adaptive threshold so edges/borders become foreground
        # regardless of whether the theme is light or dark.
        strip_gray = cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY)
        binary = cv2.adaptiveThreshold(
            strip_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV, 11, 2,
        )
        contours, _ = cv2.findContours(
            binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        # Keep bounding boxes that are wide (> 50% of the screen) and not
        # trivially thin.
        boxes = (cv2.boundingRect(contour) for contour in contours)
        candidates = [
            box for box in boxes
            if box[2] > width * 0.5 and box[3] > 20
        ]

        if not candidates:
            # Fallback: centre of the bottom area.
            logger.warning("未找到明显输入框轮廓,使用默认坐标")
            return width // 2, int(height * 0.95)

        # max() keeps the first of several equally large boxes.
        box_x, box_y, box_w, box_h = max(
            candidates, key=lambda box: box[2] * box[3]
        )
        center_x = box_x + box_w // 2
        # Translate the strip-relative y back into full-image coordinates.
        center_y = strip_top + box_y + box_h // 2
        logger.info(f"找到输入框: ({center_x}, {center_y}), 尺寸: {box_w}x{box_h}")
        return center_x, center_y
    except Exception as e:
        logger.error(f"查找输入框失败: {e}")
        return None
def perform_input_action(coords, text):
    """Tap a screen coordinate on the connected device and type text there.

    Args:
        coords: ``(x, y)`` screen position to tap; a falsy value aborts.
        text: Text to type after the tap.

    Returns:
        None. Missing prerequisites are logged and the function returns
        early; failures during automation are logged and printed, not raised.
        The text is typed but not submitted (no enter key is sent).
    """
    # Validate prerequisites first: the optional dependency, then the input.
    problem = None
    if u2 is None:
        problem = "未安装 uiautomator2 库,无法执行自动化操作"
    elif not coords:
        problem = "坐标无效,无法执行点击输入"
    if problem is not None:
        logger.error(problem)
        return

    tap_x, tap_y = coords
    try:
        # connect() without arguments lets uiautomator2 pick the device.
        device = u2.connect()
        # NOTE(review): confirm that `info` actually exposes a 'serial' key;
        # if it does not, this logs "None".
        logger.info(f"设备连接成功: {device.info.get('serial')}")

        logger.info(f"点击坐标: ({tap_x}, {tap_y})")
        device.click(tap_x, tap_y)

        # Give the keyboard / input field a moment to become active.
        time.sleep(1)

        logger.info(f"输入文本: {text}")
        device.send_keys(text)
        logger.info("输入完成")
    except Exception as e:
        failure = f"自动化操作失败: {e}"
        logger.error(failure)
        print(failure)
# OCR fragments containing any of these phrases are dropped from the dialogue.
_OCR_IGNORE_KEYWORDS = ("点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手")
# Matches a fragment that is nothing but a clock time, e.g. "11:35" or "09:00".
_TIME_ONLY_PATTERN = re.compile(r'^\s*\d{1,2}:\d{2}\s*$')


def _detect_avatars(img):
    """Return avatar-like boxes near the left/right edges, sorted top to bottom.

    Each entry is a dict with keys ``x``, ``y``, ``w``, ``h`` (bounding box in
    pixels) and ``side`` (``"Left"`` or ``"Right"``).
    """
    height, width = img.shape[:2]

    # Pre-processing: grayscale -> inverted adaptive threshold -> morphological
    # close to fill small holes inside shapes.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"检测到轮廓数量: {len(contours)}")

    # Accepted avatar width: between 6% and 15% of the image width.
    min_w = width * 0.06
    max_w = width * 0.15

    avatars = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)

        # Skip the bottom 10% of the screen (treated as the input area).
        if y > height * 0.9:
            continue
        # Roughly square and of plausible size.
        if not (0.8 <= w / h <= 1.2 and min_w < w < max_w):
            continue
        # Must hug the left or the right edge.
        if x < width * 0.18:
            side = "Left"
        elif x > width * 0.82:
            side = "Right"
        else:
            continue

        avatars.append({'x': x, 'y': y, 'w': w, 'h': h, 'side': side})

    avatars.sort(key=lambda a: a['y'])
    return avatars


def _ocr_region_text(reader, roi_img):
    """OCR ``roi_img`` and return the kept fragments joined by single spaces.

    Fragments are dropped when their confidence is not above 0.3, when they
    contain an ignored phrase, or when they consist only of a clock time.
    """
    # NOTE(review): `reader` comes from the project's get_easyocr_reader();
    # each result is indexed as res[1] = text, res[2] = confidence.
    texts = []
    for res in reader.read_text(roi_img):
        text = res[1]
        prob = res[2]
        if prob <= 0.3:
            continue
        if any(kw in text for kw in _OCR_IGNORE_KEYWORDS):
            continue
        if _TIME_ONLY_PATTERN.match(text):
            continue
        texts.append(text)
    return " ".join(texts)


def analyze_chat_image(image_path, output_path, target_name="对方"):
    """Annotate a chat screenshot and print the dialogue recovered via OCR.

    Avatars are detected along the left and right edges. Consecutive avatars
    on the same side form one group; each group's vertical band is outlined
    on the image (green for left, red for right) and OCR'd from an unmarked
    copy of the image. Avatar boxes are then drawn on top (blue for left,
    yellow for right), the annotated image is written to ``output_path`` and
    the collected dialogue lines are printed.

    Args:
        image_path: Path of the screenshot to analyse.
        output_path: Destination of the annotated image; its extension
            selects the encoding.
        target_name: Speaker label for left-side messages. Right-side
            messages are labelled "我".

    Returns:
        None. Progress and results are reported with ``print``. If the image
        cannot be decoded, a message is printed and the function returns
        without writing anything.
    """
    # Read through np.fromfile + imdecode so paths with Chinese characters work.
    img_data = np.fromfile(image_path, dtype=np.uint8)
    img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode returns None for an undecodable buffer; without this guard
        # the img.copy() below fails with an opaque AttributeError.
        print(f"无法读取图片: {image_path}")
        return

    # Keep an unmarked copy for OCR so drawn rectangles are never recognised.
    img_clean = img.copy()
    height, width = img.shape[:2]
    print(f"图片尺寸: {width}x{height}")

    avatars = _detect_avatars(img)
    print(f"找到有效头像数量: {len(avatars)}")

    # OCR is optional: if the reader cannot be created only drawing is done.
    try:
        reader = get_easyocr_reader(gpu=True)
        print("OCR 初始化成功")
    except Exception as e:
        print(f"OCR 初始化失败: {e}")
        reader = None

    dialogue_log = []

    # Walk the avatars top to bottom, one same-side run (group) at a time.
    i = 0
    while i < len(avatars):
        current_side = avatars[i]['side']

        # j ends up at the first avatar of the next group (or len(avatars)).
        j = i + 1
        while j < len(avatars) and avatars[j]['side'] == current_side:
            j += 1

        # Band starts slightly above the group's first avatar ...
        start_y = max(0, avatars[i]['y'] - 10)
        # ... and ends 30 px above the next group's first avatar (leaving a
        # visible gap, but never less than 10 px tall), or at the top of the
        # input area when this is the last group.
        if j < len(avatars):
            end_y = max(start_y + 10, avatars[j]['y'] - 30)
        else:
            end_y = int(height * 0.9)

        # OpenCV colours are BGR: green for the left side, red for the right.
        box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255)
        cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5)
        print(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")

        if reader is not None:
            try:
                # Clamp to the image before slicing.
                safe_start_y = max(0, start_y)
                safe_end_y = min(height, end_y)
                if safe_end_y > safe_start_y:
                    roi_img = img_clean[safe_start_y:safe_end_y, 0:width]
                    combined_text = _ocr_region_text(reader, roi_img)
                    if combined_text.strip():
                        # Left is the contact, right is the user ("我");
                        # an empty label would yield a line like ": text".
                        role = target_name if current_side == "Left" else "我"
                        dialogue_log.append(f"{role}: {combined_text}")
                        print(f" -> OCR结果: {combined_text}")
            except Exception as e:
                # Best effort: an OCR failure for one band must not stop the rest.
                print(f" -> OCR出错: {e}")

        i = j

    # Avatar boxes are drawn last so they sit on top of the content boxes.
    for av in avatars:
        x, y, w, h = av['x'], av['y'], av['w'], av['h']
        # BGR: blue for the left side, yellow for the right.
        color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255)
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 10)
        print(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}")

    # Save with imencode + tofile (cv2.imwrite does not handle Chinese paths).
    try:
        ext = os.path.splitext(output_path)[1]
        ok, encoded = cv2.imencode(ext, img)
        if ok:
            encoded.tofile(output_path)
            print(f"结果已保存至: {output_path}")
        else:
            # Do not write an empty/invalid file when encoding failed.
            print(f"保存图片失败: 无法将图片编码为 {ext} 格式")
    except Exception as e:
        print(f"保存图片失败: {e}")

    print("\n" + "="*30)
    print("对话内容汇总:")
    for line in dialogue_log:
        print(line)
    print("="*30 + "\n")
if __name__ == "__main__":
    # Screenshot to analyse and destination of the annotated copy.
    input_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_20260121_113553.jpg"
    output_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_analyzed.jpg"
    # Name of the target contact (matches the search keyword).
    target_name = "糖豆爸爸"

    # Step 1: annotate the screenshot and print the recognised dialogue.
    analyze_chat_image(input_file, output_file, target_name=target_name)

    # Step 2: locate the input box and type a message on the device.
    print("\n" + "=" * 30)
    print("开始执行自动化输入...")

    # The coordinates are derived from the screenshot, so the device must
    # still be showing the same layout as when the screenshot was taken.
    coords = find_input_box_center(input_file)
    if not coords:
        print("未找到输入框坐标")
    else:
        print(f"输入框坐标: {coords}")
        perform_input_action(coords, "AI助手我现在可以开始和你聊天了")