# Listing metadata: 331 lines, 12 KiB, Python
# coding=utf-8
|
||
import cv2
|
||
import numpy as np
|
||
import sys
|
||
import os
|
||
import logging
|
||
import re
|
||
import time
|
||
try:
|
||
import uiautomator2 as u2
|
||
except ImportError:
|
||
u2 = None
|
||
|
||
# Add the project root (the parent of the directory containing this file) to
# sys.path so that the Util package imported below can be resolved.
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
    sys.path.append(project_root)
|
||
|
||
from Util.EasyOcrKit import get_easyocr_reader
|
||
|
||
# Configure logging: INFO level, records rendered as the bare message text.
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__name__)
|
||
|
||
def find_input_box_center(image_path):
    """
    Locate the centre of the input box near the bottom of a screenshot.

    The bottom 15% of the image is binarised with an inverted adaptive
    threshold and its external contours are scanned. Among the bounding
    boxes wider than half the image and taller than 20 px, the one with the
    largest area is taken to be the input box.

    Args:
        image_path: Path of the screenshot to inspect.

    Returns:
        ``(center_x, center_y)`` in full-image pixel coordinates. When no
        bounding box qualifies, the fallback
        ``(width // 2, int(height * 0.95))`` is returned. ``None`` when the
        image cannot be decoded or any step raises.
    """
    try:
        raw_bytes = np.fromfile(image_path, dtype=np.uint8)
        img = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
        if img is None:
            return None

        height, width = img.shape[:2]

        # Only the bottom 15% of the screen is searched.
        strip_top = height - int(height * 0.15)
        strip = img[strip_top:height, 0:width]

        # Adaptive threshold so that either a lighter/darker box or a thin
        # border shows up as a contour regardless of the colour theme.
        gray = cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY)
        binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Keep boxes that are wide (> 50% of the screen) and not hairline-thin.
        boxes = (cv2.boundingRect(contour) for contour in contours)
        candidates = [box for box in boxes if box[2] > width * 0.5 and box[3] > 20]

        if not candidates:
            # Fallback: assume the box sits in the middle of the bottom area.
            logger.warning("未找到明显输入框轮廓，使用默认坐标")
            return width // 2, int(height * 0.95)

        # max() returns the first of equally large boxes, like a strict ">" scan.
        x, y, w, h = max(candidates, key=lambda box: box[2] * box[3])
        center_x = x + w // 2
        # y is relative to the cropped strip; shift back to image coordinates.
        center_y = strip_top + y + h // 2
        logger.info(f"找到输入框: ({center_x}, {center_y}), 尺寸: {w}x{h}")
        return center_x, center_y

    except Exception as e:
        logger.error(f"查找输入框失败: {e}")
        return None
|
||
|
||
def perform_input_action(coords, text):
    """
    Tap a screen position on the connected device and type text into it.

    Args:
        coords: ``(x, y)`` pair to tap. A falsy value aborts the call after
            logging an error.
        text: Text handed to the device's ``send_keys``.

    Returns:
        None. Failures during the device interaction are logged and printed
        rather than raised.
    """
    # Guard: the automation library is an optional import.
    if u2 is None:
        logger.error("未安装 uiautomator2 库，无法执行自动化操作")
        return

    # Guard: nothing to tap.
    if not coords:
        logger.error("坐标无效，无法执行点击输入")
        return

    x, y = coords

    try:
        # connect() without arguments picks the default device.
        d = u2.connect()
        logger.info(f"设备连接成功: {d.info.get('serial')}")

        # Focus the input box.
        logger.info(f"点击坐标: ({x}, {y})")
        d.click(x, y)

        # Give the keyboard / input field a moment to become active.
        time.sleep(1)

        # Type the message.
        logger.info(f"输入文本: {text}")
        d.send_keys(text)

        # Optionally press Enter to confirm the input (depends on the app):
        # d.press("enter")

        logger.info("输入完成")

    except Exception as e:
        logger.error(f"自动化操作失败: {e}")
        print(f"自动化操作失败: {e}")
|
||
|
||
# OCR fragments containing any of these UI strings are discarded.
_IGNORED_OCR_KEYWORDS = ("点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手")

# Matches an OCR fragment that is nothing but a clock time such as "11:35".
_TIME_ONLY_PATTERN = re.compile(r'^\s*\d{1,2}:\d{2}\s*$')


def _find_avatars(binary_img, width, height):
    """Return avatar candidates found in a binarised screenshot, top to bottom.

    A contour counts as an avatar when its bounding box is roughly square
    (aspect ratio 0.8-1.2), 6%-15% of the screen width wide, starts above the
    bottom 10% of the screen, and begins within the left 18% or beyond 82%
    of the screen width. Each result is a dict with keys
    ``x``, ``y``, ``w``, ``h`` and ``side`` ("Left" or "Right").
    """
    contours, _ = cv2.findContours(binary_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"检测到轮廓数量: {len(contours)}")

    # Loop-invariant limits, computed once.
    min_w = width * 0.06
    max_w = width * 0.15
    input_area_top = height * 0.9
    left_limit = width * 0.18
    right_limit = width * 0.82

    avatars = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)

        # Skip anything inside the bottom input area.
        if y > input_area_top:
            continue

        aspect_ratio = float(w) / h
        if not (0.8 <= aspect_ratio <= 1.2 and min_w < w < max_w):
            continue

        is_left = x < left_limit
        is_right = x > right_limit
        if is_left or is_right:
            avatars.append({
                'x': x, 'y': y, 'w': w, 'h': h,
                'side': "Left" if is_left else "Right",
            })

    avatars.sort(key=lambda a: a['y'])
    return avatars


def _join_ocr_text(results):
    """Join OCR fragments worth keeping into one space-separated string.

    Each result is indexed as ``res[1]`` (text) and ``res[2]`` (confidence).
    Fragments with confidence <= 0.3, fragments containing an ignored UI
    keyword, and fragments that are only a clock time are dropped.
    """
    texts = []
    for res in results:
        text = res[1]
        prob = res[2]
        if prob <= 0.3:
            continue
        if any(kw in text for kw in _IGNORED_OCR_KEYWORDS):
            continue
        if _TIME_ONLY_PATTERN.match(text):
            continue
        texts.append(text)
    return " ".join(texts)


def analyze_chat_image(image_path, output_path, target_name="对方"):
    """
    Annotate a chat screenshot and extract its dialogue text with OCR.

    Steps:
      1. Decode the image and binarise it (adaptive threshold + 3x3 closing).
      2. Detect avatar-like square contours near the left and right edges.
      3. Group consecutive avatars of the same side into one message region,
         draw a full-width box around it (green for left, red for right) and
         run OCR on that region of an unannotated copy of the image.
      4. Outline every avatar (blue for left, yellow for right).
      5. Write the annotated image to ``output_path`` and print a summary.

    Args:
        image_path: Screenshot to analyse. Read via ``np.fromfile`` +
            ``cv2.imdecode`` so that non-ASCII paths are supported.
        output_path: Where the annotated image is written; its extension is
            passed to ``cv2.imencode`` to choose the format.
        target_name: Speaker label for left-side messages. Right-side
            messages are labelled "我".

    Returns:
        A list of ``"<speaker>: <text>"`` strings in top-to-bottom order.
        The list is empty when the image cannot be decoded, when OCR could
        not be initialised, or when no text was kept.
    """
    img_data = np.fromfile(image_path, dtype=np.uint8)
    img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
    if img is None:
        # imdecode signals an unreadable/corrupt image with None; stop here
        # instead of failing later with an opaque AttributeError.
        print(f"无法读取图片: {image_path}")
        return []

    # Untouched copy for OCR so the boxes drawn below are never recognised.
    img_clean = img.copy()

    height, width = img.shape[:2]
    print(f"图片尺寸: {width}x{height}")

    # 1. Pre-processing: grayscale -> adaptive threshold -> closing to fill
    #    small holes inside shapes.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    # 2-3. Contour search and avatar filtering.
    avatars = _find_avatars(closed, width, height)
    print(f"找到有效头像数量: {len(avatars)}")

    # OCR is best-effort: without a reader the image is still annotated.
    try:
        reader = get_easyocr_reader(gpu=True)
        print("OCR 初始化成功")
    except Exception as e:
        print(f"OCR 初始化失败: {e}")
        reader = None

    dialogue_log = []
    input_area_top = int(height * 0.9)

    # 4. Message regions. Consecutive avatars on the same side form a group;
    #    the region runs from just above the group's first avatar to just
    #    above the next group's first avatar (or to the input area).
    i = 0
    while i < len(avatars):
        current_side = avatars[i]['side']

        # j becomes the index of the first avatar of the next group.
        j = i + 1
        while j < len(avatars) and avatars[j]['side'] == current_side:
            j += 1

        start_y = max(0, avatars[i]['y'] - 10)
        if j < len(avatars):
            # Leave a 30 px gap before the next group, but keep >= 10 px height.
            end_y = max(start_y + 10, avatars[j]['y'] - 30)
        else:
            end_y = input_area_top

        # OpenCV colours are BGR: green for the left side, red for the right.
        box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255)
        cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5)
        print(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")

        if reader:
            try:
                safe_end_y = min(height, end_y)
                if safe_end_y > start_y:
                    roi_img = img_clean[start_y:safe_end_y, 0:width]
                    combined_text = _join_ocr_text(reader.read_text(roi_img))
                    if combined_text.strip():
                        role = target_name if current_side == "Left" else "我"
                        dialogue_log.append(f"{role}: {combined_text}")
                        print(f"  -> OCR结果: {combined_text}")
            except Exception as e:
                print(f"  -> OCR出错: {e}")

        i = j

    # 5. Avatar outlines, drawn last so they sit on top of the region boxes.
    for av in avatars:
        x, y, w, h = av['x'], av['y'], av['w'], av['h']
        # BGR: blue for the left side, yellow for the right side.
        color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255)
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 10)
        print(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}")

    # 6. Save. imencode + tofile is used because cv2.imwrite does not handle
    #    non-ASCII paths.
    try:
        ext = os.path.splitext(output_path)[1]
        encoded_ok, encoded = cv2.imencode(ext, img)
        if encoded_ok:
            encoded.tofile(output_path)
            print(f"结果已保存至: {output_path}")
        else:
            print(f"保存图片失败: 无法编码为 {ext} 格式")
    except Exception as e:
        print(f"保存图片失败: {e}")

    print("\n" + "="*30)
    print("对话内容汇总:")
    for line in dialogue_log:
        print(line)
    print("="*30 + "\n")

    return dialogue_log
|
||
|
||
if __name__ == "__main__":
    # Screenshot to analyse and destination of the annotated copy.
    input_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_20260121_113553.jpg"
    output_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_analyzed.jpg"

    # Display name of the contact (same as the search keyword).
    target_name = "糖豆爸爸"

    # Step 1: annotate the screenshot and print the recognised dialogue.
    analyze_chat_image(input_file, output_file, target_name=target_name)

    # Step 2: locate the input box and type a message on the device.
    print("\n" + "="*30)
    print("开始执行自动化输入...")

    # The coordinates come from the screenshot, so the device must still be
    # showing the same layout as when the screenshot was taken.
    coords = find_input_box_center(input_file)
    if not coords:
        print("未找到输入框坐标")
    else:
        print(f"输入框坐标: {coords}")
        perform_input_action(coords, "AI助手我现在可以开始和你聊天了！")
|