'commit'
This commit is contained in:
Binary file not shown.
|
Before Width: | Height: | Size: 68 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 158 KiB |
@@ -6,19 +6,400 @@ import sys
|
||||
import os
|
||||
import cv2
|
||||
import numpy as np
|
||||
import re
|
||||
|
||||
# 添加项目根目录到 sys.path 以便导入 Util
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
if project_root not in sys.path:
|
||||
sys.path.append(project_root)
|
||||
|
||||
from Util.EasyOcrKit import get_easyocr_reader
|
||||
|
||||
# 配置日志
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger("TestWeChat")
|
||||
|
||||
def analyze_chat_image(image_path, output_path):
|
||||
def find_input_box_center(image_path):
    """
    Locate the message input box near the bottom edge of a chat screenshot.

    The bottom 12% of the image is searched twice: first on dilated Canny
    edges, then (only when that finds nothing) on an adaptive threshold.
    When both passes fail, a default position at 94% of the image height is
    returned together with a synthetic rectangle for visualisation.

    Returns:
        ``((center_x, center_y), (x, y, w, h))`` in full-image pixel
        coordinates, or ``(None, None)`` when the image cannot be decoded or
        any step raises.
    """
    def largest_box(contour_list, w_min, w_max, h_min, h_max, y_offset, y_cutoff):
        # Largest bounding box (strip-relative) that fits the size window and
        # whose centre does not lie below y_cutoff; None if nothing qualifies.
        chosen = None
        chosen_area = 0
        for contour in contour_list:
            bx, by, bw, bh = cv2.boundingRect(contour)
            size_ok = w_min < bw < w_max and h_min < bh < h_max
            if not size_ok:
                continue
            # Boxes centred in the lowest part of the screen are skipped
            # (navigation bar / gesture handle area).
            if y_offset + by + bh // 2 > y_cutoff:
                continue
            if bw * bh > chosen_area:
                chosen_area = bw * bh
                chosen = (bx, by, bw, bh)
        return chosen

    try:
        # Decode from raw bytes so that non-ASCII file paths can be read.
        raw = np.fromfile(image_path, dtype=np.uint8)
        image = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if image is None:
            return None, None

        img_h, img_w = image.shape[:2]

        # Only the bottom 12% of the screenshot is inspected.
        strip_h = int(img_h * 0.12)
        strip_top = img_h - strip_h
        strip_gray = cv2.cvtColor(image[strip_top:img_h, 0:img_w], cv2.COLOR_BGR2GRAY)
        y_cutoff = img_h * 0.98

        # Pass 1: Canny edges, dilated once to reconnect broken outlines.
        edge_map = cv2.dilate(
            cv2.Canny(strip_gray, 50, 150),
            np.ones((3, 3), np.uint8),
            iterations=1,
        )
        edge_contours, _ = cv2.findContours(edge_map, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        box = largest_box(
            edge_contours,
            img_w * 0.5, img_w * 0.98,
            30, strip_h * 0.9,
            strip_top, y_cutoff,
        )
        if box is not None:
            bx, by, bw, bh = box
            mid_x = bx + bw // 2
            mid_y = strip_top + by + bh // 2
            logger.info(f"找到输入框(CV-Canny): ({mid_x}, {mid_y}), 尺寸: {bw}x{bh}")
            return (mid_x, mid_y), (bx, strip_top + by, bw, bh)

        # Pass 2: adaptive threshold with a slightly looser size window.
        binary = cv2.adaptiveThreshold(
            strip_gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV, 11, 2,
        )
        binary_contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        box = largest_box(
            binary_contours,
            img_w * 0.4, img_w * 0.95,
            20, strip_h * 0.8,
            strip_top, y_cutoff,
        )
        if box is not None:
            bx, by, bw, bh = box
            mid_x = bx + bw // 2
            mid_y = strip_top + by + bh // 2
            logger.info(f"找到输入框(CV-Adaptive): ({mid_x}, {mid_y}), 尺寸: {bw}x{bh}")
            return (mid_x, mid_y), (bx, strip_top + by, bw, bh)

        # Last resort: fixed position at 94% of the height, horizontally centred.
        logger.warning("未找到明显输入框轮廓,使用更靠下的默认坐标 (94%)")
        fallback_y = int(img_h * 0.94)
        fallback_x = img_w // 2

        # Synthetic rectangle (70% wide, 8% tall) centred on the fallback point.
        ghost_w = int(img_w * 0.7)
        ghost_h = int(img_h * 0.08)
        ghost_x = (img_w - ghost_w) // 2
        ghost_y = fallback_y - ghost_h // 2

        return (fallback_x, fallback_y), (ghost_x, ghost_y, ghost_w, ghost_h)

    except Exception as e:
        logger.error(f"查找输入框失败: {e}")
        return None, None
|
||||
|
||||
def find_send_button(d):
    """
    Take a screenshot and look for the green send button by colour.

    The screenshot is stored under ``Screenshots/`` next to this file so it
    can be inspected afterwards. Only the bottom 60% / right 30% of the screen
    is searched, because the button moves up when the keyboard is shown.

    Args:
        d: device object providing ``screenshot(path)``.

    Returns:
        ``(cx, cy)`` centre of the lowest matching green region in full-screen
        pixel coordinates, or ``None`` when nothing matches or an error occurs.
    """
    try:
        screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(screenshot_dir, exist_ok=True)
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        debug_shot_path = os.path.join(screenshot_dir, f"debug_send_check_{timestamp}.jpg")

        d.screenshot(debug_shot_path)
        logger.info(f"发送按钮查找调试截图已保存: {debug_shot_path}")

        # cv2.imread cannot open paths containing non-ASCII characters, so the
        # file is decoded from raw bytes like everywhere else in this module.
        img = cv2.imdecode(np.fromfile(debug_shot_path, dtype=np.uint8), cv2.IMREAD_COLOR)
        if img is None:
            return None

        h, w = img.shape[:2]

        # Region of interest: bottom 60% of the height, right 30% of the width.
        roi_h = int(h * 0.6)
        roi_w = int(w * 0.3)
        y_start = h - roi_h
        x_start = w - roi_w
        roi = img[y_start:h, x_start:w]

        # Threshold the green hue band in HSV space.
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        lower_green = np.array([35, 80, 80])
        upper_green = np.array([90, 255, 255])
        mask = cv2.inRange(hsv, lower_green, upper_green)

        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Each candidate is (top_y, area, (x, y, w, h)), all ROI-relative.
        candidates = []
        for cnt in contours:
            area = cv2.contourArea(cnt)
            # Drop speckle noise and very large regions.
            if not 500 < area < 30000:
                continue
            x, y, cw, ch = cv2.boundingRect(cnt)
            # Keep roughly square to moderately wide shapes only.
            if 0.5 < cw / ch < 3.0:
                candidates.append((y, area, (x, y, cw, ch)))

        if candidates:
            # The button is taken to be the lowest green element; max() keeps
            # the first candidate on ties, matching a stable descending sort.
            _, area, (bx, by, bw, bh) = max(candidates, key=lambda c: c[0])
            # Map the ROI-relative centre back to full-screen coordinates.
            cx = x_start + bx + bw // 2
            cy = y_start + by + bh // 2

            logger.info(f"通过图像识别找到发送按钮 (Bottom-Most): ({cx}, {cy}), 面积: {area}")
            return cx, cy

        logger.warning("未通过图像识别找到绿色发送按钮")
        return None

    except Exception as e:
        logger.error(f"查找发送按钮出错: {e}")
        return None
|
||||
|
||||
def take_debug_screenshot(d, step_name):
    """
    Save a timestamped debug screenshot for one automation step.

    Args:
        d: device object providing ``screenshot(path)``.
        step_name: label embedded in the file name and in the log lines.

    Returns:
        Full path of the saved screenshot, or ``None`` when saving failed
        (the failure is logged, never raised).
    """
    try:
        screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(screenshot_dir, exist_ok=True)
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        filename = f"debug_{timestamp}_{step_name}.jpg"
        save_path = os.path.join(screenshot_dir, filename)
        d.screenshot(save_path)
        logger.info(f"📸 [调试截图] {step_name} 已保存: {filename}")
        return save_path
    except Exception as e:
        logger.error(f"截图失败 ({step_name}): {e}")
        return None
|
||||
|
||||
def _click_send_button(d, shot_text, shot_image, shot_fallback):
    """
    Press the send button, trying three strategies in order.

    1. A widget whose text is "发送".
    2. Colour-based detection via ``find_send_button``.
    3. A blind tap at 90% width / 96.5% height of the window.

    A debug screenshot named after the strategy that was used is taken
    afterwards. Any failure is logged and swallowed.
    """
    try:
        send_widget = d(text="发送")
        if send_widget.exists:
            send_widget.click()
            logger.info("已点击 '发送' 按钮 (Native Text)")
            take_debug_screenshot(d, shot_text)
            return

        logger.info("未找到 '发送' 文本控件,尝试图像识别...")
        send_btn_coords = find_send_button(d)
        if send_btn_coords:
            sx, sy = send_btn_coords
            d.click(sx, sy)
            logger.info(f"已点击 '发送' 按钮 (Image Rec): {sx}, {sy}")
            take_debug_screenshot(d, shot_image)
            return

        width, height = d.window_size()
        fallback_x = int(width * 0.9)
        fallback_y = int(height * 0.965)
        logger.info(f"未识别到发送按钮,尝试盲点右下角: {fallback_x}, {fallback_y}")
        d.click(fallback_x, fallback_y)
        take_debug_screenshot(d, shot_fallback)
    except Exception as e:
        logger.error(f"点击发送按钮失败: {e}")


def perform_input_action(coords, text):
    """
    Type ``text`` into the chat input box and send it.

    The native ``android.widget.EditText`` widget is tried first. If it is not
    found, or anything in that path raises, the function falls back to tapping
    the given screen coordinates and typing with ``send_keys``.

    Args:
        coords: ``(x, y)`` screen position of the input box, used only by the
            coordinate fallback; a falsy value disables the fallback.
        text: message to type.

    Returns:
        None. Failures are logged rather than raised, except for a ``coords``
        value that cannot be unpacked into two items.
    """
    # Kept outside the try so the fallback can tell whether a connection exists.
    d = None

    # --- Preferred path: native input widget -------------------------------
    try:
        d = u2.connect()
        input_elem = d(className="android.widget.EditText")

        if input_elem.exists:
            logger.info("发现原生输入框控件,尝试点击...")

            take_debug_screenshot(d, "native_01_before_click")

            # Tap twice: the second tap follows the first after a short pause.
            input_elem.click()
            time.sleep(0.5)
            input_elem.click()
            time.sleep(1)

            take_debug_screenshot(d, "native_02_after_click_keyboard")

            logger.info(f"输入文本: {text}")

            # Best effort: set_text may fail; the read-back below decides
            # whether send_keys is needed.
            try:
                input_elem.set_text(text)
            except Exception as e:
                logger.debug(f"set_text 失败: {e}")

            time.sleep(0.5)

            # Verify the text landed; otherwise type it with send_keys.
            try:
                current_text = input_elem.get_text()
                if not current_text or current_text != text:
                    logger.warning(f"set_text 似乎未生效 (当前: {current_text}),尝试 send_keys...")
                    d.send_keys(text)
            except Exception:
                d.send_keys(text)

            take_debug_screenshot(d, "native_03_after_text_input")

            time.sleep(0.5)
            d.press("enter")

            _click_send_button(
                d,
                "native_04_after_send_click_text",
                "native_04_after_send_click_image",
                "native_04_after_send_click_fallback",
            )

            logger.info("输入完成 (Native)")
            return

        logger.warning("未找到输入框元素 (Native),转入坐标点击模式...")

    except Exception as e:
        logger.warning(f"原生控件操作失败,降级为坐标点击: {e}")

    # --- Fallback path: tap raw coordinates ----------------------------------
    if not coords:
        logger.error("坐标无效,无法执行点击输入")
        return

    x, y = coords

    try:
        # Reconnect only when the first connect never succeeded. Doing it
        # inside this try means a second failure is logged instead of raised.
        if d is None:
            d = u2.connect()

        # Pull a tap that is almost on the bottom edge back up to 97%.
        try:
            width, height = d.window_size()
            if y > height * 0.99:
                logger.warning(f"检测到的输入框坐标 y={y} 过于靠底,修正为 {height * 0.97}")
                y = int(height * 0.97)
        except Exception as e:
            logger.debug(f"获取屏幕尺寸失败,跳过坐标修正: {e}")

        logger.info(f"设备连接成功: {d.info.get('serial')}")

        take_debug_screenshot(d, "coord_01_before_click_input")

        logger.info(f"点击坐标: ({x}, {y})")
        d.click(x, y)
        time.sleep(0.5)
        d.click(x, y)  # second tap

        # Give the keyboard time to appear.
        time.sleep(1.5)

        take_debug_screenshot(d, "coord_02_after_click_input_keyboard")

        logger.info(f"输入文本 (SendKeys): {text}")
        try:
            d.send_keys(text)
        except Exception as sk_e:
            logger.error(f"send_keys 失败: {sk_e}")

        take_debug_screenshot(d, "coord_03_after_input_text")

        time.sleep(0.5)
        d.press("enter")

        _click_send_button(
            d,
            "coord_04_after_click_send_native",
            "coord_04_after_click_send_image",
            "coord_04_after_click_send_fallback",
        )

        logger.info("输入完成 (Coordinate)")

    except Exception as e:
        logger.error(f"自动化操作失败: {e}")
|
||||
|
||||
def analyze_chat_image(image_path, output_path, target_name="对方"):
|
||||
"""
|
||||
识别微信聊天截图中的头像并画框,识别对话内容
|
||||
"""
|
||||
logger.info(f"正在分析图片: {image_path}")
|
||||
|
||||
# 读取图片
|
||||
# 注意:cv2.imread 不支持中文路径,需要用 np.fromfile 读取
|
||||
# 读取图片(支持中文路径)
|
||||
try:
|
||||
img_data = np.fromfile(image_path, dtype=np.uint8)
|
||||
img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
|
||||
@@ -29,10 +410,16 @@ def analyze_chat_image(image_path, output_path):
|
||||
if img is None:
|
||||
logger.error("图片读取为空")
|
||||
return
|
||||
|
||||
|
||||
# 备份一份干净的图片用于 OCR (避免识别到画上去的框)
|
||||
img_clean = img.copy()
|
||||
|
||||
height, width = img.shape[:2]
|
||||
logger.info(f"图片尺寸: {width}x{height}")
|
||||
|
||||
# 调用输入框识别,获取坐标和可视化框
|
||||
input_center, input_rect = find_input_box_center(image_path)
|
||||
|
||||
# 1. 预处理
|
||||
# 转为灰度图
|
||||
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
||||
@@ -49,7 +436,8 @@ def analyze_chat_image(image_path, output_path):
|
||||
contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
||||
logger.info(f"检测到轮廓数量: {len(contours)}")
|
||||
|
||||
avatar_count = 0
|
||||
# 收集所有符合条件的头像
|
||||
avatars = []
|
||||
|
||||
# 3. 筛选轮廓
|
||||
for contour in contours:
|
||||
@@ -69,7 +457,7 @@ def analyze_chat_image(image_path, output_path):
|
||||
# 排除底部输入框区域 (假设底部 10% 为输入区域)
|
||||
if y > height * 0.9:
|
||||
continue
|
||||
|
||||
|
||||
# 左侧头像:靠左边 (x < width * 0.18)
|
||||
# 右侧头像:靠右边 (x > width * 0.82)
|
||||
is_left = x < width * 0.18
|
||||
@@ -77,22 +465,124 @@ def analyze_chat_image(image_path, output_path):
|
||||
|
||||
if 0.8 <= aspect_ratio <= 1.2 and min_w < w < max_w:
|
||||
if is_left or is_right:
|
||||
# 确定颜色
|
||||
# 左侧:蓝色 (BGR: 255, 0, 0)
|
||||
# 右侧:黄色 (BGR: 0, 255, 255)
|
||||
color = (255, 0, 0) if is_left else (0, 255, 255)
|
||||
|
||||
# 绘制矩形框,线宽为 3
|
||||
cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)
|
||||
|
||||
label = "Left" if is_left else "Right"
|
||||
avatar_count += 1
|
||||
logger.info(f"找到头像: 位置=({x},{y}), 尺寸={w}x{h}, 侧别={label}")
|
||||
side = "Left" if is_left else "Right"
|
||||
avatars.append({
|
||||
'x': x, 'y': y, 'w': w, 'h': h,
|
||||
'side': side
|
||||
})
|
||||
|
||||
logger.info(f"共标记了 {avatar_count} 个头像")
|
||||
# 按 y 坐标排序
|
||||
avatars.sort(key=lambda a: a['y'])
|
||||
|
||||
logger.info(f"找到有效头像数量: {len(avatars)}")
|
||||
|
||||
# 4. 保存结果
|
||||
# 初始化 OCR
|
||||
try:
|
||||
reader = get_easyocr_reader(gpu=True)
|
||||
logger.info("OCR 初始化成功")
|
||||
except Exception as e:
|
||||
logger.error(f"OCR 初始化失败: {e}")
|
||||
reader = None
|
||||
|
||||
dialogue_log = []
|
||||
|
||||
# 4. 绘制对话内容框 (Green/Red Boxes)
|
||||
if avatars:
|
||||
i = 0
|
||||
while i < len(avatars):
|
||||
current_group_start = i
|
||||
current_side = avatars[i]['side']
|
||||
|
||||
# 找到当前组的结束位置 (即下一个不同侧头像的索引)
|
||||
j = i + 1
|
||||
while j < len(avatars) and avatars[j]['side'] == current_side:
|
||||
j += 1
|
||||
|
||||
# Start Y: 当前组第一个头像的上方 (例如 -10px)
|
||||
start_y = max(0, avatars[i]['y'] - 10)
|
||||
|
||||
# End Y
|
||||
if j < len(avatars):
|
||||
end_y = max(start_y + 10, avatars[j]['y'] - 30)
|
||||
else:
|
||||
# 最后一个框的底边,使用输入框的上沿
|
||||
if input_rect:
|
||||
_, input_y, _, _ = input_rect
|
||||
end_y = max(start_y + 10, input_y - 10)
|
||||
else:
|
||||
end_y = int(height * 0.9) # 默认
|
||||
|
||||
# 绘制大框
|
||||
# 左侧 (Left) -> 对方 -> 绿色 (0, 255, 0)
|
||||
# 右侧 (Right) -> 我 -> 红色 (0, 0, 255)
|
||||
box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255)
|
||||
|
||||
cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5)
|
||||
|
||||
logger.info(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")
|
||||
|
||||
# OCR 识别区域文本
|
||||
if reader:
|
||||
try:
|
||||
safe_start_y = max(0, start_y)
|
||||
safe_end_y = min(height, end_y)
|
||||
|
||||
if safe_end_y > safe_start_y:
|
||||
roi_img = img_clean[safe_start_y:safe_end_y, 0:width]
|
||||
|
||||
# 识别
|
||||
results = reader.read_text(roi_img)
|
||||
|
||||
# 过滤关键词
|
||||
ignore_keywords = ["点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手"]
|
||||
|
||||
texts = []
|
||||
for res in results:
|
||||
text = res[1]
|
||||
prob = res[2]
|
||||
if prob > 0.3:
|
||||
# 1. 检查是否包含屏蔽词
|
||||
if any(kw in text for kw in ignore_keywords):
|
||||
continue
|
||||
|
||||
# 2. 检查是否为单行时间
|
||||
if re.match(r'^\s*\d{1,2}:\d{2}\s*$', text):
|
||||
continue
|
||||
|
||||
texts.append(text)
|
||||
|
||||
combined_text = " ".join(texts)
|
||||
|
||||
if combined_text.strip():
|
||||
role = target_name if current_side == "Left" else "我"
|
||||
dialogue_log.append(f"{role}: {combined_text}")
|
||||
logger.info(f" -> OCR结果: {combined_text}")
|
||||
except Exception as e:
|
||||
logger.error(f" -> OCR出错: {e}")
|
||||
|
||||
# 移动到下一组
|
||||
i = j
|
||||
|
||||
# 5. 绘制头像框 (Blue/Yellow Boxes) - 画在内容框之上
|
||||
for av in avatars:
|
||||
x, y, w, h = av['x'], av['y'], av['w'], av['h']
|
||||
color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255)
|
||||
|
||||
cv2.rectangle(img, (x, y), (x + w, y + h), color, 10)
|
||||
logger.info(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}")
|
||||
|
||||
# 6. 保存结果
|
||||
try:
|
||||
# 如果有识别到输入框,画出来 (紫色)
|
||||
if input_rect:
|
||||
rx, ry, rw, rh = input_rect
|
||||
cv2.rectangle(img, (rx, ry), (rx + rw, ry + rh), (255, 0, 255), 5)
|
||||
# 画中心点
|
||||
if input_center:
|
||||
cx, cy = input_center
|
||||
cv2.circle(img, (cx, cy), 10, (255, 0, 255), -1)
|
||||
logger.info(f"已绘制输入框标记: {input_rect}")
|
||||
|
||||
# cv2.imwrite 不支持中文路径,使用 imencode + tofile
|
||||
ext = os.path.splitext(output_path)[1]
|
||||
cv2.imencode(ext, img)[1].tofile(output_path)
|
||||
@@ -100,6 +590,12 @@ def analyze_chat_image(image_path, output_path):
|
||||
except Exception as e:
|
||||
logger.error(f"保存分析图片失败: {e}")
|
||||
|
||||
logger.info("\n" + "="*30)
|
||||
logger.info("对话内容汇总:")
|
||||
for line in dialogue_log:
|
||||
logger.info(line)
|
||||
logger.info("="*30 + "\n")
|
||||
|
||||
def main():
|
||||
logger.info("开始执行微信搜索测试...")
|
||||
|
||||
@@ -197,11 +693,25 @@ def main():
|
||||
logger.info(f"✅ 原始截图已保存: {save_path}")
|
||||
|
||||
# 6. 分析截图并标记头像
|
||||
logger.info("步骤 6: 自动标记头像...")
|
||||
logger.info("步骤 6: 自动标记头像和识别内容...")
|
||||
analyzed_filename = f"chat_result_{timestamp}_analyzed.jpg"
|
||||
analyzed_path = os.path.join(screenshot_dir, analyzed_filename)
|
||||
|
||||
analyze_chat_image(save_path, analyzed_path)
|
||||
analyze_chat_image(save_path, analyzed_path, target_name=target_name)
|
||||
|
||||
# 7. 查找输入框并执行自动化操作
|
||||
logger.info("步骤 7: 自动回复...")
|
||||
# 注意:analyze_chat_image 内部已经调用了 find_input_box_center 并且画在图上了
|
||||
# 但我们需要返回值来执行操作。analyze_chat_image 并没有返回坐标。
|
||||
# 为了简单,再次调用一次 (或者修改 analyze_chat_image 返回坐标,但改动较大)
|
||||
# 这里直接再次调用 find_input_box_center 获取坐标
|
||||
coords, _ = find_input_box_center(save_path)
|
||||
|
||||
if coords:
|
||||
logger.info(f"输入框坐标: {coords}")
|
||||
perform_input_action(coords, "AI助手我现在可以开始和你聊天了!")
|
||||
else:
|
||||
logger.warning("未找到输入框坐标,跳过回复")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"❌ 截图或分析失败: {e}")
|
||||
|
||||
@@ -126,9 +126,9 @@ def analyze_chat_image(image_path, output_path):
|
||||
# 注意 OpenCV 颜色是 BGR
|
||||
box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255)
|
||||
|
||||
# 绘制矩形 (空心,线宽 2)
|
||||
# 绘制矩形 (空心,线宽 5)
|
||||
# X 轴范围:0 到 width
|
||||
cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 2)
|
||||
cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5)
|
||||
|
||||
logger.info(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")
|
||||
|
||||
@@ -142,7 +142,7 @@ def analyze_chat_image(image_path, output_path):
|
||||
# 右侧:黄色 (BGR: 0, 255, 255)
|
||||
color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255)
|
||||
|
||||
cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)
|
||||
cv2.rectangle(img, (x, y), (x + w, y + h), color, 10)
|
||||
logger.info(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}")
|
||||
|
||||
logger.info(f"共标记了 {len(avatars)} 个头像")
|
||||
BIN
Test/__pycache__/T1_StartWeiXin.cpython-310.pyc
Normal file
BIN
Test/__pycache__/T1_StartWeiXin.cpython-310.pyc
Normal file
Binary file not shown.
@@ -1,24 +1,126 @@
|
||||
# coding=utf-8
|
||||
import cv2
|
||||
import numpy as np
|
||||
import sys
|
||||
import os
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
try:
|
||||
import uiautomator2 as u2
|
||||
except ImportError:
|
||||
u2 = None
|
||||
|
||||
def analyze_chat_image(image_path, output_path):
|
||||
print(f"正在读取图片: {image_path}")
|
||||
|
||||
# 读取图片
|
||||
# 注意:cv2.imread 不支持中文路径,需要用 np.fromfile 读取
|
||||
# 添加项目根目录到 sys.path 以便导入 Util
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
if project_root not in sys.path:
|
||||
sys.path.append(project_root)
|
||||
|
||||
from Util.EasyOcrKit import get_easyocr_reader
|
||||
|
||||
# 配置日志
|
||||
logging.basicConfig(level=logging.INFO, format='%(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def find_input_box_center(image_path):
    """
    Find the centre of the input box at the bottom of a chat screenshot.

    The bottom 15% of the image is binarised with an adaptive threshold and
    the largest contour wider than half the image and taller than 20 px is
    taken as the input box.

    Returns:
        ``(center_x, center_y)`` in full-image pixel coordinates. When no
        contour qualifies, the default ``(width // 2, int(height * 0.95))`` is
        returned. ``None`` when the image cannot be decoded or a step raises.
    """
    try:
        # Decode from raw bytes so that non-ASCII file paths can be read.
        img_data = np.fromfile(image_path, dtype=np.uint8)
        img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
        if img is None:
            return None

        height, width = img.shape[:2]

        # Only the bottom 15% of the screenshot is inspected.
        bottom_h = int(height * 0.15)
        crop_y_start = height - bottom_h
        crop = img[crop_y_start:height, 0:width]

        gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)

        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        best_cnt = None
        max_area = 0

        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            # Keep wide boxes (> 50% of the image width) taller than 20 px.
            if w > width * 0.5 and h > 20 and w * h > max_area:
                max_area = w * h
                best_cnt = (x, y, w, h)

        if best_cnt is None:
            # Nothing matched: fall back to a fixed point near the bottom.
            logger.warning("未找到明显输入框轮廓,使用默认坐标")
            return width // 2, int(height * 0.95)

        x, y, w, h = best_cnt
        center_x = x + w // 2
        # y is relative to the cropped strip; shift it back to the full image.
        center_y = crop_y_start + y + h // 2
        logger.info(f"找到输入框: ({center_x}, {center_y}), 尺寸: {w}x{h}")
        return center_x, center_y

    except Exception as e:
        # The earlier print("读取图片失败…") was dropped: it mislabelled every
        # failure as an image-read error and duplicated this log entry.
        logger.error(f"查找输入框失败: {e}")
        return None
|
||||
|
||||
def perform_input_action(coords, text):
    """
    Tap the given screen coordinates and type ``text`` there.

    Args:
        coords: ``(x, y)`` screen position to tap; a falsy value aborts.
        text: text to type after the tap.

    Returns:
        None. Connection and automation failures are logged and printed,
        not raised.
    """
    if u2 is None:
        logger.error("未安装 uiautomator2 库,无法执行自动化操作")
        return

    # The stray `if img is None:` check that used to sit here referenced a
    # name that does not exist in this function and raised NameError.
    if not coords:
        logger.error("坐标无效,无法执行点击输入")
        return

    x, y = coords
    try:
        d = u2.connect()
        logger.info(f"设备连接成功: {d.info.get('serial')}")

        logger.info(f"点击坐标: ({x}, {y})")
        d.click(x, y)

        # Wait for the keyboard to appear / the input box to take focus.
        time.sleep(1)

        logger.info(f"输入文本: {text}")
        d.send_keys(text)

        logger.info("输入完成")

    except Exception as e:
        logger.error(f"自动化操作失败: {e}")
        print(f"自动化操作失败: {e}")
|
||||
|
||||
def analyze_chat_image(image_path, output_path, target_name="对方"):
|
||||
# 读取图片(支持中文路径)
|
||||
img_data = np.fromfile(image_path, dtype=np.uint8)
|
||||
img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
|
||||
|
||||
# 备份一份干净的图片用于 OCR (避免识别到画上去的框)
|
||||
img_clean = img.copy()
|
||||
|
||||
height, width = img.shape[:2]
|
||||
print(f"图片尺寸: {width}x{height}")
|
||||
|
||||
@@ -79,6 +181,16 @@ def analyze_chat_image(image_path, output_path):
|
||||
|
||||
print(f"找到有效头像数量: {len(avatars)}")
|
||||
|
||||
# 初始化 OCR
|
||||
try:
|
||||
reader = get_easyocr_reader(gpu=True)
|
||||
print("OCR 初始化成功")
|
||||
except Exception as e:
|
||||
print(f"OCR 初始化失败: {e}")
|
||||
reader = None
|
||||
|
||||
dialogue_log = []
|
||||
|
||||
# 4. 绘制对话内容框 (Green/Red Boxes)
|
||||
# 策略:按顺序遍历头像,如果发现同侧连续,则视为一组。
|
||||
# 从当前组的第一个头像上方开始,直到下一个不同侧的头像上方(或底部)。
|
||||
@@ -122,6 +234,49 @@ def analyze_chat_image(image_path, output_path):
|
||||
|
||||
print(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")
|
||||
|
||||
# OCR 识别区域文本
|
||||
if reader:
|
||||
try:
|
||||
# 从原图(img_clean)裁剪区域
|
||||
# 注意边界检查
|
||||
safe_start_y = max(0, start_y)
|
||||
safe_end_y = min(height, end_y)
|
||||
|
||||
if safe_end_y > safe_start_y:
|
||||
roi_img = img_clean[safe_start_y:safe_end_y, 0:width]
|
||||
|
||||
# 识别
|
||||
results = reader.read_text(roi_img)
|
||||
|
||||
# 过滤关键词
|
||||
ignore_keywords = ["点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手"]
|
||||
|
||||
# 过滤并拼接文本 (置信度 > 0.3)
|
||||
texts = []
|
||||
for res in results:
|
||||
text = res[1]
|
||||
prob = res[2]
|
||||
if prob > 0.3:
|
||||
# 1. 检查是否包含屏蔽词
|
||||
if any(kw in text for kw in ignore_keywords):
|
||||
continue
|
||||
|
||||
# 2. 检查是否为单行时间 (如 11:35, 09:00)
|
||||
# 正则匹配: 只有时间格式,没有其他文字
|
||||
if re.match(r'^\s*\d{1,2}:\d{2}\s*$', text):
|
||||
continue
|
||||
|
||||
texts.append(text)
|
||||
|
||||
combined_text = " ".join(texts)
|
||||
|
||||
if combined_text.strip():
|
||||
role = target_name if current_side == "Left" else "我"
|
||||
dialogue_log.append(f"{role}: {combined_text}")
|
||||
print(f" -> OCR结果: {combined_text}")
|
||||
except Exception as e:
|
||||
print(f" -> OCR出错: {e}")
|
||||
|
||||
# 移动到下一组
|
||||
i = j
|
||||
|
||||
@@ -144,10 +299,32 @@ def analyze_chat_image(image_path, output_path):
|
||||
except Exception as e:
|
||||
print(f"保存图片失败: {e}")
|
||||
|
||||
print("\n" + "="*30)
|
||||
print("对话内容汇总:")
|
||||
for line in dialogue_log:
|
||||
print(line)
|
||||
print("="*30 + "\n")
|
||||
|
||||
if __name__ == "__main__":
    # Screenshot to analyse.
    input_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_20260121_113553.jpg"
    # Where the annotated image is written.
    output_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_analyzed.jpg"

    # Contact name used as the speaker label for left-side messages.
    target_name = "糖豆爸爸"

    # 1. Analyse the screenshot once. The earlier call without target_name
    #    was removed because it repeated the whole analysis.
    analyze_chat_image(input_file, output_file, target_name=target_name)

    # 2. Locate the input box and run the automated reply.
    print("\n" + "="*30)
    print("开始执行自动化输入...")

    # Coordinates come from the saved screenshot, so the device screen must
    # still show the same layout as when the screenshot was taken.
    coords = find_input_box_center(input_file)
    if coords:
        print(f"输入框坐标: {coords}")
        perform_input_action(coords, "AI助手我现在可以开始和你聊天了!")
    else:
        print("未找到输入框坐标")
|
||||
|
||||
BIN
Test/temp_send_check.jpg
Normal file
BIN
Test/temp_send_check.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 66 KiB |
Reference in New Issue
Block a user