This commit is contained in:
HuangHai
2026-01-21 14:13:26 +08:00
parent 701cdb1dd0
commit af46512212
7 changed files with 721 additions and 34 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 68 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 158 KiB

View File

@@ -6,19 +6,400 @@ import sys
import os
import cv2
import numpy as np
import re
# 添加项目根目录到 sys.path 以便导入 Util
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
sys.path.append(project_root)
from Util.EasyOcrKit import get_easyocr_reader
# 配置日志
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("TestWeChat")
def analyze_chat_image(image_path, output_path):
def _pick_largest_box(contours, w_bounds, h_bounds, crop_y_start, img_height):
    """Return the largest bounding box that passes the size filters, or None.

    Args:
        contours: contours found in the cropped bottom strip.
        w_bounds: (min_w, max_w), both exclusive, in pixels.
        h_bounds: (min_h, max_h), both exclusive, in pixels.
        crop_y_start: y offset of the cropped strip inside the full image.
        img_height: height of the full image, in pixels.

    Returns:
        (x, y, w, h) in strip-relative coordinates, or None if nothing matched.
    """
    min_w, max_w = w_bounds
    min_h, max_h = h_bounds
    best_box = None
    best_area = 0
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if not (min_w < w < max_w and min_h < h < max_h):
            continue
        # Skip boxes centred in the last 2% of the screen
        # (navigation bar / gesture handle).
        if crop_y_start + y + h // 2 > img_height * 0.98:
            continue
        if w * h > best_area:
            best_area = w * h
            best_box = (x, y, w, h)
    return best_box


def find_input_box_center(image_path):
    """Locate the chat input box at the bottom of a screenshot.

    Two contour-based strategies are tried on the bottom 12% of the image
    (Canny edges first, adaptive threshold second). If neither finds a
    plausible box, a default position at 94% of the image height is returned
    together with a synthetic rectangle for visualisation.

    Args:
        image_path: path of the screenshot; read through np.fromfile so that
            non-ASCII paths work.

    Returns:
        ((center_x, center_y), (x, y, w, h)) in full-image coordinates, or
        (None, None) if the image cannot be decoded or any error occurs.
    """
    try:
        img_data = np.fromfile(image_path, dtype=np.uint8)
        img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
        if img is None:
            return None, None

        height, width = img.shape[:2]

        # Only the bottom 12% is searched to keep the detection precise.
        bottom_h = int(height * 0.12)
        crop_y_start = height - bottom_h
        crop = img[crop_y_start:height, 0:width]
        gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)

        def to_result(box, label):
            # Convert a strip-relative box into full-image centre + rectangle.
            x, y, w, h = box
            center_x = x + w // 2
            center_y = crop_y_start + y + h // 2
            logger.info(f"找到输入框({label}): ({center_x}, {center_y}), 尺寸: {w}x{h}")
            return (center_x, center_y), (x, crop_y_start + y, w, h)

        # Strategy 1: Canny edges (works better on low-contrast UI),
        # dilated once to join broken edges.
        edges = cv2.Canny(gray, 50, 150)
        dilated = cv2.dilate(edges, np.ones((3, 3), np.uint8), iterations=1)
        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        box = _pick_largest_box(
            contours,
            (width * 0.5, width * 0.98),
            (30, bottom_h * 0.9),
            crop_y_start,
            height,
        )
        if box:
            return to_result(box, "CV-Canny")

        # Strategy 2: adaptive threshold (the earlier logic, kept as backup)
        # with slightly looser size limits.
        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)
        contours_thresh, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        box = _pick_largest_box(
            contours_thresh,
            (width * 0.4, width * 0.95),
            (20, bottom_h * 0.8),
            crop_y_start,
            height,
        )
        if box:
            return to_result(box, "CV-Adaptive")

        # Fallback: default position at 94% of the height (the earlier 90.5%
        # was reported as too high).
        logger.warning("未找到明显输入框轮廓,使用更靠下的默认坐标 (94%)")
        default_y = int(height * 0.94)
        center_x = width // 2

        # Synthetic rectangle, used only for drawing.
        fake_w = int(width * 0.7)
        fake_h = int(height * 0.08)
        fake_x = (width - fake_w) // 2
        fake_y = default_y - fake_h // 2
        return (center_x, default_y), (fake_x, fake_y, fake_w, fake_h)

    except Exception as e:
        logger.error(f"查找输入框失败: {e}")
        return None, None
def find_send_button(d):
    """Take a screenshot and locate the green send button by colour.

    The search region is the bottom 60% / right 30% of the screen, so the
    button is still found when the soft keyboard pushes it upwards.

    Args:
        d: connected device object providing ``screenshot(path)``.

    Returns:
        (cx, cy) centre of the lowest green candidate in full-screen
        coordinates, or None if nothing suitable is found or an error occurs.
    """
    try:
        # The screenshot is kept in the Screenshots directory for debugging.
        screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
        os.makedirs(screenshot_dir, exist_ok=True)

        timestamp = time.strftime("%Y%m%d_%H%M%S")
        debug_shot_path = os.path.join(screenshot_dir, f"debug_send_check_{timestamp}.jpg")

        d.screenshot(debug_shot_path)
        logger.info(f"发送按钮查找调试截图已保存: {debug_shot_path}")

        # cv2.imread cannot open non-ASCII paths; decode from a byte buffer
        # instead, as the rest of this file does.
        img_data = np.fromfile(debug_shot_path, dtype=np.uint8)
        img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
        if img is None:
            return None

        h, w = img.shape[:2]

        # Region of interest: bottom 60%, right 30%.
        y_start = h - int(h * 0.6)
        x_start = w - int(w * 0.3)
        roi = img[y_start:h, x_start:w]

        # Green mask in HSV space (WeChat green).
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        lower_green = np.array([35, 80, 80])
        upper_green = np.array([90, 255, 255])
        mask = cv2.inRange(hsv, lower_green, upper_green)

        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        valid_candidates = []
        for cnt in contours:
            area = cv2.contourArea(cnt)
            x, y, cw, ch = cv2.boundingRect(cnt)

            # Drop small noise and very large regions (e.g. backgrounds).
            if not 500 < area < 30000:
                continue
            # The button is roughly square or slightly wide.
            ratio = float(cw) / ch
            if not 0.5 < ratio < 3.0:
                continue

            valid_candidates.append({
                'area': area,
                'y': y_start + y,  # top edge in full-screen coordinates
                'rect': (x, y, cw, ch)
            })

        if valid_candidates:
            # The send button is taken to be the lowest green element.
            # max() returns the first maximum, matching a stable
            # descending sort followed by [0].
            best = max(valid_candidates, key=lambda c: c['y'])

            bx, by, bw, bh = best['rect']
            cx = x_start + bx + bw // 2
            cy = y_start + by + bh // 2

            logger.info(f"通过图像识别找到发送按钮 (Bottom-Most): ({cx}, {cy}), 面积: {best['area']}")
            return cx, cy

        logger.warning("未通过图像识别找到绿色发送按钮")
        return None

    except Exception as e:
        logger.error(f"查找发送按钮出错: {e}")
        return None
def take_debug_screenshot(d, step_name):
    """Save a timestamped debug screenshot into the Screenshots directory.

    Args:
        d: connected device object providing ``screenshot(path)``.
        step_name: short label embedded in the file name and the log line.

    Returns:
        Full path of the saved screenshot, or None if saving failed
        (the error is logged, not raised).
    """
    try:
        screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(screenshot_dir, exist_ok=True)

        timestamp = time.strftime("%Y%m%d_%H%M%S")
        filename = f"debug_{timestamp}_{step_name}.jpg"
        save_path = os.path.join(screenshot_dir, filename)

        d.screenshot(save_path)
        logger.info(f"📸 [调试截图] {step_name} 已保存: {filename}")
        return save_path
    except Exception as e:
        logger.error(f"截图失败 ({step_name}): {e}")
        return None
def _click_send_button(d, shot_names):
    """Tap the send button, best effort.

    Tries, in order: a widget whose text is "发送", colour-based image
    recognition via find_send_button, and finally a blind tap near the
    bottom-right corner. Errors are logged and never raised.

    Args:
        d: connected device object.
        shot_names: (after_text_click, after_image_click, after_fallback_click)
            step names for the debug screenshot taken after each kind of tap.
    """
    text_shot, image_shot, fallback_shot = shot_names
    try:
        if d(text="发送").exists:
            d(text="发送").click()
            logger.info("已点击 '发送' 按钮 (Native Text)")
            take_debug_screenshot(d, text_shot)
            return

        logger.info("未找到 '发送' 文本控件,尝试图像识别...")
        send_btn_coords = find_send_button(d)
        if send_btn_coords:
            sx, sy = send_btn_coords
            d.click(sx, sy)
            logger.info(f"已点击 '发送' 按钮 (Image Rec): {sx}, {sy}")
            take_debug_screenshot(d, image_shot)
            return

        width, height = d.window_size()
        fallback_x = int(width * 0.9)
        fallback_y = int(height * 0.965)
        logger.info(f"未识别到发送按钮,尝试盲点右下角: {fallback_x}, {fallback_y}")
        d.click(fallback_x, fallback_y)
        take_debug_screenshot(d, fallback_shot)
    except Exception as e:
        logger.error(f"点击发送按钮失败: {e}")


def perform_input_action(coords, text):
    """Focus the chat input box, type ``text`` and try to send it.

    The native EditText widget is used when it can be found. Otherwise, or
    if the native path raises, the function falls back to tapping ``coords``.
    All failures are logged; nothing is raised to the caller.

    Args:
        coords: (x, y) screen coordinates of the input box, used only by the
            fallback path; a falsy value disables the fallback.
        text: text to type into the input box.

    Returns:
        None.
    """
    d = None

    # Preferred path: find the input widget through uiautomator2.
    try:
        d = u2.connect()
        input_elem = d(className="android.widget.EditText")
        if input_elem.exists:
            logger.info("发现原生输入框控件,尝试点击...")

            take_debug_screenshot(d, "native_01_before_click")

            # Tap twice to make sure the field gets focus.
            input_elem.click()
            time.sleep(0.5)
            input_elem.click()
            time.sleep(1)

            # The keyboard is expected to be visible by now.
            take_debug_screenshot(d, "native_02_after_click_keyboard")

            logger.info(f"输入文本: {text}")

            # set_text first; a failure here is tolerated because the
            # verification below falls back to send_keys.
            try:
                input_elem.set_text(text)
            except Exception as e:
                logger.warning(f"set_text 失败: {e}")

            time.sleep(0.5)

            # Verify the text arrived; otherwise type it through send_keys.
            try:
                current_text = input_elem.get_text()
                if not current_text or current_text != text:
                    logger.warning(f"set_text 似乎未生效 (当前: {current_text}),尝试 send_keys...")
                    d.send_keys(text)
            except Exception as e:
                logger.warning(f"读取输入框文本失败,改用 send_keys: {e}")
                d.send_keys(text)

            take_debug_screenshot(d, "native_03_after_text_input")

            time.sleep(0.5)
            d.press("enter")

            _click_send_button(d, (
                "native_04_after_send_click_text",
                "native_04_after_send_click_image",
                "native_04_after_send_click_fallback",
            ))

            logger.info("输入完成 (Native)")
            return

        logger.warning("未找到输入框元素 (Native),转入坐标点击模式...")

    except Exception as e:
        logger.warning(f"原生控件操作失败,降级为坐标点击: {e}")

    # Fallback path: tap the supplied coordinates.
    if not coords:
        logger.error("坐标无效,无法执行点击输入")
        return

    x, y = coords

    # Reconnect only if the first attempt never produced a device handle.
    if d is None:
        try:
            d = u2.connect()
        except Exception as e:
            logger.error(f"自动化操作失败: {e}")
            return

    # Only clamp coordinates that sit in the very last 1% of the screen;
    # a stronger correction was found to hit the area above the input box.
    try:
        width, height = d.window_size()
        if y > height * 0.99:
            logger.warning(f"检测到的输入框坐标 y={y} 过于靠底,修正为 {height * 0.97}")
            y = int(height * 0.97)
    except Exception as e:
        logger.warning(f"获取屏幕尺寸失败,跳过坐标修正: {e}")

    try:
        logger.info(f"设备连接成功: {d.info.get('serial')}")

        take_debug_screenshot(d, "coord_01_before_click_input")

        logger.info(f"点击坐标: ({x}, {y})")
        d.click(x, y)
        time.sleep(0.5)
        d.click(x, y)  # second tap, same reason as in the native path

        # Give the keyboard time to appear.
        time.sleep(1.5)

        take_debug_screenshot(d, "coord_02_after_click_input_keyboard")

        logger.info(f"输入文本 (SendKeys): {text}")
        try:
            d.send_keys(text)
        except Exception as sk_e:
            logger.error(f"send_keys 失败: {sk_e}")

        take_debug_screenshot(d, "coord_03_after_input_text")

        time.sleep(0.5)
        d.press("enter")

        _click_send_button(d, (
            "coord_04_after_click_send_native",
            "coord_04_after_click_send_image",
            "coord_04_after_click_send_fallback",
        ))

        logger.info("输入完成 (Coordinate)")

    except Exception as e:
        logger.error(f"自动化操作失败: {e}")
def analyze_chat_image(image_path, output_path, target_name="对方"):
"""
识别微信聊天截图中的头像并画框,识别对话内容
"""
logger.info(f"正在分析图片: {image_path}")
# 读取图片
# 注意cv2.imread 不支持中文路径,需要用 np.fromfile 读取
# 读取图片(支持中文路径)
try:
img_data = np.fromfile(image_path, dtype=np.uint8)
img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
@@ -29,10 +410,16 @@ def analyze_chat_image(image_path, output_path):
if img is None:
logger.error("图片读取为空")
return
# 备份一份干净的图片用于 OCR (避免识别到画上去的框)
img_clean = img.copy()
height, width = img.shape[:2]
logger.info(f"图片尺寸: {width}x{height}")
# 调用输入框识别,获取坐标和可视化框
input_center, input_rect = find_input_box_center(image_path)
# 1. 预处理
# 转为灰度图
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
@@ -49,7 +436,8 @@ def analyze_chat_image(image_path, output_path):
contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
logger.info(f"检测到轮廓数量: {len(contours)}")
avatar_count = 0
# 收集所有符合条件的头像
avatars = []
# 3. 筛选轮廓
for contour in contours:
@@ -69,7 +457,7 @@ def analyze_chat_image(image_path, output_path):
# 排除底部输入框区域 (假设底部 10% 为输入区域)
if y > height * 0.9:
continue
# 左侧头像:靠左边 (x < width * 0.18)
# 右侧头像:靠右边 (x > width * 0.82)
is_left = x < width * 0.18
@@ -77,22 +465,124 @@ def analyze_chat_image(image_path, output_path):
if 0.8 <= aspect_ratio <= 1.2 and min_w < w < max_w:
if is_left or is_right:
# 确定颜色
# 左侧:蓝色 (BGR: 255, 0, 0)
# 右侧:黄色 (BGR: 0, 255, 255)
color = (255, 0, 0) if is_left else (0, 255, 255)
# 绘制矩形框,线宽为 3
cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)
label = "Left" if is_left else "Right"
avatar_count += 1
logger.info(f"找到头像: 位置=({x},{y}), 尺寸={w}x{h}, 侧别={label}")
side = "Left" if is_left else "Right"
avatars.append({
'x': x, 'y': y, 'w': w, 'h': h,
'side': side
})
logger.info(f"共标记了 {avatar_count} 个头像")
# 按 y 坐标排序
avatars.sort(key=lambda a: a['y'])
logger.info(f"找到有效头像数量: {len(avatars)}")
# 4. 保存结果
# 初始化 OCR
try:
reader = get_easyocr_reader(gpu=True)
logger.info("OCR 初始化成功")
except Exception as e:
logger.error(f"OCR 初始化失败: {e}")
reader = None
dialogue_log = []
# 4. 绘制对话内容框 (Green/Red Boxes)
if avatars:
i = 0
while i < len(avatars):
current_group_start = i
current_side = avatars[i]['side']
# 找到当前组的结束位置 (即下一个不同侧头像的索引)
j = i + 1
while j < len(avatars) and avatars[j]['side'] == current_side:
j += 1
# Start Y: 当前组第一个头像的上方 (例如 -10px)
start_y = max(0, avatars[i]['y'] - 10)
# End Y
if j < len(avatars):
end_y = max(start_y + 10, avatars[j]['y'] - 30)
else:
# 最后一个框的底边,使用输入框的上沿
if input_rect:
_, input_y, _, _ = input_rect
end_y = max(start_y + 10, input_y - 10)
else:
end_y = int(height * 0.9) # 默认
# 绘制大框
# 左侧 (Left) -> 对方 -> 绿色 (0, 255, 0)
# 右侧 (Right) -> 我 -> 红色 (0, 0, 255)
box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255)
cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5)
logger.info(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")
# OCR 识别区域文本
if reader:
try:
safe_start_y = max(0, start_y)
safe_end_y = min(height, end_y)
if safe_end_y > safe_start_y:
roi_img = img_clean[safe_start_y:safe_end_y, 0:width]
# 识别
results = reader.read_text(roi_img)
# 过滤关键词
ignore_keywords = ["点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手"]
texts = []
for res in results:
text = res[1]
prob = res[2]
if prob > 0.3:
# 1. 检查是否包含屏蔽词
if any(kw in text for kw in ignore_keywords):
continue
# 2. 检查是否为单行时间
if re.match(r'^\s*\d{1,2}:\d{2}\s*$', text):
continue
texts.append(text)
combined_text = " ".join(texts)
if combined_text.strip():
role = target_name if current_side == "Left" else ""
dialogue_log.append(f"{role}: {combined_text}")
logger.info(f" -> OCR结果: {combined_text}")
except Exception as e:
logger.error(f" -> OCR出错: {e}")
# 移动到下一组
i = j
# 5. 绘制头像框 (Blue/Yellow Boxes) - 画在内容框之上
for av in avatars:
x, y, w, h = av['x'], av['y'], av['w'], av['h']
color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255)
cv2.rectangle(img, (x, y), (x + w, y + h), color, 10)
logger.info(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}")
# 6. 保存结果
try:
# 如果有识别到输入框,画出来 (紫色)
if input_rect:
rx, ry, rw, rh = input_rect
cv2.rectangle(img, (rx, ry), (rx + rw, ry + rh), (255, 0, 255), 5)
# 画中心点
if input_center:
cx, cy = input_center
cv2.circle(img, (cx, cy), 10, (255, 0, 255), -1)
logger.info(f"已绘制输入框标记: {input_rect}")
# cv2.imwrite 不支持中文路径,使用 imencode + tofile
ext = os.path.splitext(output_path)[1]
cv2.imencode(ext, img)[1].tofile(output_path)
@@ -100,6 +590,12 @@ def analyze_chat_image(image_path, output_path):
except Exception as e:
logger.error(f"保存分析图片失败: {e}")
logger.info("\n" + "="*30)
logger.info("对话内容汇总:")
for line in dialogue_log:
logger.info(line)
logger.info("="*30 + "\n")
def main():
logger.info("开始执行微信搜索测试...")
@@ -197,11 +693,25 @@ def main():
logger.info(f"✅ 原始截图已保存: {save_path}")
# 6. 分析截图并标记头像
logger.info("步骤 6: 自动标记头像...")
logger.info("步骤 6: 自动标记头像和识别内容...")
analyzed_filename = f"chat_result_{timestamp}_analyzed.jpg"
analyzed_path = os.path.join(screenshot_dir, analyzed_filename)
analyze_chat_image(save_path, analyzed_path)
analyze_chat_image(save_path, analyzed_path, target_name=target_name)
# 7. 查找输入框并执行自动化操作
logger.info("步骤 7: 自动回复...")
# 注意analyze_chat_image 内部已经调用了 find_input_box_center 并且画在图上了
# 但我们需要返回值来执行操作。analyze_chat_image 并没有返回坐标。
# 为了简单,再次调用一次 (或者修改 analyze_chat_image 返回坐标,但改动较大)
# 这里直接再次调用 find_input_box_center 获取坐标
coords, _ = find_input_box_center(save_path)
if coords:
logger.info(f"输入框坐标: {coords}")
perform_input_action(coords, "AI助手我现在可以开始和你聊天了")
else:
logger.warning("未找到输入框坐标,跳过回复")
except Exception as e:
logger.error(f"❌ 截图或分析失败: {e}")

View File

@@ -126,9 +126,9 @@ def analyze_chat_image(image_path, output_path):
# 注意 OpenCV 颜色是 BGR
box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255)
# 绘制矩形 (空心,线宽 2)
# 绘制矩形 (空心,线宽 5)
# X 轴范围0 到 width
cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 2)
cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5)
logger.info(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")
@@ -142,7 +142,7 @@ def analyze_chat_image(image_path, output_path):
# 右侧:黄色 (BGR: 0, 255, 255)
color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255)
cv2.rectangle(img, (x, y), (x + w, y + h), color, 3)
cv2.rectangle(img, (x, y), (x + w, y + h), color, 10)
logger.info(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}")
logger.info(f"共标记了 {len(avatars)} 个头像")

Binary file not shown.

View File

@@ -1,24 +1,126 @@
# coding=utf-8
import cv2
import numpy as np
import sys
import os
import logging
import re
import time
try:
import uiautomator2 as u2
except ImportError:
u2 = None
def analyze_chat_image(image_path, output_path):
print(f"正在读取图片: {image_path}")
# 读取图片
# 注意cv2.imread 不支持中文路径,需要用 np.fromfile 读取
# 添加项目根目录到 sys.path 以便导入 Util
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
sys.path.append(project_root)
from Util.EasyOcrKit import get_easyocr_reader
# 配置日志
logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger(__name__)
def find_input_box_center(image_path):
    """Locate the centre of the chat input box at the bottom of a screenshot.

    The bottom 15% of the image is adaptively thresholded and the largest
    contour that is wider than half the screen and taller than 20 px is
    taken as the input box.

    Args:
        image_path: path of the screenshot; read through np.fromfile so that
            non-ASCII paths work.

    Returns:
        (center_x, center_y) in full-image coordinates. When no contour
        qualifies, a default point at 95% of the image height is returned.
        None if the image cannot be decoded or any error occurs.
    """
    try:
        img_data = np.fromfile(image_path, dtype=np.uint8)
        img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
        if img is None:
            return None

        height, width = img.shape[:2]

        # Only the bottom 15% of the screen is searched.
        bottom_h = int(height * 0.15)
        crop_y_start = height - bottom_h
        crop = img[crop_y_start:height, 0:width]

        gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)

        # Adaptive threshold: the box is expected to differ from the
        # background either by fill or by border.
        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)

        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        best_cnt = None
        max_area = 0
        for cnt in contours:
            x, y, w, h = cv2.boundingRect(cnt)
            # Wide (> 50% of the screen) and reasonably tall.
            if w > width * 0.5 and h > 20 and w * h > max_area:
                max_area = w * h
                best_cnt = (x, y, w, h)

        if best_cnt is None:
            # Fallback: centre of the bottom area.
            logger.warning("未找到明显输入框轮廓,使用默认坐标")
            return width // 2, int(height * 0.95)

        x, y, w, h = best_cnt
        center_x = x + w // 2
        center_y = crop_y_start + y + h // 2
        logger.info(f"找到输入框: ({center_x}, {center_y}), 尺寸: {w}x{h}")
        return center_x, center_y

    except Exception as e:
        # A single, accurate report; the earlier extra print claimed an
        # image-read failure for every kind of error.
        logger.error(f"查找输入框失败: {e}")
        return None
def perform_input_action(coords, text):
    """Tap the given screen coordinates and type ``text`` there.

    Args:
        coords: (x, y) screen coordinates of the input box.
        text: text to type after the tap.

    Returns:
        None. A missing uiautomator2 install, invalid coordinates and
        automation errors are reported and swallowed.
    """
    if u2 is None:
        logger.error("未安装 uiautomator2 库,无法执行自动化操作")
        return

    # The former `img is None` check was removed: `img` is not defined in
    # this function and the check raised NameError.
    if not coords:
        logger.error("坐标无效,无法执行点击输入")
        return

    x, y = coords

    try:
        # Connects to the default device.
        d = u2.connect()
        logger.info(f"设备连接成功: {d.info.get('serial')}")

        logger.info(f"点击坐标: ({x}, {y})")
        d.click(x, y)

        # Wait for the keyboard to appear / the field to become active.
        time.sleep(1)

        logger.info(f"输入文本: {text}")
        d.send_keys(text)

        # The text is intentionally not submitted here (no Enter key press).
        logger.info("输入完成")

    except Exception as e:
        logger.error(f"自动化操作失败: {e}")
        print(f"自动化操作失败: {e}")
def analyze_chat_image(image_path, output_path, target_name="对方"):
# 读取图片(支持中文路径)
img_data = np.fromfile(image_path, dtype=np.uint8)
img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
# 备份一份干净的图片用于 OCR (避免识别到画上去的框)
img_clean = img.copy()
height, width = img.shape[:2]
print(f"图片尺寸: {width}x{height}")
@@ -79,6 +181,16 @@ def analyze_chat_image(image_path, output_path):
print(f"找到有效头像数量: {len(avatars)}")
# 初始化 OCR
try:
reader = get_easyocr_reader(gpu=True)
print("OCR 初始化成功")
except Exception as e:
print(f"OCR 初始化失败: {e}")
reader = None
dialogue_log = []
# 4. 绘制对话内容框 (Green/Red Boxes)
# 策略:按顺序遍历头像,如果发现同侧连续,则视为一组。
# 从当前组的第一个头像上方开始,直到下一个不同侧的头像上方(或底部)。
@@ -122,6 +234,49 @@ def analyze_chat_image(image_path, output_path):
print(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")
# OCR 识别区域文本
if reader:
try:
# 从原图(img_clean)裁剪区域
# 注意边界检查
safe_start_y = max(0, start_y)
safe_end_y = min(height, end_y)
if safe_end_y > safe_start_y:
roi_img = img_clean[safe_start_y:safe_end_y, 0:width]
# 识别
results = reader.read_text(roi_img)
# 过滤关键词
ignore_keywords = ["点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手"]
# 过滤并拼接文本 (置信度 > 0.3)
texts = []
for res in results:
text = res[1]
prob = res[2]
if prob > 0.3:
# 1. 检查是否包含屏蔽词
if any(kw in text for kw in ignore_keywords):
continue
# 2. 检查是否为单行时间 (如 11:35, 09:00)
# 正则匹配: 只有时间格式,没有其他文字
if re.match(r'^\s*\d{1,2}:\d{2}\s*$', text):
continue
texts.append(text)
combined_text = " ".join(texts)
if combined_text.strip():
role = target_name if current_side == "Left" else ""
dialogue_log.append(f"{role}: {combined_text}")
print(f" -> OCR结果: {combined_text}")
except Exception as e:
print(f" -> OCR出错: {e}")
# 移动到下一组
i = j
@@ -144,10 +299,32 @@ def analyze_chat_image(image_path, output_path):
except Exception as e:
print(f"保存图片失败: {e}")
print("\n" + "="*30)
print("对话内容汇总:")
for line in dialogue_log:
print(line)
print("="*30 + "\n")
if __name__ == "__main__":
    # Screenshot to analyse.
    input_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_20260121_113553.jpg"
    # Where the annotated image is written.
    output_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_analyzed.jpg"

    # Contact name used as the speaker label for left-side messages
    # (matches the search keyword).
    target_name = "糖豆爸爸"

    # 1. Analyse once, with the speaker label. The earlier extra call without
    #    target_name repeated the OCR pass and wrote the same output file.
    analyze_chat_image(input_file, output_file, target_name=target_name)

    # 2. Locate the input box and run the automated reply.
    print("\n" + "="*30)
    print("开始执行自动化输入...")

    # The coordinates come from the saved screenshot, so the device must
    # still show the same layout as when the screenshot was taken.
    coords = find_input_box_center(input_file)
    if coords:
        print(f"输入框坐标: {coords}")
        perform_input_action(coords, "AI助手我现在可以开始和你聊天了")
    else:
        print("未找到输入框坐标")

BIN
Test/temp_send_check.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB