Files
aiData/Test/T1_StartWeiXin.py
HuangHai af46512212 'commit'
2026-01-21 14:13:26 +08:00

726 lines
27 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding=utf-8
import uiautomator2 as u2
import time
import logging
import sys
import os
import cv2
import numpy as np
import re
# Put the project root (the parent of this script's directory) on sys.path
# so that the project-local ``Util`` package can be imported below.
project_root = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
)
if project_root not in sys.path:
    sys.path.append(project_root)
from Util.EasyOcrKit import get_easyocr_reader
# Configure logging: root logger at INFO level, each record formatted as
# "<timestamp> - <logger name> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Module-wide logger used by every function in this script.
logger = logging.getLogger("TestWeChat")
def _largest_matching_box(contours, crop_y_start, img_height, w_range, h_range):
    """Return the largest bounding box among *contours* that passes the filters.

    Args:
        contours: Contours as returned by ``cv2.findContours``.
        crop_y_start: Y offset of the cropped region inside the full image.
        img_height: Height of the full image in pixels.
        w_range: ``(min_w, max_w)`` exclusive bounds on the box width.
        h_range: ``(min_h, max_h)`` exclusive bounds on the box height.

    Returns:
        ``(x, y, w, h)`` in crop coordinates for the box with the largest
        ``w * h``, or ``None`` when no contour qualifies.
    """
    min_w, max_w = w_range
    min_h, max_h = h_range
    best_box = None
    best_area = 0
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if not (min_w < w < max_w and min_h < h < max_h):
            continue
        # Reject boxes whose centre falls in the last 2% of the screen
        # (navigation bar / gesture handle area).
        if crop_y_start + y + h // 2 > img_height * 0.98:
            continue
        area = w * h
        if area > best_area:
            best_area = area
            best_box = (x, y, w, h)
    return best_box


def find_input_box_center(image_path):
    """Locate the input box in the bottom strip of a screenshot.

    Only the bottom 12% of the image is searched. Two detection strategies
    are tried in order (Canny edges, then adaptive thresholding); if both
    fail, a default position at 94% of the image height is returned.

    Args:
        image_path: Path of the screenshot. The file is read with
            ``np.fromfile`` + ``cv2.imdecode`` so non-ASCII paths work.

    Returns:
        A 2-tuple ``(center, rect)`` where ``center`` is ``(center_x,
        center_y)`` and ``rect`` is ``(x, y, w, h)``, both in full-image
        coordinates (``rect`` is meant for visualisation). Returns
        ``(None, None)`` if the image cannot be read or processing fails.
    """
    try:
        img_data = np.fromfile(image_path, dtype=np.uint8)
        img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
        if img is None:
            return None, None
        height, width = img.shape[:2]

        # Crop the bottom 12% (a narrow band gives more precise localisation).
        bottom_h = int(height * 0.12)
        crop_y_start = height - bottom_h
        crop = img[crop_y_start:height, 0:width]
        gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)

        # Strategy 1: Canny edge detection (works better on low-contrast UI).
        edges = cv2.Canny(gray, 50, 150)
        # Dilate to reconnect broken edges.
        kernel = np.ones((3, 3), np.uint8)
        dilated = cv2.dilate(edges, kernel, iterations=1)
        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # Width 50%-98% of the screen, height 30px up to 90% of the strip.
        box = _largest_matching_box(
            contours, crop_y_start, height,
            (width * 0.5, width * 0.98), (30, bottom_h * 0.9),
        )
        if box is not None:
            x, y, w, h = box
            center_x = x + w // 2
            center_y = crop_y_start + y + h // 2
            logger.info(f"找到输入框(CV-Canny): ({center_x}, {center_y}), 尺寸: {w}x{h}")
            return (center_x, center_y), (x, crop_y_start + y, w, h)

        # Strategy 2: adaptive threshold (the earlier logic, kept as backup).
        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, 11, 2)
        contours_thresh, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        box = _largest_matching_box(
            contours_thresh, crop_y_start, height,
            (width * 0.4, width * 0.95), (20, bottom_h * 0.8),
        )
        if box is not None:
            x, y, w, h = box
            center_x = x + w // 2
            center_y = crop_y_start + y + h // 2
            logger.info(f"找到输入框(CV-Adaptive): ({center_x}, {center_y}), 尺寸: {w}x{h}")
            return (center_x, center_y), (x, crop_y_start + y, w, h)

        # Last resort: a fixed position at 94% of the height (an earlier
        # 90.5% default was reported as too high).
        logger.warning("未找到明显输入框轮廓,使用更靠下的默认坐标 (94%)")
        default_y = int(height * 0.94)
        center_x = width // 2
        # Build a placeholder rectangle around the default point.
        fake_w = int(width * 0.7)
        fake_h = int(height * 0.08)  # slightly taller so it looks plausible when drawn
        fake_x = (width - fake_w) // 2
        fake_y = default_y - fake_h // 2
        return (center_x, default_y), (fake_x, fake_y, fake_w, fake_h)
    except Exception as e:
        logger.error(f"查找输入框失败: {e}")
        return None, None
def find_send_button(d):
    """Take a screenshot and look for the green send button.

    The search region is the right 30% of the bottom 60% of the screen; the
    tall region allows for the button being pushed up by the soft keyboard.
    Among green blobs with a plausible area and aspect ratio, the one whose
    top edge is lowest on screen is chosen.

    Args:
        d: Device object exposing ``screenshot(path)``.

    Returns:
        ``(cx, cy)`` centre of the detected button in full-screen
        coordinates, or ``None`` if nothing was found or an error occurred.
    """
    try:
        # Keep the screenshot under Screenshots/ to ease debugging.
        screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
        os.makedirs(screenshot_dir, exist_ok=True)
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        debug_shot_path = os.path.join(screenshot_dir, f"debug_send_check_{timestamp}.jpg")
        d.screenshot(debug_shot_path)
        logger.info(f"发送按钮查找调试截图已保存: {debug_shot_path}")

        # Decode via np.fromfile + imdecode, like the other readers in this
        # file, so that a non-ASCII directory name does not break the read.
        img = cv2.imdecode(np.fromfile(debug_shot_path, dtype=np.uint8), cv2.IMREAD_COLOR)
        if img is None:
            return None
        h, w = img.shape[:2]

        # ROI: bottom 60% of the screen, right-most 30% only.
        roi_h = int(h * 0.6)
        roi_w = int(w * 0.3)
        y_start = h - roi_h
        x_start = w - roi_w
        roi = img[y_start:h, x_start:w]

        # Mask the green range in HSV space.
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
        lower_green = np.array([35, 80, 80])
        upper_green = np.array([90, 255, 255])
        mask = cv2.inRange(hsv, lower_green, upper_green)
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        valid_candidates = []
        for cnt in contours:
            area = cv2.contourArea(cnt)
            # Drop tiny noise and very large regions (e.g. a green background).
            if not 500 < area < 30000:
                continue
            x, y, cw, ch = cv2.boundingRect(cnt)
            # Aspect-ratio check: roughly square to moderately wide.
            if not 0.5 < cw / ch < 3.0:
                continue
            valid_candidates.append({
                'area': area,
                'y': y_start + y,  # top edge, restored to full-image coordinates
                'rect': (x, y, cw, ch),
            })

        if valid_candidates:
            # Pick the lowest candidate on screen; on ties the first one
            # found wins, same as a stable descending sort.
            best = max(valid_candidates, key=lambda c: c['y'])
            bx, by, bw, bh = best['rect']
            cx = x_start + bx + bw // 2
            cy = y_start + by + bh // 2
            logger.info(f"通过图像识别找到发送按钮 (Bottom-Most): ({cx}, {cy}), 面积: {best['area']}")
            return cx, cy

        logger.warning("未通过图像识别找到绿色发送按钮")
        return None
    except Exception as e:
        logger.error(f"查找发送按钮出错: {e}")
        return None
def take_debug_screenshot(d, step_name):
    """Save a debugging screenshot named after the current step.

    The image is written to the ``Screenshots`` folder next to this script
    as ``debug_<timestamp>_<step_name>.jpg``.

    Args:
        d: Device object exposing ``screenshot(path)``.
        step_name: Label embedded in the file name and the log line.

    Returns:
        The full path of the saved file, or ``None`` if anything failed.
    """
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
        shots_dir = os.path.join(script_dir, "Screenshots")
        if not os.path.exists(shots_dir):
            os.makedirs(shots_dir)
        stamp = time.strftime("%Y%m%d_%H%M%S")
        filename = f"debug_{stamp}_{step_name}.jpg"
        target_path = os.path.join(shots_dir, filename)
        d.screenshot(target_path)
        logger.info(f"📸 [调试截图] {step_name} 已保存: {filename}")
    except Exception as e:
        logger.error(f"截图失败 ({step_name}): {e}")
        return None
    else:
        return target_path
def _click_send_button(d, step_text, step_image, step_fallback):
    """Tap the send button, trying three strategies in order.

    1. A control whose text is "发送".
    2. Image recognition via ``find_send_button``.
    3. A blind tap near the bottom-right corner (90% width, 96.5% height).

    A debug screenshot is taken after the tap, named by the matching
    ``step_*`` argument. Errors are logged, never raised.
    """
    try:
        if d(text="发送").exists:
            d(text="发送").click()
            logger.info("已点击 '发送' 按钮 (Native Text)")
            take_debug_screenshot(d, step_text)
            return

        logger.info("未找到 '发送' 文本控件,尝试图像识别...")
        send_btn_coords = find_send_button(d)
        if send_btn_coords:
            sx, sy = send_btn_coords
            d.click(sx, sy)
            logger.info(f"已点击 '发送' 按钮 (Image Rec): {sx}, {sy}")
            take_debug_screenshot(d, step_image)
            return

        width, height = d.window_size()
        fallback_x = int(width * 0.9)
        fallback_y = int(height * 0.965)
        logger.info(f"未识别到发送按钮,尝试盲点右下角: {fallback_x}, {fallback_y}")
        d.click(fallback_x, fallback_y)
        take_debug_screenshot(d, step_fallback)
    except Exception as e:
        logger.error(f"点击发送按钮失败: {e}")


def _input_via_native_control(d, text):
    """Type *text* through an ``android.widget.EditText`` control, then send.

    Returns:
        ``True`` when an EditText was found and the flow ran to the end,
        ``False`` when no EditText exists. Exceptions from the device calls
        propagate so the caller can fall back to coordinate mode.
    """
    input_elem = d(className="android.widget.EditText")
    if not input_elem.exists:
        return False

    logger.info("发现原生输入框控件,尝试点击...")
    take_debug_screenshot(d, "native_01_before_click")
    # Tap twice to make sure the field gets focus.
    input_elem.click()
    time.sleep(0.5)
    input_elem.click()
    time.sleep(1)
    # The keyboard is expected to be visible at this point.
    take_debug_screenshot(d, "native_02_after_click_keyboard")

    logger.info(f"输入文本: {text}")
    # First try set_text; fall back to send_keys if it did not take effect.
    try:
        input_elem.set_text(text)
    except Exception as e:
        logger.warning(f"set_text 失败: {e}")
    time.sleep(0.5)

    try:
        current_text = input_elem.get_text()
    except Exception:
        # Cannot verify the field content; type the text to be safe.
        needs_send_keys = True
    else:
        needs_send_keys = not current_text or current_text != text
        if needs_send_keys:
            logger.warning(f"set_text 似乎未生效 (当前: {current_text}),尝试 send_keys...")
    if needs_send_keys:
        d.send_keys(text)

    take_debug_screenshot(d, "native_03_after_text_input")
    time.sleep(0.5)
    # NOTE(review): Enter is pressed and the send button is tapped as well;
    # if Enter already sends, the later tap may hit another control - confirm.
    d.press("enter")
    _click_send_button(
        d,
        "native_04_after_send_click_text",
        "native_04_after_send_click_image",
        "native_04_after_send_click_fallback",
    )
    logger.info("输入完成 (Native)")
    return True


def perform_input_action(coords, text):
    """Focus the chat input box, type *text* and send it.

    The native ``EditText`` control is tried first. If it is missing or any
    step of that path raises, the function falls back to tapping *coords*
    and typing with ``send_keys``. All errors are logged; nothing is raised.

    Args:
        coords: ``(x, y)`` screen coordinates of the input box, used only by
            the fallback path. A falsy value disables the fallback.
        text: The message to type.
    """
    d = None
    try:
        d = u2.connect()
        if _input_via_native_control(d, text):
            return
        logger.warning("未找到输入框元素 (Native),转入坐标点击模式...")
    except Exception as e:
        logger.warning(f"原生控件操作失败,降级为坐标点击: {e}")

    # Fallback: coordinate-based tapping.
    if not coords:
        logger.error("坐标无效,无法执行点击输入")
        return
    x, y = coords

    if d is None:
        # The first connection attempt failed; retry once for the fallback.
        try:
            d = u2.connect()
        except Exception as e:
            logger.error(f"自动化操作失败: {e}")
            return

    # Sanity-check the coordinate: only points in the last 1% of the screen
    # are pulled up to 97%; a more aggressive correction was found to land
    # above the input box.
    try:
        width, height = d.window_size()
        if y > height * 0.99:
            corrected_y = int(height * 0.97)
            logger.warning(f"检测到的输入框坐标 y={y} 过于靠底,修正为 {corrected_y}")
            y = corrected_y
    except Exception as e:
        logger.warning(f"获取屏幕尺寸失败,跳过坐标修正: {e}")

    try:
        logger.info(f"设备连接成功: {d.info.get('serial')}")
        take_debug_screenshot(d, "coord_01_before_click_input")
        logger.info(f"点击坐标: ({x}, {y})")
        d.click(x, y)
        time.sleep(0.5)
        d.click(x, y)  # second tap, same reason as in the native path
        # Give the keyboard time to appear.
        time.sleep(1.5)
        take_debug_screenshot(d, "coord_02_after_click_input_keyboard")

        logger.info(f"输入文本 (SendKeys): {text}")
        try:
            d.send_keys(text)
        except Exception as sk_e:
            logger.error(f"send_keys 失败: {sk_e}")
        take_debug_screenshot(d, "coord_03_after_input_text")

        time.sleep(0.5)
        d.press("enter")
        _click_send_button(
            d,
            "coord_04_after_click_send_native",
            "coord_04_after_click_send_image",
            "coord_04_after_click_send_fallback",
        )
        logger.info("输入完成 (Coordinate)")
    except Exception as e:
        logger.error(f"自动化操作失败: {e}")
# OCR lines containing any of these phrases are dropped from the dialogue log.
_OCR_IGNORE_KEYWORDS = ("点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手")
# Matches a line that consists solely of a clock time such as "9:05" or "12:30".
_TIME_ONLY_RE = re.compile(r'^\s*\d{1,2}:\d{2}\s*$')


def _find_avatars(img):
    """Detect avatar-like squares along the left and right screen edges.

    A contour qualifies when its bounding box is roughly square (aspect
    ratio 0.8-1.2), 6%-15% of the image width wide, not in the bottom 10%
    of the image, and starts either left of 18% or right of 82% of the width.

    Returns:
        A list of dicts with keys ``x``, ``y``, ``w``, ``h`` and ``side``
        (``"Left"`` or ``"Right"``), sorted by ``y`` ascending.
    """
    height, width = img.shape[:2]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    # Morphological closing fills small holes inside shapes.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    logger.info(f"检测到轮廓数量: {len(contours)}")

    min_w = width * 0.06
    max_w = width * 0.15
    avatars = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        # Skip the input area (assumed to be the bottom 10%).
        if y > height * 0.9:
            continue
        if not (0.8 <= w / h <= 1.2 and min_w < w < max_w):
            continue
        if x < width * 0.18:
            side = "Left"
        elif x > width * 0.82:
            side = "Right"
        else:
            continue
        avatars.append({'x': x, 'y': y, 'w': w, 'h': h, 'side': side})
    avatars.sort(key=lambda a: a['y'])
    return avatars


def _ocr_region_text(reader, roi_img):
    """Run OCR on *roi_img* and return the kept lines joined by spaces.

    Each result is read as ``res[1]`` (text) and ``res[2]`` (confidence).
    Results with confidence <= 0.3, lines containing an ignored phrase, and
    lines that are only a clock time are discarded.
    """
    # NOTE(review): ``read_text`` is whatever the project's OCR helper
    # returns; its result layout is assumed from the indexing below.
    results = reader.read_text(roi_img)
    texts = []
    for res in results:
        text, prob = res[1], res[2]
        if prob <= 0.3:
            continue
        if any(kw in text for kw in _OCR_IGNORE_KEYWORDS):
            continue
        if _TIME_ONLY_RE.match(text):
            continue
        texts.append(text)
    return " ".join(texts)


def analyze_chat_image(image_path, output_path, target_name="对方", self_name="我"):
    """Annotate a WeChat chat screenshot and log the recognised dialogue.

    Steps: detect avatars on both edges, group consecutive avatars from the
    same side into one horizontal band, draw each band (green for the left
    side, red for the right side), OCR the band on an unmarked copy of the
    image, draw the avatar and input-box markers, save the annotated image
    to *output_path*, and finally log the collected dialogue lines.

    Args:
        image_path: Screenshot to analyse (non-ASCII paths supported).
        output_path: Where to write the annotated image; its extension
            selects the encoding.
        target_name: Speaker label for left-side messages.
        self_name: Speaker label for right-side (own) messages.

    Returns:
        None. Failures are logged and end the analysis early.
    """
    logger.info(f"正在分析图片: {image_path}")
    # Read through np.fromfile so non-ASCII paths are supported.
    try:
        img_data = np.fromfile(image_path, dtype=np.uint8)
        img = cv2.imdecode(img_data, cv2.IMREAD_COLOR)
    except Exception as e:
        logger.error(f"读取图片失败: {e}")
        return
    if img is None:
        logger.error("图片读取为空")
        return

    # Keep an unmarked copy for OCR so drawn boxes are not picked up as text.
    img_clean = img.copy()
    height, width = img.shape[:2]
    logger.info(f"图片尺寸: {width}x{height}")

    # Input-box position, used as the lower bound of the last band and drawn later.
    input_center, input_rect = find_input_box_center(image_path)

    avatars = _find_avatars(img)
    logger.info(f"找到有效头像数量: {len(avatars)}")

    try:
        reader = get_easyocr_reader(gpu=True)
        logger.info("OCR 初始化成功")
    except Exception as e:
        logger.error(f"OCR 初始化失败: {e}")
        reader = None

    dialogue_log = []

    # Draw one band per run of consecutive same-side avatars.
    i = 0
    while i < len(avatars):
        current_side = avatars[i]['side']
        # j ends up at the first avatar that belongs to the other side.
        j = i + 1
        while j < len(avatars) and avatars[j]['side'] == current_side:
            j += 1

        # The band starts slightly above the first avatar of the run.
        start_y = max(0, avatars[i]['y'] - 10)
        if j < len(avatars):
            end_y = max(start_y + 10, avatars[j]['y'] - 30)
        elif input_rect:
            # The last band ends just above the input box.
            _, input_y, _, _ = input_rect
            end_y = max(start_y + 10, input_y - 10)
        else:
            end_y = int(height * 0.9)

        # Left side (the other party) in green, right side (self) in red (BGR).
        box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255)
        cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5)
        logger.info(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}")

        if reader:
            try:
                safe_start_y = max(0, start_y)
                safe_end_y = min(height, end_y)
                if safe_end_y > safe_start_y:
                    roi_img = img_clean[safe_start_y:safe_end_y, 0:width]
                    combined_text = _ocr_region_text(reader, roi_img)
                    if combined_text.strip():
                        role = target_name if current_side == "Left" else self_name
                        dialogue_log.append(f"{role}: {combined_text}")
                        logger.info(f" -> OCR结果: {combined_text}")
            except Exception as e:
                logger.error(f" -> OCR出错: {e}")

        i = j

    # Avatar boxes go on top of the bands: blue for left, yellow for right (BGR).
    for av in avatars:
        x, y, w, h = av['x'], av['y'], av['w'], av['h']
        color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255)
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 10)
        logger.info(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}")

    try:
        # Mark the input box (magenta) and its centre point when available.
        if input_rect:
            rx, ry, rw, rh = input_rect
            cv2.rectangle(img, (rx, ry), (rx + rw, ry + rh), (255, 0, 255), 5)
            if input_center:
                cx, cy = input_center
                cv2.circle(img, (cx, cy), 10, (255, 0, 255), -1)
            logger.info(f"已绘制输入框标记: {input_rect}")

        # Encode in memory and write with tofile so non-ASCII paths work.
        ext = os.path.splitext(output_path)[1]
        ok, encoded = cv2.imencode(ext, img)
        if ok:
            encoded.tofile(output_path)
            logger.info(f"✅ 分析结果已保存至: {output_path}")
        else:
            logger.error(f"保存分析图片失败: 图像编码失败 ({ext})")
    except Exception as e:
        logger.error(f"保存分析图片失败: {e}")

    logger.info("\n" + "="*30)
    logger.info("对话内容汇总:")
    for line in dialogue_log:
        logger.info(line)
    logger.info("="*30 + "\n")
def main():
    """Run the end-to-end WeChat test.

    Launches WeChat, searches for a contact, opens the chat, saves and
    analyses a screenshot, then sends an automatic reply. All taps that are
    not element-based use coordinates proportional to the screen size.
    """
    logger.info("开始执行微信搜索测试...")
    try:
        d = u2.connect()
        logger.info(f"设备连接成功: {d.info.get('serial')}")
    except Exception as e:
        logger.error(f"设备连接失败: {e}")
        return

    # 1. Launch WeChat (stopping any running instance first).
    logger.info("步骤 1: 启动微信...")
    d.app_start("com.tencent.mm", stop=True)
    time.sleep(5)  # wait for the app to finish starting

    w, h = d.window_size()
    logger.info(f"屏幕尺寸: {w}x{h}")

    # 2. Tap the search button (ratios taken from Opener.py: w*0.84, h*0.08).
    search_x = int(w * 0.84)
    search_y = int(h * 0.08)
    logger.info(f"步骤 2: 点击搜索按钮 (坐标: {search_x}, {search_y})...")
    d.click(search_x, search_y)
    time.sleep(2)

    # 3. Type the contact name.
    target_name = "糖豆爸爸"
    logger.info(f"步骤 3: 输入搜索内容 '{target_name}'...")
    try:
        # Switch to the automation IME so Chinese text can be typed.
        d.set_input_ime(True)
        # Tap the search field to focus it (Opener.py: w*0.4, h*0.08).
        d.click(int(w * 0.4), int(h * 0.08))
        time.sleep(1)
        d.send_keys(target_name)
        time.sleep(2)
    except Exception as e:
        logger.error(f"输入文字失败: {e}")
        try:
            d(focused=True).set_text(target_name)
        except Exception as fallback_err:
            logger.warning(f"备用 set_text 输入也失败: {fallback_err}")
    finally:
        # Always restore the original IME, even when typing failed.
        try:
            d.set_input_ime(False)
        except Exception as ime_err:
            logger.warning(f"恢复输入法失败: {ime_err}")

    # 4. Open the search result.
    logger.info("步骤 4: 查找并点击搜索结果...")
    time.sleep(2)
    # NOTE(review): the search field itself may also carry this exact text,
    # in which case this selector could match the field instead of the
    # result row - confirm on device.
    if d(text=target_name).exists:
        logger.info(f"找到文本为 '{target_name}' 的元素,点击...")
        d(text=target_name).click()
    else:
        logger.warning(f"未找到文本为 '{target_name}' 的元素,尝试模糊匹配或坐标点击...")
        # Fall back to the position of the first result (Opener.py: w*0.5, h*0.18).
        result_x = int(w * 0.5)
        result_y = int(h * 0.18)
        logger.info(f"尝试点击第一个搜索结果位置 ({result_x}, {result_y})...")
        d.click(result_x, result_y)
    logger.info("✅ 已执行点击操作,应该已进入对话框。")

    # 5. Screenshot the chat.
    logger.info("步骤 5: 截图并分析...")
    time.sleep(3)  # let the chat view settle
    screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
    os.makedirs(screenshot_dir, exist_ok=True)
    timestamp = time.strftime("%Y%m%d_%H%M%S")
    filename = f"chat_result_{timestamp}.jpg"
    save_path = os.path.join(screenshot_dir, filename)
    try:
        d.screenshot(save_path)
        logger.info(f"✅ 原始截图已保存: {save_path}")

        # 6. Annotate avatars and recognise the dialogue.
        logger.info("步骤 6: 自动标记头像和识别内容...")
        analyzed_filename = f"chat_result_{timestamp}_analyzed.jpg"
        analyzed_path = os.path.join(screenshot_dir, analyzed_filename)
        analyze_chat_image(save_path, analyzed_path, target_name=target_name)

        # 7. Locate the input box and reply. analyze_chat_image does not
        # return the coordinates it found, so the detection is run again.
        logger.info("步骤 7: 自动回复...")
        coords, _ = find_input_box_center(save_path)
        if coords:
            logger.info(f"输入框坐标: {coords}")
            perform_input_action(coords, "AI助手我现在可以开始和你聊天了")
        else:
            logger.warning("未找到输入框坐标,跳过回复")
    except Exception as e:
        logger.error(f"❌ 截图或分析失败: {e}")

    logger.info("测试结束。")


if __name__ == "__main__":
    main()