diff --git a/Test/Screenshots/chat_result_20260121_113553.jpg b/Test/Screenshots/chat_result_20260121_113553.jpg deleted file mode 100644 index 74a6535..0000000 Binary files a/Test/Screenshots/chat_result_20260121_113553.jpg and /dev/null differ diff --git a/Test/Screenshots/chat_result_analyzed.jpg b/Test/Screenshots/chat_result_analyzed.jpg deleted file mode 100644 index 6b6d8d1..0000000 Binary files a/Test/Screenshots/chat_result_analyzed.jpg and /dev/null differ diff --git a/Test/T1_StartWeiXin.py b/Test/T1_StartWeiXin.py index 11c1308..9a4449f 100644 --- a/Test/T1_StartWeiXin.py +++ b/Test/T1_StartWeiXin.py @@ -6,19 +6,400 @@ import sys import os import cv2 import numpy as np +import re + +# 添加项目根目录到 sys.path 以便导入 Util +project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if project_root not in sys.path: + sys.path.append(project_root) + +from Util.EasyOcrKit import get_easyocr_reader # 配置日志 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') logger = logging.getLogger("TestWeChat") -def analyze_chat_image(image_path, output_path): +def find_input_box_center(image_path): """ - 识别微信聊天截图中的头像并画框 + 识别底部输入框的中心坐标,返回 (center_x, center_y, rect_box) + rect_box 用于可视化: (x, y, w, h) (相对于原图) + """ + try: + img_data = np.fromfile(image_path, dtype=np.uint8) + img = cv2.imdecode(img_data, cv2.IMREAD_COLOR) + if img is None: + return None, None + + height, width = img.shape[:2] + + # 截取底部 12% 区域 (缩小范围以精准定位) + bottom_h = int(height * 0.12) + crop_y_start = height - bottom_h + crop = img[crop_y_start:height, 0:width] + + # 预处理 + gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY) + + # 策略1: Canny 边缘检测 (对低对比度 UI 更有效) + edges = cv2.Canny(gray, 50, 150) + # 膨胀连接断裂的边缘 + kernel = np.ones((3,3), np.uint8) + dilated = cv2.dilate(edges, kernel, iterations=1) + + contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + best_cnt = None + max_area = 0 + + for cnt in contours: + x, y, w, h = cv2.boundingRect(cnt) + # 筛选条件优化: + # 1. 宽度: 屏幕宽度的 50% - 98% (通常输入框很长) + # 2. 高度: 30px - 底部区域的 90% + if width * 0.5 < w < width * 0.98 and 30 < h < bottom_h * 0.9: + # 计算中心点 Y 坐标相对于全图 + global_y = crop_y_start + y + h // 2 + + # 排除过于靠下的区域 (导航栏/手势条),通常在最后 2% + if global_y > height * 0.98: + continue + + if w * h > max_area: + max_area = w * h + best_cnt = (x, y, w, h) + + if best_cnt: + x, y, w, h = best_cnt + center_x = x + w // 2 + center_y = crop_y_start + y + h // 2 + logger.info(f"找到输入框(CV-Canny): ({center_x}, {center_y}), 尺寸: {w}x{h}") + return (center_x, center_y), (x, crop_y_start + y, w, h) + + # 策略2: 自适应阈值 (原有逻辑作为备份) + thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, + cv2.THRESH_BINARY_INV, 11, 2) + contours_thresh, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + for cnt in contours_thresh: + x, y, w, h = cv2.boundingRect(cnt) + if width * 0.4 < w < width * 0.95 and 20 < h < bottom_h * 0.8: + global_y = crop_y_start + y + h // 2 + if global_y > height * 0.98: + continue + if w * h > max_area: + max_area = w * h + best_cnt = (x, y, w, h) + + if best_cnt: + x, y, w, h = best_cnt + center_x = x + w // 2 + center_y = crop_y_start + y + h // 2 + logger.info(f"找到输入框(CV-Adaptive): ({center_x}, {center_y}), 尺寸: {w}x{h}") + return (center_x, center_y), (x, crop_y_start + y, w, h) + + # 兜底策略:使用更靠下的默认坐标 (94%) + # 之前的 90.5% 用户反馈偏上 + logger.warning("未找到明显输入框轮廓,使用更靠下的默认坐标 (94%)") + default_y = int(height * 0.94) + center_x = width // 2 + + # 构造假想框 + fake_w = int(width * 0.7) + fake_h = int(height * 0.08) # 稍微加高一点,视觉上更像 + fake_x = (width - fake_w) // 2 + fake_y = default_y - fake_h // 2 + + return (center_x, default_y), (fake_x, fake_y, fake_w, fake_h) + + + except Exception as e: + logger.error(f"查找输入框失败: {e}") + return None, None + +def find_send_button(d): + """ + 截图并寻找发送按钮 (绿色按钮) + 扩大搜索范围以适应键盘弹出的情况 + """ + try: + # 截图到 Screenshots 目录方便调试 + screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots") + if not os.path.exists(screenshot_dir): + os.makedirs(screenshot_dir) + timestamp = time.strftime("%Y%m%d_%H%M%S") + debug_shot_path = os.path.join(screenshot_dir, f"debug_send_check_{timestamp}.jpg") + + d.screenshot(debug_shot_path) + logger.info(f"发送按钮查找调试截图已保存: {debug_shot_path}") + + img = cv2.imread(debug_shot_path) + if img is None: + return None + + h, w = img.shape[:2] + + # ROI: 底部 60% (考虑到键盘弹出,按钮可能被顶上去) + # 且只关注右侧 30% + roi_h = int(h * 0.6) + roi_w = int(w * 0.3) + y_start = h - roi_h + x_start = w - roi_w + + roi = img[y_start:h, x_start:w] + + # 转换 HSV + hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV) + + # 绿色范围 (WeChat Green) + lower_green = np.array([35, 80, 80]) + upper_green = np.array([90, 255, 255]) + + mask = cv2.inRange(hsv, lower_green, upper_green) + + # 查找轮廓 + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + if contours: + # 找符合条件的轮廓 + valid_candidates = [] + for cnt in contours: + area = cv2.contourArea(cnt) + x, y, cw, ch = cv2.boundingRect(cnt) + + # 过滤太小的噪点和太大的区域(例如全屏背景) + # 发送按钮通常面积在 2000-15000 之间 (视分辨率而定) + if 500 < area < 30000: + # 宽高比检查:发送按钮通常接近正方形或微扁 (ratio < 2.5) + ratio = float(cw) / ch + if 0.5 < ratio < 3.0: + # 坐标还原到原图 + global_y = y_start + y + valid_candidates.append({ + 'cnt': cnt, + 'area': area, + 'y': global_y, + 'rect': (x, y, cw, ch) + }) + + if valid_candidates: + # 核心逻辑:发送按钮一定是所有绿色元素中最靠下的 (Y坐标最大) + # 且在最右侧 + # 先按 Y 坐标降序排序 + valid_candidates.sort(key=lambda c: c['y'], reverse=True) + + # 取最靠下的一个 (可能是发送按钮) + best = valid_candidates[0] + + # 获取中心点 + bx, by, bw, bh = best['rect'] + cx = x_start + bx + bw // 2 + cy = y_start + by + bh // 2 + + logger.info(f"通过图像识别找到发送按钮 (Bottom-Most): ({cx}, {cy}), 面积: {best['area']}") + return cx, cy + + logger.warning("未通过图像识别找到绿色发送按钮") + return None + + except Exception as e: + logger.error(f"查找发送按钮出错: {e}") + return None + +def take_debug_screenshot(d, step_name): + """ + 调试专用截图函数 + """ + try: + screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots") + if not os.path.exists(screenshot_dir): + os.makedirs(screenshot_dir) + timestamp = time.strftime("%Y%m%d_%H%M%S") + filename = f"debug_{timestamp}_{step_name}.jpg" + save_path = os.path.join(screenshot_dir, filename) + d.screenshot(save_path) + logger.info(f"📸 [调试截图] {step_name} 已保存: {filename}") + return save_path + except Exception as e: + logger.error(f"截图失败 ({step_name}): {e}") + return None + +def perform_input_action(coords, text): + """ + 点击坐标并输入文本 + """ + # 优先尝试使用 uiautomator2 的原生控件查找 (更稳健) + native_success = False + try: + d = u2.connect() + # 查找 EditText 控件 + input_elem = d(className="android.widget.EditText") + + if input_elem.exists: + logger.info("发现原生输入框控件,尝试点击...") + + # 1. 截图:点击前 + take_debug_screenshot(d, "native_01_before_click") + + # 双击策略 + input_elem.click() + time.sleep(0.5) + input_elem.click() + time.sleep(1) + + # 2. 截图:点击后 (预期键盘弹出) + take_debug_screenshot(d, "native_02_after_click_keyboard") + + logger.info(f"输入文本: {text}") + + # 尝试 set_text + send_keys 组合 + try: + input_elem.set_text(text) + except: + pass + + time.sleep(0.5) + + # 检查文本是否输入成功,如果没有,尝试 send_keys + try: + current_text = input_elem.get_text() + if not current_text or current_text != text: + logger.warning(f"set_text 似乎未生效 (当前: {current_text}),尝试 send_keys...") + d.send_keys(text) + except: + d.send_keys(text) + + # 3. 截图:输入文本后 + take_debug_screenshot(d, "native_03_after_text_input") + + # 尝试发送回车键 + time.sleep(0.5) + d.press("enter") + + # 尝试点击发送按钮 + try: + if d(text="发送").exists: + d(text="发送").click() + logger.info("已点击 '发送' 按钮 (Native Text)") + take_debug_screenshot(d, "native_04_after_send_click_text") + else: + logger.info("未找到 '发送' 文本控件,尝试图像识别...") + send_btn_coords = find_send_button(d) + if send_btn_coords: + sx, sy = send_btn_coords + d.click(sx, sy) + logger.info(f"已点击 '发送' 按钮 (Image Rec): {sx}, {sy}") + take_debug_screenshot(d, "native_04_after_send_click_image") + else: + width, height = d.window_size() + fallback_x = int(width * 0.9) + fallback_y = int(height * 0.965) + logger.info(f"未识别到发送按钮,尝试盲点右下角: {fallback_x}, {fallback_y}") + d.click(fallback_x, fallback_y) + take_debug_screenshot(d, "native_04_after_send_click_fallback") + except Exception as e: + logger.error(f"点击发送按钮失败: {e}") + + logger.info("输入完成 (Native)") + native_success = True + return + + else: + logger.warning("未找到输入框元素 (Native),转入坐标点击模式...") + + except Exception as e: + logger.warning(f"原生控件操作失败,降级为坐标点击: {e}") + + if native_success: + return + + # 降级方案:使用坐标点击 + if not coords: + logger.error("坐标无效,无法执行点击输入") + return + + x, y = coords + + # 坐标安全检查 + if 'd' not in locals(): + d = u2.connect() + + try: + width, height = d.window_size() + # 移除过于激进的坐标修正 (95% -> 93% 会导致点到输入框上方) + # 输入框中心通常在 96%-97% 左右 + if y > height * 0.99: + logger.warning(f"检测到的输入框坐标 y={y} 过于靠底,修正为 {height * 0.97}") + y = int(height * 0.97) + except: + pass + + try: + logger.info(f"设备连接成功: {d.info.get('serial')}") + + # 1. 截图:点击输入框前 + take_debug_screenshot(d, "coord_01_before_click_input") + + # 点击输入框 + logger.info(f"点击坐标: ({x}, {y})") + d.click(x, y) + time.sleep(0.5) + d.click(x, y) # Double click + + # 等待键盘弹出 + time.sleep(1.5) + + # 2. 截图:点击输入框后 + take_debug_screenshot(d, "coord_02_after_click_input_keyboard") + + # 输入文本 + logger.info(f"输入文本 (SendKeys): {text}") + try: + d.send_keys(text) + except Exception as sk_e: + logger.error(f"send_keys 失败: {sk_e}") + + # 3. 截图:输入文本后 + take_debug_screenshot(d, "coord_03_after_input_text") + + time.sleep(0.5) + d.press("enter") + + # 尝试查找发送按钮并点击 + try: + if d(text="发送").exists: + d(text="发送").click() + logger.info("已点击 '发送' 按钮 (Native Text)") + take_debug_screenshot(d, "coord_04_after_click_send_native") + else: + logger.info("未找到 '发送' 文本控件,尝试图像识别...") + send_btn_coords = find_send_button(d) + if send_btn_coords: + sx, sy = send_btn_coords + d.click(sx, sy) + logger.info(f"已点击 '发送' 按钮 (Image Rec): {sx}, {sy}") + take_debug_screenshot(d, "coord_04_after_click_send_image") + else: + width, height = d.window_size() + fallback_x = int(width * 0.9) + fallback_y = int(height * 0.965) + logger.info(f"未识别到发送按钮,尝试盲点右下角: {fallback_x}, {fallback_y}") + d.click(fallback_x, fallback_y) + take_debug_screenshot(d, "coord_04_after_click_send_fallback") + except Exception as e: + logger.error(f"点击发送按钮失败: {e}") + + logger.info("输入完成 (Coordinate)") + + except Exception as e: + logger.error(f"自动化操作失败: {e}") + +def analyze_chat_image(image_path, output_path, target_name="对方"): + """ + 识别微信聊天截图中的头像并画框,识别对话内容 """ logger.info(f"正在分析图片: {image_path}") - # 读取图片 - # 注意:cv2.imread 不支持中文路径,需要用 np.fromfile 读取 + # 读取图片(支持中文路径) try: img_data = np.fromfile(image_path, dtype=np.uint8) img = cv2.imdecode(img_data, cv2.IMREAD_COLOR) @@ -29,10 +410,16 @@ def analyze_chat_image(image_path, output_path): if img is None: logger.error("图片读取为空") return - + + # 备份一份干净的图片用于 OCR (避免识别到画上去的框) + img_clean = img.copy() + height, width = img.shape[:2] logger.info(f"图片尺寸: {width}x{height}") + # 调用输入框识别,获取坐标和可视化框 + input_center, input_rect = find_input_box_center(image_path) + # 1. 预处理 # 转为灰度图 gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) @@ -49,7 +436,8 @@ def analyze_chat_image(image_path, output_path): contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) logger.info(f"检测到轮廓数量: {len(contours)}") - avatar_count = 0 + # 收集所有符合条件的头像 + avatars = [] # 3. 筛选轮廓 for contour in contours: @@ -69,7 +457,7 @@ def analyze_chat_image(image_path, output_path): # 排除底部输入框区域 (假设底部 10% 为输入区域) if y > height * 0.9: continue - + # 左侧头像:靠左边 (x < width * 0.18) # 右侧头像:靠右边 (x > width * 0.82) is_left = x < width * 0.18 @@ -77,22 +465,124 @@ def analyze_chat_image(image_path, output_path): if 0.8 <= aspect_ratio <= 1.2 and min_w < w < max_w: if is_left or is_right: - # 确定颜色 - # 左侧:蓝色 (BGR: 255, 0, 0) - # 右侧:黄色 (BGR: 0, 255, 255) - color = (255, 0, 0) if is_left else (0, 255, 255) - - # 绘制矩形框,线宽为 3 - cv2.rectangle(img, (x, y), (x + w, y + h), color, 3) - - label = "Left" if is_left else "Right" - avatar_count += 1 - logger.info(f"找到头像: 位置=({x},{y}), 尺寸={w}x{h}, 侧别={label}") + side = "Left" if is_left else "Right" + avatars.append({ + 'x': x, 'y': y, 'w': w, 'h': h, + 'side': side + }) - logger.info(f"共标记了 {avatar_count} 个头像") + # 按 y 坐标排序 + avatars.sort(key=lambda a: a['y']) + + logger.info(f"找到有效头像数量: {len(avatars)}") - # 4. 保存结果 + # 初始化 OCR try: + reader = get_easyocr_reader(gpu=True) + logger.info("OCR 初始化成功") + except Exception as e: + logger.error(f"OCR 初始化失败: {e}") + reader = None + + dialogue_log = [] + + # 4. 绘制对话内容框 (Green/Red Boxes) + if avatars: + i = 0 + while i < len(avatars): + current_group_start = i + current_side = avatars[i]['side'] + + # 找到当前组的结束位置 (即下一个不同侧头像的索引) + j = i + 1 + while j < len(avatars) and avatars[j]['side'] == current_side: + j += 1 + + # Start Y: 当前组第一个头像的上方 (例如 -10px) + start_y = max(0, avatars[i]['y'] - 10) + + # End Y + if j < len(avatars): + end_y = max(start_y + 10, avatars[j]['y'] - 30) + else: + # 最后一个框的底边,使用输入框的上沿 + if input_rect: + _, input_y, _, _ = input_rect + end_y = max(start_y + 10, input_y - 10) + else: + end_y = int(height * 0.9) # 默认 + + # 绘制大框 + # 左侧 (Left) -> 对方 -> 绿色 (0, 255, 0) + # 右侧 (Right) -> 我 -> 红色 (0, 0, 255) + box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255) + + cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5) + + logger.info(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}") + + # OCR 识别区域文本 + if reader: + try: + safe_start_y = max(0, start_y) + safe_end_y = min(height, end_y) + + if safe_end_y > safe_start_y: + roi_img = img_clean[safe_start_y:safe_end_y, 0:width] + + # 识别 + results = reader.read_text(roi_img) + + # 过滤关键词 + ignore_keywords = ["点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手"] + + texts = [] + for res in results: + text = res[1] + prob = res[2] + if prob > 0.3: + # 1. 检查是否包含屏蔽词 + if any(kw in text for kw in ignore_keywords): + continue + + # 2. 检查是否为单行时间 + if re.match(r'^\s*\d{1,2}:\d{2}\s*$', text): + continue + + texts.append(text) + + combined_text = " ".join(texts) + + if combined_text.strip(): + role = target_name if current_side == "Left" else "我" + dialogue_log.append(f"{role}: {combined_text}") + logger.info(f" -> OCR结果: {combined_text}") + except Exception as e: + logger.error(f" -> OCR出错: {e}") + + # 移动到下一组 + i = j + + # 5. 绘制头像框 (Blue/Yellow Boxes) - 画在内容框之上 + for av in avatars: + x, y, w, h = av['x'], av['y'], av['w'], av['h'] + color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255) + + cv2.rectangle(img, (x, y), (x + w, y + h), color, 10) + logger.info(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}") + + # 6. 保存结果 + try: + # 如果有识别到输入框,画出来 (紫色) + if input_rect: + rx, ry, rw, rh = input_rect + cv2.rectangle(img, (rx, ry), (rx + rw, ry + rh), (255, 0, 255), 5) + # 画中心点 + if input_center: + cx, cy = input_center + cv2.circle(img, (cx, cy), 10, (255, 0, 255), -1) + logger.info(f"已绘制输入框标记: {input_rect}") + # cv2.imwrite 不支持中文路径,使用 imencode + tofile ext = os.path.splitext(output_path)[1] cv2.imencode(ext, img)[1].tofile(output_path) @@ -100,6 +590,12 @@ def analyze_chat_image(image_path, output_path): except Exception as e: logger.error(f"保存分析图片失败: {e}") + logger.info("\n" + "="*30) + logger.info("对话内容汇总:") + for line in dialogue_log: + logger.info(line) + logger.info("="*30 + "\n") + def main(): logger.info("开始执行微信搜索测试...") @@ -197,11 +693,25 @@ def main(): logger.info(f"✅ 原始截图已保存: {save_path}") # 6. 分析截图并标记头像 - logger.info("步骤 6: 自动标记头像...") + logger.info("步骤 6: 自动标记头像和识别内容...") analyzed_filename = f"chat_result_{timestamp}_analyzed.jpg" analyzed_path = os.path.join(screenshot_dir, analyzed_filename) - analyze_chat_image(save_path, analyzed_path) + analyze_chat_image(save_path, analyzed_path, target_name=target_name) + + # 7. 查找输入框并执行自动化操作 + logger.info("步骤 7: 自动回复...") + # 注意:analyze_chat_image 内部已经调用了 find_input_box_center 并且画在图上了 + # 但我们需要返回值来执行操作。analyze_chat_image 并没有返回坐标。 + # 为了简单,再次调用一次 (或者修改 analyze_chat_image 返回坐标,但改动较大) + # 这里直接再次调用 find_input_box_center 获取坐标 + coords, _ = find_input_box_center(save_path) + + if coords: + logger.info(f"输入框坐标: {coords}") + perform_input_action(coords, "AI助手我现在可以开始和你聊天了!") + else: + logger.warning("未找到输入框坐标,跳过回复") except Exception as e: logger.error(f"❌ 截图或分析失败: {e}") diff --git a/Test/testWeXinChat.py b/Test/T2_testWeXinChat.py similarity index 95% rename from Test/testWeXinChat.py rename to Test/T2_testWeXinChat.py index 046606a..58f6f42 100644 --- a/Test/testWeXinChat.py +++ b/Test/T2_testWeXinChat.py @@ -126,9 +126,9 @@ def analyze_chat_image(image_path, output_path): # 注意 OpenCV 颜色是 BGR box_color = (0, 255, 0) if current_side == "Left" else (0, 0, 255) - # 绘制矩形 (空心,线宽 2) + # 绘制矩形 (空心,线宽 5) # X 轴范围:0 到 width - cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 2) + cv2.rectangle(img, (0, start_y), (width, end_y), box_color, 5) logger.info(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}") @@ -142,7 +142,7 @@ def analyze_chat_image(image_path, output_path): # 右侧:黄色 (BGR: 0, 255, 255) color = (255, 0, 0) if av['side'] == "Left" else (0, 255, 255) - cv2.rectangle(img, (x, y), (x + w, y + h), color, 3) + cv2.rectangle(img, (x, y), (x + w, y + h), color, 10) logger.info(f"绘制头像: 位置=({x},{y}), 侧别={av['side']}") logger.info(f"共标记了 {len(avatars)} 个头像") diff --git a/Test/__pycache__/T1_StartWeiXin.cpython-310.pyc b/Test/__pycache__/T1_StartWeiXin.cpython-310.pyc new file mode 100644 index 0000000..4454928 Binary files /dev/null and b/Test/__pycache__/T1_StartWeiXin.cpython-310.pyc differ diff --git a/Test/analyze_chat_avatars.py b/Test/analyze_chat_avatars.py index 520234d..e530189 100644 --- a/Test/analyze_chat_avatars.py +++ b/Test/analyze_chat_avatars.py @@ -1,24 +1,126 @@ # coding=utf-8 import cv2 import numpy as np +import sys import os +import logging +import re +import time +try: + import uiautomator2 as u2 +except ImportError: + u2 = None -def analyze_chat_image(image_path, output_path): - print(f"正在读取图片: {image_path}") - - # 读取图片 - # 注意:cv2.imread 不支持中文路径,需要用 np.fromfile 读取 +# 添加项目根目录到 sys.path 以便导入 Util +project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if project_root not in sys.path: + sys.path.append(project_root) + +from Util.EasyOcrKit import get_easyocr_reader + +# 配置日志 +logging.basicConfig(level=logging.INFO, format='%(message)s') +logger = logging.getLogger(__name__) + +def find_input_box_center(image_path): + """ + 识别底部输入框的中心坐标 + """ try: img_data = np.fromfile(image_path, dtype=np.uint8) img = cv2.imdecode(img_data, cv2.IMREAD_COLOR) + if img is None: + return None + + height, width = img.shape[:2] + + # 截取底部 15% 区域 + bottom_h = int(height * 0.15) + crop_y_start = height - bottom_h + crop = img[crop_y_start:height, 0:width] + + # 预处理 + gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY) + # 简单二值化,输入框通常是浅色或深色背景上的反色 + # 这里假设深色模式下,输入框可能较亮,或者有边框 + # 尝试自适应阈值 + thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, + cv2.THRESH_BINARY_INV, 11, 2) + + contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + best_cnt = None + max_area = 0 + + for cnt in contours: + x, y, w, h = cv2.boundingRect(cnt) + # 筛选条件:宽度较大 (比如 > 50% 屏幕宽),高度适中 + if w > width * 0.5 and h > 20: + if w * h > max_area: + max_area = w * h + best_cnt = (x, y, w, h) + + if best_cnt: + x, y, w, h = best_cnt + center_x = x + w // 2 + center_y = crop_y_start + y + h // 2 + logger.info(f"找到输入框: ({center_x}, {center_y}), 尺寸: {w}x{h}") + return center_x, center_y + else: + # 兜底:返回底部区域中心 + logger.warning("未找到明显输入框轮廓,使用默认坐标") + return width // 2, int(height * 0.95) + except Exception as e: - print(f"读取图片失败: {e}") + logger.error(f"查找输入框失败: {e}") + return None + +def perform_input_action(coords, text): + """ + 点击坐标并输入文本 + """ + if u2 is None: + logger.error("未安装 uiautomator2 库,无法执行自动化操作") return - if img is None: - print("图片读取为空") + if not coords: + logger.error("坐标无效,无法执行点击输入") return + x, y = coords + try: + # 连接设备 (默认连接第一个 USB 设备) + d = u2.connect() + logger.info(f"设备连接成功: {d.info.get('serial')}") + + # 点击输入框 + logger.info(f"点击坐标: ({x}, {y})") + d.click(x, y) + + # 等待键盘弹出或输入框激活 + time.sleep(1) + + # 输入文本 + logger.info(f"输入文本: {text}") + d.send_keys(text) + + # 尝试发送回车键以确认输入 (视具体情况而定) + # d.press("enter") + + logger.info("输入完成") + + except Exception as e: + logger.error(f"自动化操作失败: {e}") + print(f"自动化操作失败: {e}") + +def analyze_chat_image(image_path, output_path, target_name="对方"): + # 读取图片(支持中文路径) + img_data = np.fromfile(image_path, dtype=np.uint8) + img = cv2.imdecode(img_data, cv2.IMREAD_COLOR) + + # 备份一份干净的图片用于 OCR (避免识别到画上去的框) + img_clean = img.copy() + height, width = img.shape[:2] print(f"图片尺寸: {width}x{height}") @@ -79,6 +181,16 @@ def analyze_chat_image(image_path, output_path): print(f"找到有效头像数量: {len(avatars)}") + # 初始化 OCR + try: + reader = get_easyocr_reader(gpu=True) + print("OCR 初始化成功") + except Exception as e: + print(f"OCR 初始化失败: {e}") + reader = None + + dialogue_log = [] + # 4. 绘制对话内容框 (Green/Red Boxes) # 策略:按顺序遍历头像,如果发现同侧连续,则视为一组。 # 从当前组的第一个头像上方开始,直到下一个不同侧的头像上方(或底部)。 @@ -122,6 +234,49 @@ def analyze_chat_image(image_path, output_path): print(f"绘制内容框: 侧别={current_side}, 范围 Y={start_y} to {end_y}") + # OCR 识别区域文本 + if reader: + try: + # 从原图(img_clean)裁剪区域 + # 注意边界检查 + safe_start_y = max(0, start_y) + safe_end_y = min(height, end_y) + + if safe_end_y > safe_start_y: + roi_img = img_clean[safe_start_y:safe_end_y, 0:width] + + # 识别 + results = reader.read_text(roi_img) + + # 过滤关键词 + ignore_keywords = ["点击查看对话内容", "以上是打招呼的消息", "和 Kimi 的对话", "Kim智能助手"] + + # 过滤并拼接文本 (置信度 > 0.3) + texts = [] + for res in results: + text = res[1] + prob = res[2] + if prob > 0.3: + # 1. 检查是否包含屏蔽词 + if any(kw in text for kw in ignore_keywords): + continue + + # 2. 检查是否为单行时间 (如 11:35, 09:00) + # 正则匹配: 只有时间格式,没有其他文字 + if re.match(r'^\s*\d{1,2}:\d{2}\s*$', text): + continue + + texts.append(text) + + combined_text = " ".join(texts) + + if combined_text.strip(): + role = target_name if current_side == "Left" else "我" + dialogue_log.append(f"{role}: {combined_text}") + print(f" -> OCR结果: {combined_text}") + except Exception as e: + print(f" -> OCR出错: {e}") + # 移动到下一组 i = j @@ -144,10 +299,32 @@ def analyze_chat_image(image_path, output_path): except Exception as e: print(f"保存图片失败: {e}") + print("\n" + "="*30) + print("对话内容汇总:") + for line in dialogue_log: + print(line) + print("="*30 + "\n") + if __name__ == "__main__": # 输入文件路径 input_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_20260121_113553.jpg" # 输出文件路径 output_file = r"d:\dsWork\aiData\Test\Screenshots\chat_result_analyzed.jpg" - analyze_chat_image(input_file, output_file) + # 目标联系人名称 (对应搜索关键字) + target_name = "糖豆爸爸" + + analyze_chat_image(input_file, output_file, target_name=target_name) + + # 2. 查找输入框并执行自动化操作 + print("\n" + "="*30) + print("开始执行自动化输入...") + + # 注意: 这里使用 input_file (截图) 来定位坐标 + # 前提是截图时的界面布局与当前设备界面一致 + coords = find_input_box_center(input_file) + if coords: + print(f"输入框坐标: {coords}") + perform_input_action(coords, "AI助手我现在可以开始和你聊天了!") + else: + print("未找到输入框坐标") diff --git a/Test/temp_send_check.jpg b/Test/temp_send_check.jpg new file mode 100644 index 0000000..3cbad6e Binary files /dev/null and b/Test/temp_send_check.jpg differ