import json
import logging
import os
import re
import time

import cv2
import numpy as np

from Config.Config import BOTTOM_SAFE_EXCLUDE_RATIO, TEMP_IMAGE_DIR

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)


def read_image(path):
    """Read an image file, supporting paths that contain Chinese characters.

    The file bytes are loaded with ``np.fromfile`` and decoded with
    ``cv2.imdecode`` using flag ``-1`` (image returned as stored, so the
    result may be grayscale, BGR or BGRA depending on the file).

    :param path: image file path
    :return: decoded image array, or None if reading/decoding failed
    """
    try:
        return cv2.imdecode(np.fromfile(path, dtype=np.uint8), -1)
    except Exception as e:
        logger.info(f"Error reading image {path}: {e}")
        return None


def save_image(path, img):
    """Save an image, supporting paths that contain Chinese characters.

    The image is encoded in memory with ``cv2.imencode`` and written with
    ``ndarray.tofile``. When *path* has no extension, JPEG encoding is used.

    :param path: destination file path
    :param img: image array to save
    :return: True on success, False if encoding or writing failed
    """
    try:
        ext = os.path.splitext(path)[1]
        if not ext:
            ext = ".jpg"
        ok, encoded = cv2.imencode(ext, img)
        if not ok:
            # imencode reports failure through its flag; do not write a bogus file.
            logger.error(f"Error saving image {path}: encoding as {ext} failed")
            return False
        encoded.tofile(path)
        return True
    except Exception as e:
        logger.error(f"Error saving image {path}: {e}")
        return False


def _to_bgr(img):
    """Return *img* as a 3-channel BGR image (grayscale and BGRA are converted)."""
    if img.ndim == 2:
        return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    if img.shape[2] == 4:
        return cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
    return img


def _remove_file_quietly(path):
    """Best-effort removal of a temporary file; failures are only logged at debug level."""
    try:
        os.remove(path)
    except OSError as e:
        logger.debug(f"Could not remove temp file {path}: {e}")


def _save_match_debug(image, debug_dir, filename, top_left, size, color, label, center=None):
    """Save a copy of *image* annotated with a match rectangle, a label and an optional center dot."""
    os.makedirs(debug_dir, exist_ok=True)
    debug_img = image.copy()
    box_w, box_h = size
    cv2.rectangle(debug_img, top_left, (top_left[0] + box_w, top_left[1] + box_h), color, 2)
    if center is not None:
        cv2.circle(debug_img, center, 5, (0, 255, 0), -1)
    cv2.putText(debug_img, label, (top_left[0], top_left[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
    save_image(os.path.join(debug_dir, filename), debug_img)


# Screenshot
def take_screenshot(d, image_uuid, save_dir=TEMP_IMAGE_DIR):
    """Take a device screenshot and store it as ``<save_dir>/<image_uuid>.jpg``.

    :param d: device object exposing ``screenshot(path)``
    :param image_uuid: file name stem for the screenshot
    :param save_dir: target directory (created if missing)
    :return: path of the saved screenshot
    """
    path = f"{save_dir}/{image_uuid}.jpg"
    os.makedirs(save_dir, exist_ok=True)
    d.screenshot(path)
    return path


def click_image_template(d, template_path, timeout=5.0, threshold=0.8):
    """
    Find an image on screen with OpenCV multi-scale template matching and click it.

    Screenshots are taken repeatedly until a match reaches *threshold* or
    *timeout* elapses.

    :param d: uiautomator2 device object
    :param template_path: template image path
    :param timeout: timeout in seconds
    :param threshold: match threshold (0.0 - 1.0)
    :return: True if the image was found and clicked, otherwise False
    """
    if not os.path.exists(template_path):
        logger.info(f"Template file not found: {template_path}")
        return False

    template = read_image(template_path)
    if template is None:
        logger.info(f"Failed to load template: {template_path}")
        return False
    # matchTemplate needs template and screenshot to have the same channel count.
    template = _to_bgr(template)

    t_h, t_w = template.shape[:2]
    start_time = time.time()
    best_val_overall = 0.0

    while time.time() - start_time < timeout:
        # Temporary screenshot; it is fully decoded into memory, so the file
        # can be removed right away (also when decoding failed).
        screenshot_path = take_screenshot(d, "temp_click_check", save_dir=TEMP_IMAGE_DIR)
        target = read_image(screenshot_path)
        _remove_file_quietly(screenshot_path)

        if target is None:
            time.sleep(0.5)
            continue
        target = _to_bgr(target)

        # Multi-scale matching: scale factors 0.5 .. 1.5 in steps of 0.1.
        found = None
        for scale in np.linspace(0.5, 1.5, 11):
            r_w, r_h = int(t_w * scale), int(t_h * scale)
            # A tiny template may shrink to zero pixels; cv2.resize would fail.
            if r_w < 1 or r_h < 1:
                continue
            # Skip scales where the template is larger than the screenshot.
            if r_h > target.shape[0] or r_w > target.shape[1]:
                continue

            resized_template = cv2.resize(template, (r_w, r_h))
            result = cv2.matchTemplate(target, resized_template, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, max_loc = cv2.minMaxLoc(result)

            if max_val > best_val_overall:
                best_val_overall = max_val

            if max_val >= threshold:
                # First scale reaching the threshold ends the search for this screenshot.
                found = (max_val, max_loc, r_w, r_h)
                break

        if found:
            max_val, top_left, r_w, r_h = found
            center_x = top_left[0] + r_w // 2
            center_y = top_left[1] + r_h // 2
            logger.info(f"Found image at ({center_x}, {center_y}) with confidence {max_val:.2f}")
            d.click(center_x, center_y)
            return True

        time.sleep(1.0)

    logger.info(f"Image not found after {timeout}s (Best confidence: {best_val_overall:.2f})")
    return False


def detect_black_agree_button(image_path, debug_dir=None):
    """
    Detect the black "Agree" button via image processing (Image 1 scenario).

    Expected shape: a black rounded rectangle in the lower-middle part of the
    screen. Candidates are dark contours between 40% and 80% of the screen
    height whose area is at least 1% of the screen, whose aspect ratio is in
    [2, 10] and whose width is at least half the screen width; the largest
    one wins.

    :param image_path: screenshot path
    :param debug_dir: if given, an annotated debug image is saved there
    :return: (x, y) center of the button, or None if not found
    """
    if not os.path.exists(image_path):
        return None

    img = read_image(image_path)
    if img is None:
        return None
    img = _to_bgr(img)

    h, w = img.shape[:2]

    # Black is easiest to isolate in HSV: very low V (brightness), any H/S.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    lower_black = np.array([0, 0, 0])
    upper_black = np.array([180, 255, 40])  # V <= 40 counts as black
    mask = cv2.inRange(hsv, lower_black, upper_black)

    # Restrict the search to the band between 40% and 80% of the height.
    roi_top = int(h * 0.4)
    roi_bottom = int(h * 0.8)
    roi_mask = np.zeros_like(mask)
    roi_mask[roi_top:roi_bottom, :] = mask[roi_top:roi_bottom, :]

    # Morphology: close small holes, then open to remove speckles.
    kernel = np.ones((5, 5), np.uint8)
    roi_mask = cv2.morphologyEx(roi_mask, cv2.MORPH_CLOSE, kernel)
    roi_mask = cv2.morphologyEx(roi_mask, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(roi_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    best_cnt = None
    max_area = 0

    for cnt in contours:
        area = cv2.contourArea(cnt)
        _, _, cw, ch = cv2.boundingRect(cnt)
        aspect_ratio = float(cw) / ch

        # 1. Area must be large enough (> 1% of the screen area).
        if area < (w * h * 0.01):
            continue
        # 2. Aspect ratio: buttons are wide, flat bars.
        if aspect_ratio < 2.0 or aspect_ratio > 10.0:
            continue
        # 3. Width: at least 50% of the screen width.
        if cw < (w * 0.5):
            continue

        if area > max_area:
            max_area = area
            best_cnt = cnt

    if best_cnt is None:
        return None

    x, y, cw, ch = cv2.boundingRect(best_cnt)
    center_x = x + cw // 2
    center_y = y + ch // 2

    logger.info(f"Found Black Agree Button at ({center_x}, {center_y}), Size: {cw}x{ch}")

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        debug_img = img.copy()
        cv2.rectangle(debug_img, (x, y), (x + cw, y + ch), (0, 0, 255), 2)
        cv2.circle(debug_img, (center_x, center_y), 5, (0, 255, 0), -1)
        save_image(os.path.join(debug_dir, "debug_agree_btn.jpg"), debug_img)

    return (center_x, center_y)


def detect_ad_close_x(image_path, template_path, debug_dir=None, threshold=0.7):
    """
    Detect a "close (X)" button by multi-scale template matching (Image 2 scenario).

    The best match over all scales is then checked against position
    heuristics and the bottom safety zone before being accepted.

    :param image_path: screenshot path
    :param template_path: template image path
    :param debug_dir: if given, annotated debug images are saved there
    :param threshold: match threshold
    :return: (x, y) center of the button, or None if not found/rejected
    """
    if not os.path.exists(image_path):
        return None
    if not os.path.exists(template_path):
        logger.warning(f"Template not found: {template_path}")
        return None

    target = read_image(image_path)
    template = read_image(template_path)

    if target is None or template is None:
        return None

    # Normalize channel layout first: BGR2GRAY fails on grayscale/BGRA input.
    target = _to_bgr(target)
    template = _to_bgr(template)

    # Match on grayscale to reduce color interference.
    target_gray = cv2.cvtColor(target, cv2.COLOR_BGR2GRAY)
    template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    t_h, t_w = template_gray.shape[:2]
    target_h, target_w = target_gray.shape[:2]

    best_match = None

    # Multi-scale matching: the template may be larger or smaller than the
    # on-screen button, so try scale factors between 0.5 and 1.5.
    for scale in np.linspace(0.5, 1.5, 20):
        new_w = int(t_w * scale)
        new_h = int(t_h * scale)

        # A tiny template may shrink to zero pixels; cv2.resize would fail.
        if new_w < 1 or new_h < 1:
            continue
        # The scaled template must not exceed the target image.
        if new_w > target_w or new_h > target_h:
            continue

        resized_template = cv2.resize(template_gray, (new_w, new_h))
        result = cv2.matchTemplate(target_gray, resized_template, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(result)

        if best_match is None or max_val > best_match[0]:
            best_match = (max_val, max_loc, scale, new_w, new_h)

    if best_match is None:
        return None

    max_val, top_left, best_scale, best_w, best_h = best_match
    box_size = (best_w, best_h)

    center_x = top_left[0] + best_w // 2
    center_y = top_left[1] + best_h // 2

    # --- Position heuristics ---
    rel_x = center_x / target_w
    rel_y = center_y / target_h

    logger.info(f"Ad Close Button Match: Confidence={max_val:.4f}, Scale={best_scale:.2f}, Pos=({center_x}, {center_y}), Rel=({rel_x:.2f}, {rel_y:.2f})")

    is_valid_pos = True

    # Rule 1: reject the upper-center area (usually ad title, icon or content):
    # X in (0.3, 0.7) and Y in (0.15, 0.5).
    if 0.3 < rel_x < 0.7 and 0.15 < rel_y < 0.5:
        logger.warning(f"Ignored match at ({center_x}, {center_y}) - likely Ad Content/Title (Center-Top area).")
        is_valid_pos = False

    # Rule 2: unless the confidence is extremely high (>= 0.98), the match
    # must lie in a typical close-button region:
    #   top-right:  X > 0.7 and Y < 0.5
    #   bottom:     Y > 0.6
    if is_valid_pos and max_val < 0.98:
        if not ((rel_x > 0.7 and rel_y < 0.5) or (rel_y > 0.6)):
            logger.warning(f"Ignored match at ({center_x}, {center_y}) - not in typical Close Button regions (Top-Right or Bottom).")
            is_valid_pos = False

    if max_val >= threshold and is_valid_pos:
        # [Safety check] Matches inside the bottom BOTTOM_SAFE_EXCLUDE_RATIO
        # share of the screen are treated as false positives (e.g. bottom
        # function buttons) and rejected.
        if center_y > (target_h * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)):
            logger.warning(f"Ignored Ad Close Button at ({center_x}, {center_y}) - in Bottom Safety Zone ({int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}%).")
            if debug_dir:
                # Dark red marks a match rejected by the safety zone.
                _save_match_debug(target, debug_dir, "debug_ad_close_safety_ignored.jpg",
                                  top_left, box_size, (0, 0, 128), "SAFETY IGNORED")
            return None

        logger.info(f"Found Ad Close Button at ({center_x}, {center_y})")
        if debug_dir:
            _save_match_debug(target, debug_dir, "debug_ad_close_x.jpg",
                              top_left, box_size, (0, 0, 255),
                              f"Conf: {max_val:.2f}, Sc: {best_scale:.2f}",
                              center=(center_x, center_y))
        return (center_x, center_y)

    if max_val >= threshold and debug_dir:
        # Confident match in an invalid position: keep it as a false-positive
        # sample (orange) for debugging.
        _save_match_debug(target, debug_dir, "debug_ad_close_ignored.jpg",
                          top_left, box_size, (0, 165, 255), "IGNORED Pos")

    # Nothing accepted, but a moderately confident match is still saved for analysis.
    if max_val > 0.4 and debug_dir:
        _save_match_debug(target, debug_dir, "debug_ad_close_fail.jpg",
                          top_left, box_size, (0, 255, 255), f"Failed Conf: {max_val:.2f}")

    return None


def detect_any_ad_close(image_path, template_dir, debug_dir=None):
    """
    Try every ``ad_close*.jpg`` template in *template_dir* and return the first hit.

    Templates are tried in sorted file-name order so results are reproducible
    (``os.listdir`` order is arbitrary).

    :param image_path: screenshot path
    :param template_dir: directory containing the templates
    :param debug_dir: forwarded to detect_ad_close_x
    :return: (x, y) of the first accepted match, or None
    """
    if not os.path.exists(template_dir):
        return None

    for filename in sorted(os.listdir(template_dir)):
        if filename.startswith("ad_close") and filename.endswith(".jpg"):
            template_path = os.path.join(template_dir, filename)
            logger.info(f"Trying template: {filename}")
            pos = detect_ad_close_x(image_path, template_path, debug_dir=debug_dir)
            if pos:
                return pos
    return None


def detect_bottom_close_circle(image_path, debug_dir=None):
    """
    Detect a round close button near the bottom of the screen by geometry
    (common for interstitial ads).

    A Hough circle transform runs on the bottom 25% of the image; circles must
    be horizontally centered (within 15% of the width) and vertically between
    80% and 95% of the height. The circle closest to the horizontal center is
    chosen and then checked against the bottom safety zone.

    :param image_path: screenshot path
    :param debug_dir: if given, an annotated debug image is saved there
    :return: (x, y) circle center as Python ints, or None
    """
    if not os.path.exists(image_path):
        return None

    img = read_image(image_path)
    if img is None:
        return None
    img = _to_bgr(img)

    h, w = img.shape[:2]

    # 1. Region of interest: bottom 25% of the screen (kept small to avoid
    #    list content above).
    roi_top = int(h * 0.75)
    roi = img[roi_top:h, :]

    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    # Gaussian blur to reduce noise before the Hough transform.
    gray_blurred = cv2.GaussianBlur(gray, (9, 9), 2)

    # 2. Hough circle transform.
    #    dp=1.2 (accumulator resolution), minDist=w/5 (min center distance),
    #    param1=100 (Canny high threshold), param2=30 (accumulator threshold),
    #    minRadius=w*0.04, maxRadius=w*0.09. The max radius is kept small so
    #    large round buttons (e.g. scan-to-charge) are not picked up.
    circles = cv2.HoughCircles(gray_blurred, cv2.HOUGH_GRADIENT, dp=1.2, minDist=w/5,
                               param1=100, param2=30,
                               minRadius=int(w*0.04), maxRadius=int(w*0.09))

    if circles is None:
        return None

    circles = np.round(circles[0, :]).astype("int")

    best_circle = None
    min_dist_to_center = float('inf')

    for (cx, cy, r) in circles:
        # Map ROI coordinates back to full-image coordinates.
        global_cy = roi_top + cy
        global_cx = cx

        # Filter 1: must be near the horizontal center (15% tolerance).
        dist = abs(global_cx - w//2)
        if dist > (w * 0.15):
            continue

        # Filter 2: vertical band 80% - 95% of the screen, which avoids
        # list-item cards located higher up.
        rel_y = global_cy / h
        if rel_y < 0.80 or rel_y > 0.95:
            continue

        if dist < min_dist_to_center:
            min_dist_to_center = dist
            best_circle = (global_cx, global_cy, r)

    if best_circle is None:
        return None

    # Convert to plain Python ints; otherwise uiautomator2 click may fail
    # with a JSON serialization error.
    cx, cy, r = (int(v) for v in best_circle)

    # [Safety check] Circles inside the bottom BOTTOM_SAFE_EXCLUDE_RATIO share
    # of the screen are treated as false positives (e.g. scan-to-charge button).
    if cy > (h * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)):
        logger.warning(f"Ignored Bottom Circle at ({cx}, {cy}) - in Bottom Safety Zone ({int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}%).")
        return None

    logger.info(f"Found Bottom Circle Button via Hough: ({cx}, {cy}), r={r}")

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        debug_img = img.copy()
        cv2.circle(debug_img, (cx, cy), r, (0, 255, 0), 2)
        cv2.circle(debug_img, (cx, cy), 2, (0, 0, 255), 3)
        save_image(os.path.join(debug_dir, "debug_bottom_circle.jpg"), debug_img)

    return (cx, cy)


def find_expand_button_position(image_path, debug_dir=None, debug_filename_prefix=None):
    """
    Locate the "all time slots" expand button by geometric features.

    A candidate row has (almost) no Canny edges in the left 35% and right 35%
    of the image but some edges in the middle. Consecutive candidate rows are
    grouped into segments; the topmost segment of plausible height located
    between 30% and 90% of the screen height is returned.

    :param image_path: screenshot path
    :param debug_dir: directory for the debug image; nothing is saved if None
    :param debug_filename_prefix: file name prefix for the debug image
    :return: (x, y) center point, or None if not found
    """
    if not os.path.exists(image_path):
        return None

    img = read_image(image_path)
    if img is None:
        return None
    img = _to_bgr(img)

    h, w = img.shape[:2]

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)

    # Column boundaries of the left / center / right bands.
    left_w = int(w * 0.35)
    right_w = int(w * 0.65)

    # Number of edge pixels per row in each band (edge pixels have value 255).
    row_sum_left = np.sum(edges[:, :left_w], axis=1) / 255.0
    row_sum_right = np.sum(edges[:, right_w:], axis=1) / 255.0
    row_sum_center = np.sum(edges[:, left_w:right_w], axis=1) / 255.0

    # Candidate rows: few edge pixels on both sides, some in the middle.
    noise_threshold = 2    # tolerate up to 2 noisy edge pixels on each side
    content_threshold = 5  # at least 5 edge pixels in the middle

    candidates = ((row_sum_left <= noise_threshold)
                  & (row_sum_right <= noise_threshold)
                  & (row_sum_center >= content_threshold))

    y_indices = np.where(candidates)[0]
    if len(y_indices) == 0:
        return None

    # Group candidate rows into segments, bridging breaks of up to 5 pixels.
    segments = []
    start = prev = y_indices[0]
    for y in y_indices[1:]:
        if y > prev + 5:
            segments.append((start, prev))
            start = y
        prev = y
    segments.append((start, prev))

    valid_segments = []
    for seg_start, seg_end in segments:
        height = seg_end - seg_start
        mid_y = (seg_start + seg_end) // 2

        # Drop segments that are too short or too tall.
        if height < 20 or height > 150:
            continue
        # Drop the top area (possibly a title bar).
        if mid_y < h * 0.3:
            continue
        # Drop the bottom area (possibly bottom buttons).
        if mid_y > h * 0.9:
            continue

        valid_segments.append((seg_start, seg_end))

    if not valid_segments:
        return None

    # With several candidates, take the first (topmost) one: the button is
    # expected directly below the price table.
    start, end = (int(v) for v in valid_segments[0])

    center_y = (start + end) // 2
    center_x = w // 2

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        debug_img = img.copy()
        # Detected band (green) and click point (red dot).
        cv2.rectangle(debug_img, (0, start), (w, end), (0, 255, 0), 2)
        cv2.circle(debug_img, (center_x, center_y), 10, (0, 0, 255), -1)

        if debug_filename_prefix:
            # Sanitize filename: replace characters invalid in file names.
            safe_prefix = re.sub(r'[\\/*?:"<>|]', '_', str(debug_filename_prefix))
            debug_name = f"{safe_prefix}_flag_expand.jpg"
        else:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            debug_name = f"{timestamp}_flag_expand.jpg"

        debug_path = os.path.join(debug_dir, debug_name)
        save_image(debug_path, debug_img)
        logger.info(f"Saved debug image to {debug_path}")

    return (int(center_x), int(center_y))


def get_row_stats(gray):
    """
    Compute per-row statistics of a 2-D grayscale image.

    :param gray: 2-D grayscale image array
    :return: (row_means, row_stds, edge_means) where the first two are the
             mean / standard deviation of a central strip up to 100 px wide
             and edge_means is the mean of the leftmost 20 columns
    """
    _, w = gray.shape

    # Central strip (used to detect content). The start is clamped so that
    # images narrower than 100 px do not produce a wrapped negative index.
    center_x = w // 2
    center_strip = gray[:, max(0, center_x - 50):center_x + 50]

    # Left edge strip (used to detect background / margin).
    edge_strip = gray[:, 0:20]

    row_means = np.mean(center_strip, axis=1)
    row_stds = np.std(center_strip, axis=1)
    edge_means = np.mean(edge_strip, axis=1)

    return row_means, row_stds, edge_means


def crop_cards_from_image(img_path, output_dir=None, save_debug=True):
    """
    Locate station cards in a screenshot and write annotated images plus JSON metadata.

    No individual card crops are written. Outputs (next to the image unless
    *output_dir* is given): ``<stem>_flag<ext>`` (boxes + click points, only
    when *save_debug*), ``<stem>_vl<ext>`` (boxes only) and ``<stem>.json``.

    :param img_path: image path
    :param output_dir: output directory, defaults to the directory of img_path
    :param save_debug: whether to save the debug image (_flag)
    :return: list of ``(None, (click_x, click_y), (x1, y1, x2, y2))`` per card;
             empty list if the image is missing or unreadable
    """
    logger.info(f"Processing: {img_path}")
    if not os.path.exists(img_path):
        logger.info(f"Error: File not found {img_path}")
        return []

    img = read_image(img_path)
    if img is None:
        logger.info(f"Error: Failed to load image {img_path}")
        return []
    img = _to_bgr(img)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape

    row_means, row_stds, edge_means = get_row_stats(gray)

    # Gray range of the page background.
    BG_GRAY_MIN = 230
    BG_GRAY_MAX = 250

    # Gap rows: the central strip is a uniform background gray. The upper
    # bound is kept low to tell real gaps from slightly off-white card areas.
    GAP_MEAN_MIN = 235
    GAP_MEAN_MAX = 244
    GAP_STD_MAX = 10.0

    # 1. Classify every row as card / not card.
    # Condition A: the left edge is background gray (a margin exists, which
    #              rules out full-width headers/footers).
    has_gray_margin = (edge_means >= BG_GRAY_MIN) & (edge_means <= BG_GRAY_MAX)
    # Condition B: the center is NOT a uniform background gray (i.e. not a gap).
    is_gap = (row_means >= GAP_MEAN_MIN) & (row_means <= GAP_MEAN_MAX) & (row_stds < GAP_STD_MAX)
    cleaned_row_is_card = (has_gray_margin & ~is_gap).tolist()

    # Simple 1-D closing: a short run of non-card rows enclosed by card rows
    # is treated as part of the card. The limit is small so real gaps between
    # two cards are not bridged.
    MAX_GAP_FILL = 3

    i = 0
    while i < h:
        if cleaned_row_is_card[i]:
            i += 1
            continue

        gap_start = i
        while i < h and not cleaned_row_is_card[i]:
            i += 1
        gap_end = i

        prev_is_card = (gap_start > 0) and cleaned_row_is_card[gap_start - 1]
        next_is_card = (gap_end < h) and cleaned_row_is_card[gap_end]

        if prev_is_card and next_is_card and (gap_end - gap_start) <= MAX_GAP_FILL:
            for k in range(gap_start, gap_end):
                cleaned_row_is_card[k] = True

    # Turn runs of card rows into segments, dropping runs of 100 px or less.
    segments = []
    is_in_card = False
    start_y = 0

    for y in range(h):
        if cleaned_row_is_card[y]:
            if not is_in_card:
                is_in_card = True
                start_y = y
        elif is_in_card:
            is_in_card = False
            if y - start_y > 100:
                segments.append((start_y, y))

    # A run that is still open at the last row ends at the image bottom.
    if is_in_card and h - start_y > 100:
        segments.append((start_y, h))

    # Bottom filter: drop segments reaching the bottom of the image, which
    # are usually the navigation bar or a truncated card.
    BOTTOM_MARGIN = 50  # within this many pixels of the bottom counts as touching
    valid_segments_scan = []
    for y1, y2 in segments:
        if y2 >= h - BOTTOM_MARGIN:
            logger.info(f" Dropping segment Y={y1}-{y2} because it touches the bottom (H={h}).")
            continue
        valid_segments_scan.append((y1, y2))
    segments = valid_segments_scan

    logger.info(f" Found {len(segments)} potential segments based on vertical scan.")

    # 1.5 Group segments separated by large gaps (header / list / footer).
    # The filter bar usually creates a large gap (e.g. > 60px).
    SECTION_GAP_MIN = 60
    groups = []
    if segments:
        current_group = [segments[0]]
        for prev_seg, seg in zip(segments, segments[1:]):
            if seg[0] - prev_seg[1] > SECTION_GAP_MIN:
                groups.append(current_group)
                current_group = []
            current_group.append(seg)
        groups.append(current_group)

    logger.info(f" Found {len(groups)} segment groups.")

    # Select the LAST group that has at least one substantial segment
    # (H > 150), assuming the station list is the main, lowest content.
    target_group = []
    for g in reversed(groups):
        if any((y2 - y1) > 150 for y1, y2 in g):
            target_group = g
            break

    if not target_group and segments:
        # Fallback to all segments if no substantial group was found.
        target_group = segments

    logger.info(f" Selected group with {len(target_group)} segments.")

    # 2. Width refinement: collect left/right bounds per segment, then unify.
    candidate_x1 = []
    candidate_x2 = []
    temp_valid_segments = []
    half_w = w // 2

    for y1, y2 in target_group:
        # Analyse a few rows around the vertical middle of the segment.
        mid_y = (y1 + y2) // 2
        sample_h = min(10, y2 - y1)
        sample_rows = gray[mid_y - sample_h//2 : mid_y + sample_h//2, :]
        col_means = np.mean(sample_rows, axis=0)
        non_gray = ~((col_means >= BG_GRAY_MIN) & (col_means <= BG_GRAY_MAX))

        # First non-background column from the left (within the left half).
        left_hits = np.flatnonzero(non_gray[:half_w])
        x1 = int(left_hits[0]) if left_hits.size else 0

        # First non-background column from the right (columns half_w+1 .. w-1).
        right_hits = np.flatnonzero(non_gray[half_w + 1:])
        x2 = int(right_hits[-1]) + half_w + 1 if right_hits.size else w

        if x1 >= x2 or (x2 - x1) < w * 0.5:
            # Width could not be determined: probably not a valid card, so the
            # segment is excluded from the width vote and from the results.
            logger.warning(f" Warning: Segment {y1}-{y2} has weird width {x2-x1}.")
        else:
            candidate_x1.append(x1)
            candidate_x2.append(x2)
            temp_valid_segments.append((y1, y2))

    if not candidate_x1:
        logger.info(" No valid width detected. Using default.")
        final_x1 = 0
        final_x2 = w
    else:
        # Cards are aligned, so the median removes per-segment noise.
        final_x1 = int(np.median(candidate_x1))
        final_x2 = int(np.median(candidate_x2))

        # Small padding, clamped to the image bounds.
        final_x1 = max(0, final_x1 - 5)
        final_x2 = min(w, final_x2 + 5)

    logger.info(f" Unified Width: X={final_x1}-{final_x2}, W={final_x2 - final_x1}")

    # 3. Height filtering: drop cards much shorter than the median height.
    final_cards = []
    if not temp_valid_segments:
        logger.info(" No segments found.")
    else:
        heights = [y2 - y1 for y1, y2 in temp_valid_segments]
        # The median is more robust than the max against a single very tall
        # card (e.g. an ad). Threshold: 70% of the median height.
        threshold_h = np.median(heights) * 0.70

        # NOTE: the loop variable must not be called `h`; that would overwrite
        # the image height that is written to the JSON metadata below.
        for (y1, y2), seg_h in zip(temp_valid_segments, heights):
            if seg_h < threshold_h:
                logger.info(f" Filtering out segment Y={y1}-{y2} (H={seg_h}) because it's too short (Threshold={threshold_h:.1f}).")
            else:
                final_cards.append((y1, y2, final_x1, final_x2))
                logger.info(f" Card: Y={y1}-{y2}, X={final_x1}-{final_x2}, H={seg_h}")

    # 4. Save results.
    if output_dir is None:
        output_dir = os.path.dirname(img_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    base_name = os.path.basename(img_path)
    stem, ext = os.path.splitext(base_name)

    # Annotated images: debug_img gets boxes and click points, vl_img boxes only.
    debug_img = img.copy()
    vl_img = img.copy()

    results = []

    json_data = {
        "image": base_name,
        "width": w,
        "height": h,
        "cards": []
    }

    logger.info(f" Step [2.1/VL] 准备在 VL 图片上绘制 {len(final_cards)} 个场站的绿色方框...")

    for idx, (y1, y2, x1, x2) in enumerate(final_cards):
        # Click point near the top-left of the card (15% / 20% offsets) so it
        # is not covered by buttons at the bottom of the card.
        w_card = x2 - x1
        h_card = y2 - y1
        click_x = int(x1 + w_card * 0.15)
        click_y = int(y1 + h_card * 0.20)

        # Red dot (filled, radius 10) for the click point on the debug image.
        cv2.circle(debug_img, (click_x, click_y), 10, (0, 0, 255), -1)
        # Green box on both images; the downstream vision model relies on it
        # to identify the card area.
        cv2.rectangle(debug_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.rectangle(vl_img, (x1, y1), (x2, y2), (0, 255, 0), 2)

        json_data["cards"].append({
            "id": idx + 1,
            "rect": [x1, y1, x2, y2],
            "click_point": [click_x, click_y]
        })

        # Region info for the caller: (None, (click_x, click_y), (x1, y1, x2, y2)).
        # The first element is None because single card crops are not saved.
        results.append((None, (click_x, click_y), (x1, y1, x2, y2)))

    # Debug image (_flag).
    if save_debug:
        flag_out_path = os.path.join(output_dir, f"{stem}_flag{ext}")
        save_image(flag_out_path, debug_img)
        logger.info(f" Saved Debug Image: {flag_out_path}")

    # Boxes-only image (_vl).
    vl_out_path = os.path.join(output_dir, f"{stem}_vl{ext}")
    save_image(vl_out_path, vl_img)
    logger.info(f" Step [2.2/VL] 已保存带有绿色方框的图片: {vl_out_path}")

    # JSON metadata.
    json_out_path = os.path.join(output_dir, f"{stem}.json")
    with open(json_out_path, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, ensure_ascii=False, indent=4)
    logger.info(f" Step [2.3/JSON] 已保存场站坐标元数据: {json_out_path}")

    return results