# aiData/Apps/XinDianTu/Kit.py

import logging
import os
import cv2
import numpy as np
import time
from Config.Config import BOTTOM_SAFE_EXCLUDE_RATIO, TEMP_IMAGE_DIR

# Configure the root logger at import time: INFO level with a timestamped format.
# NOTE(review): this is a process-wide side effect of importing the module;
# basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
# Module-level logger shared by every helper in this file.
logger = logging.getLogger(__name__)

def read_image(path):
    """Read an image from disk, supporting non-ASCII (e.g. Chinese) paths.

    The file bytes are loaded with NumPy and decoded in memory instead of
    handing the path to OpenCV directly.

    :param path: path of the image file
    :return: the decoded image array (decoded with flag ``-1``, i.e. channels
             are kept as stored), or ``None`` when reading/decoding fails
    """
    try:
        raw = np.fromfile(path, dtype=np.uint8)
        return cv2.imdecode(raw, -1)
    except Exception as e:
        # A failed read is an error, not routine information; use the same
        # level as save_image so both I/O helpers report failures alike.
        logger.error(f"Error reading image {path}: {e}")
        return None

def save_image(path, img):
    """Save an image to disk, supporting non-ASCII (e.g. Chinese) paths.

    The image is encoded in memory and the resulting buffer is written with
    NumPy, so the path never has to be passed to OpenCV.

    :param path: destination file path; its extension selects the encoder
                 (``.jpg`` encoding is used when the path has no extension)
    :param img: image array to encode
    :return: ``True`` on success, ``False`` if encoding or writing failed
    """
    try:
        ext = os.path.splitext(path)[1] or ".jpg"
        ok, encoded = cv2.imencode(ext, img)
        if not ok:
            # imencode signals failure through its first return value; do not
            # write a bogus buffer and report success.
            logger.error(f"Error saving image {path}: encoding as '{ext}' failed")
            return False
        encoded.tofile(path)
        return True
    except Exception as e:
        logger.error(f"Error saving image {path}: {e}")
        return False

# Screenshot helper
def take_screenshot(d, image_uuid, save_dir=TEMP_IMAGE_DIR):
    """Capture the device screen into ``<save_dir>/<image_uuid>.jpg``.

    :param d: device object exposing ``screenshot(path)``
    :param image_uuid: base name (without extension) of the screenshot file
    :param save_dir: directory for the screenshot; created if missing
    :return: path of the screenshot file
    """
    os.makedirs(save_dir, exist_ok=True)
    shot_path = "{}/{}.jpg".format(save_dir, image_uuid)
    d.screenshot(shot_path)
    return shot_path

def click_image_template(d, template_path, timeout=5.0, threshold=0.8):
    """Find a template on the device screen with OpenCV matching and click it.

    The screen is captured repeatedly until ``timeout`` elapses. Each capture
    is matched against the template at 11 scales between 0.5x and 1.5x; the
    first scale whose best score reaches ``threshold`` wins and the centre of
    the matched rectangle is clicked.

    :param d: uiautomator2 device object
    :param template_path: path of the template image
    :param timeout: maximum time to keep trying, in seconds
    :param threshold: minimum match score (0.0 - 1.0)
    :return: ``True`` if the template was found and clicked, else ``False``
    """
    if not os.path.exists(template_path):
        logger.info(f"Template file not found: {template_path}")
        return False

    template = read_image(template_path)
    if template is None:
        logger.info(f"Failed to load template: {template_path}")
        return False

    t_h, t_w = template.shape[:2]

    # The scaled templates do not depend on the screenshot, so build them once
    # instead of on every polling iteration.
    scaled_templates = []
    for scale in np.linspace(0.5, 1.5, 11):
        r_w, r_h = int(t_w * scale), int(t_h * scale)
        if r_w < 1 or r_h < 1:
            # A tiny template can scale down to 0 px, which cv2.resize rejects.
            continue
        scaled_templates.append(cv2.resize(template, (r_w, r_h)))

    start_time = time.time()
    best_val_overall = 0.0

    while time.time() - start_time < timeout:
        # Temporary screenshot for this polling iteration.
        screenshot_path = take_screenshot(d, "temp_click_check", save_dir=TEMP_IMAGE_DIR)
        target = read_image(screenshot_path)

        # The image is fully decoded in memory, so the temp file can go now,
        # including when decoding failed. Cleanup is best-effort: a file that
        # cannot be removed must not abort the search.
        try:
            os.remove(screenshot_path)
        except OSError:
            pass

        if target is None:
            time.sleep(0.5)
            continue

        target_h, target_w = target.shape[:2]

        # Multi-scale matching; stop at the first scale that clears the threshold.
        found = None
        for resized_template in scaled_templates:
            r_h, r_w = resized_template.shape[:2]

            # A template larger than the screenshot cannot be matched.
            if r_h > target_h or r_w > target_w:
                continue

            result = cv2.matchTemplate(target, resized_template, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, max_loc = cv2.minMaxLoc(result)

            best_val_overall = max(best_val_overall, max_val)

            if max_val >= threshold:
                found = (max_val, max_loc, r_w, r_h)
                break

        if found is not None:
            max_val, top_left, r_w, r_h = found
            # Click the centre of the matched rectangle.
            center_x = top_left[0] + r_w // 2
            center_y = top_left[1] + r_h // 2

            logger.info(f"Found image at ({center_x}, {center_y}) with confidence {max_val:.2f}")
            d.click(center_x, center_y)
            return True

        time.sleep(1.0)

    logger.info(f"Image not found after {timeout}s (Best confidence: {best_val_overall:.2f})")
    return False

def detect_black_agree_button(image_path, debug_dir=None):
    """Detect a black "agree" button in a screenshot using image processing.

    Dark pixels (HSV value <= 40) inside the vertical band 40%-80% of the
    image are cleaned with morphological close/open, and the external
    contours are filtered by area, aspect ratio and width. The largest
    surviving contour is taken as the button.

    :param image_path: path of the screenshot
    :param debug_dir: if given, an annotated copy is written there as
                      ``debug_agree_btn.jpg`` when a button is found
    :return: ``(x, y)`` centre of the button, or ``None`` if not found
    """
    if not os.path.exists(image_path):
        return None

    frame = read_image(image_path)
    if frame is None:
        return None

    img_h, img_w = frame.shape[:2]

    # Threshold in HSV: "black" means a very low V (brightness) channel.
    dark_mask = cv2.inRange(
        cv2.cvtColor(frame, cv2.COLOR_BGR2HSV),
        np.array([0, 0, 0]),
        np.array([180, 255, 40]),
    )

    # Keep only the band of rows between 40% and 80% of the image height.
    band_top, band_bottom = int(img_h * 0.4), int(img_h * 0.8)
    band_mask = np.zeros_like(dark_mask)
    band_mask[band_top:band_bottom, :] = dark_mask[band_top:band_bottom, :]

    # Close first (bridge small breaks), then open (remove speckles).
    morph_kernel = np.ones((5, 5), np.uint8)
    for morph_op in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        band_mask = cv2.morphologyEx(band_mask, morph_op, morph_kernel)

    contours, _ = cv2.findContours(band_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    min_area = img_w * img_h * 0.01   # at least 1% of the image area
    min_width = img_w * 0.5           # at least half the image width

    winner = None
    winner_area = 0
    for contour in contours:
        contour_area = cv2.contourArea(contour)
        _, _, box_w, box_h = cv2.boundingRect(contour)
        ratio = float(box_w) / box_h

        # The button is expected to be a wide bar: ratio within [2, 10].
        plausible = (
            contour_area >= min_area
            and 2.0 <= ratio <= 10.0
            and box_w >= min_width
        )
        if plausible and contour_area > winner_area:
            winner_area = contour_area
            winner = contour

    if winner is None:
        return None

    box_x, box_y, box_w, box_h = cv2.boundingRect(winner)
    mid_x = box_x + box_w // 2
    mid_y = box_y + box_h // 2
    logger.info(f"Found Black Agree Button at ({mid_x}, {mid_y}), Size: {box_w}x{box_h}")

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        annotated = frame.copy()
        cv2.rectangle(annotated, (box_x, box_y), (box_x + box_w, box_y + box_h), (0, 0, 255), 2)
        cv2.circle(annotated, (mid_x, mid_y), 5, (0, 255, 0), -1)
        save_image(os.path.join(debug_dir, "debug_agree_btn.jpg"), annotated)

    return (mid_x, mid_y)

def detect_ad_close_x(image_path, template_path, debug_dir=None, threshold=0.7):
    """Detect an ad "close (X)" button by multi-scale grayscale template matching.

    The template is matched at 20 scales between 0.5x and 1.5x and the best
    scoring match is kept. That match is accepted only if its score reaches
    ``threshold`` and it passes the positional heuristics below and the
    bottom safety-zone check (``BOTTOM_SAFE_EXCLUDE_RATIO``).

    :param image_path: path of the screenshot
    :param template_path: path of the template image
    :param debug_dir: if given, annotated debug images are written there
    :param threshold: minimum match score for a match to be accepted
    :return: ``(x, y)`` centre of the button, or ``None`` if not found
    """
    if not os.path.exists(image_path):
        return None

    if not os.path.exists(template_path):
        logger.warning(f"Template not found: {template_path}")
        return None

    target = read_image(image_path)
    template = read_image(template_path)

    if target is None or template is None:
        return None

    # Match on grayscale to reduce the influence of colour.
    target_gray = cv2.cvtColor(target, cv2.COLOR_BGR2GRAY)
    template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    t_h, t_w = template_gray.shape[:2]
    target_h, target_w = target_gray.shape[:2]

    best_match = None

    # The template may be larger or smaller than the on-screen button.
    for scale in np.linspace(0.5, 1.5, 20):
        new_w = int(t_w * scale)
        new_h = int(t_h * scale)

        # A tiny template can scale down to 0 px, which cv2.resize rejects.
        if new_w < 1 or new_h < 1:
            continue

        # The scaled template must fit inside the target image.
        if new_w > target_w or new_h > target_h:
            continue

        resized_template = cv2.resize(template_gray, (new_w, new_h))
        result = cv2.matchTemplate(target_gray, resized_template, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(result)

        if best_match is None or max_val > best_match[0]:
            best_match = (max_val, max_loc, scale, new_w, new_h)

    if best_match is None:
        return None

    max_val, max_loc, best_scale, best_w, best_h = best_match

    top_left = max_loc
    bottom_right = (top_left[0] + best_w, top_left[1] + best_h)
    center_x = top_left[0] + best_w // 2
    center_y = top_left[1] + best_h // 2

    def _save_debug(filename, color, label, mark_center=False):
        """Write an annotated copy of the screenshot to debug_dir (no-op without it)."""
        if not debug_dir:
            return
        os.makedirs(debug_dir, exist_ok=True)
        debug_img = target.copy()
        cv2.rectangle(debug_img, top_left, bottom_right, color, 2)
        if mark_center:
            cv2.circle(debug_img, (center_x, center_y), 5, (0, 255, 0), -1)
        cv2.putText(debug_img, label,
                    (top_left[0], top_left[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
        save_image(os.path.join(debug_dir, filename), debug_img)

    # --- Positional heuristics, on coordinates relative to the image size ---
    rel_x = center_x / target_w
    rel_y = center_y / target_h

    logger.info(f"Ad Close Button Match: Confidence={max_val:.4f}, Scale={best_scale:.2f}, Pos=({center_x}, {center_y}), Rel=({rel_x:.2f}, {rel_y:.2f})")

    is_valid_pos = True

    # Rule 1: reject the upper-centre area (X in (0.3, 0.7), Y in (0.15, 0.5)),
    # which usually holds the ad title, icon or content.
    if 0.3 < rel_x < 0.7 and 0.15 < rel_y < 0.5:
        logger.warning(f"Ignored match at ({center_x}, {center_y}) - likely Ad Content/Title (Center-Top area).")
        is_valid_pos = False

    # Rule 2: unless the score is extremely high (>= 0.98), the match must lie
    # in a typical close-button region: top-right (X > 0.7, Y < 0.5) or the
    # lower part of the screen (Y > 0.6).
    if is_valid_pos and max_val < 0.98:
        in_top_right = rel_x > 0.7 and rel_y < 0.5
        in_lower_part = rel_y > 0.6
        if not (in_top_right or in_lower_part):
            logger.warning(f"Ignored match at ({center_x}, {center_y}) - not in typical Close Button regions (Top-Right or Bottom).")
            is_valid_pos = False

    if max_val >= threshold:
        if is_valid_pos:
            # [Safety Check] A hit inside the bottom BOTTOM_SAFE_EXCLUDE_RATIO
            # of the screen is treated as a false positive (e.g. a bottom
            # function button) and is never returned.
            if center_y > (target_h * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)):
                logger.warning(f"Ignored Ad Close Button at ({center_x}, {center_y}) - in Bottom Safety Zone ({int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}%).")
                # Dark red marks a safety-zone rejection.
                _save_debug("debug_ad_close_safety_ignored.jpg", (0, 0, 128), "SAFETY IGNORED")
                return None

            logger.info(f"Found Ad Close Button at ({center_x}, {center_y})")
            _save_debug("debug_ad_close_x.jpg", (0, 0, 255),
                        f"Conf: {max_val:.2f}, Sc: {best_scale:.2f}", mark_center=True)
            return (center_x, center_y)

        # Score is high enough but the position is implausible: keep an
        # orange-marked image of the false positive for analysis.
        _save_debug("debug_ad_close_ignored.jpg", (0, 165, 255), "IGNORED Pos")

    # Nothing accepted, but the match had some confidence: keep a debug image.
    if max_val > 0.4:
        _save_debug("debug_ad_close_fail.jpg", (0, 255, 255), f"Failed Conf: {max_val:.2f}")

    return None

def detect_any_ad_close(image_path, template_dir, debug_dir=None):
    """Try every ``ad_close*.jpg`` template in a directory against a screenshot.

    Templates are tried in sorted filename order so the result is
    deterministic when several templates would match (``os.listdir`` itself
    returns entries in arbitrary order).

    :param image_path: path of the screenshot
    :param template_dir: directory containing the ``ad_close*.jpg`` templates
    :param debug_dir: forwarded to ``detect_ad_close_x``
    :return: ``(x, y)`` from the first template that matches, or ``None`` if
             no template matches or ``template_dir`` is not a directory
    """
    # isdir rather than exists: a regular file here would make listdir raise.
    if not os.path.isdir(template_dir):
        return None

    for filename in sorted(os.listdir(template_dir)):
        if not (filename.startswith("ad_close") and filename.endswith(".jpg")):
            continue
        template_path = os.path.join(template_dir, filename)
        logger.info(f"Trying template: {filename}")
        pos = detect_ad_close_x(image_path, template_path, debug_dir=debug_dir)
        if pos:
            return pos
    return None

def detect_bottom_close_circle(image_path, debug_dir=None):
    """Detect a round close button near the bottom of a screenshot.

    Circles are searched with a Hough transform in the bottom 25% of the
    image. A candidate must be:
    1. horizontally within 15% of the image width from the centre line;
    2. vertically between 80% and 95% of the image height.
    Among the candidates the one closest to the centre line is chosen. If it
    lies inside the bottom safety zone (``BOTTOM_SAFE_EXCLUDE_RATIO``) it is
    rejected and ``None`` is returned.

    :param image_path: path of the screenshot
    :param debug_dir: if given, an annotated copy is written there as
                      ``debug_bottom_circle.jpg`` when a button is found
    :return: ``(x, y)`` centre of the button as Python ints, or ``None``
    """
    if not os.path.exists(image_path):
        return None

    frame = read_image(image_path)
    if frame is None:
        return None

    img_h, img_w = frame.shape[:2]

    # Region of interest: bottom quarter of the image, gray + blurred.
    band_top = int(img_h * 0.75)
    band = frame[band_top:img_h, :]
    smoothed = cv2.GaussianBlur(cv2.cvtColor(band, cv2.COLOR_BGR2GRAY), (9, 9), 2)

    # Radius limited to 4%-9% of the width so larger round buttons are skipped.
    detected = cv2.HoughCircles(smoothed, cv2.HOUGH_GRADIENT, dp=1.2, minDist=img_w/5,
                                param1=100, param2=30, minRadius=int(img_w*0.04), maxRadius=int(img_w*0.09))
    if detected is None:
        return None

    mid_x = img_w // 2
    chosen = None
    chosen_offset = float('inf')

    for local_x, local_y, radius in np.round(detected[0, :]).astype("int"):
        # Convert the ROI row back to a full-image row.
        full_y = band_top + local_y
        offset = abs(local_x - mid_x)

        # Filter 1: must be near the horizontal centre.
        if offset > (img_w * 0.15):
            continue

        # Filter 2: must sit between 80% and 95% of the image height.
        if not (0.80 <= full_y / img_h <= 0.95):
            continue

        if offset < chosen_offset:
            chosen_offset = offset
            chosen = (local_x, full_y, radius)

    if not chosen:
        return None

    # Plain Python ints: NumPy integers are not JSON serializable, which the
    # original code notes can break the uiautomator2 click call.
    btn_x, btn_y, btn_r = (int(v) for v in chosen)

    # [Safety Check] reject hits inside the bottom safety zone.
    if btn_y > (img_h * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)):
        logger.warning(f"Ignored Bottom Circle at ({btn_x}, {btn_y}) - in Bottom Safety Zone ({int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}%).")
        return None

    logger.info(f"Found Bottom Circle Button via Hough: ({btn_x}, {btn_y}), r={btn_r}")

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        annotated = frame.copy()
        cv2.circle(annotated, (btn_x, btn_y), btn_r, (0, 255, 0), 2)
        cv2.circle(annotated, (btn_x, btn_y), 2, (0, 0, 255), 3)
        save_image(os.path.join(debug_dir, "debug_bottom_circle.jpg"), annotated)

    return (btn_x, btn_y)

def find_expand_button_position(image_path, debug_dir=None, debug_filename_prefix=None):
    """Locate the "all time slots" expand button from geometric features.

    A row qualifies when its left 35% and right 35% contain almost no Canny
    edge pixels while the middle 30% contains some. Qualifying rows are
    grouped into vertical bands (gaps of up to 5 px are bridged) and the
    topmost band with a plausible height and vertical position is chosen.

    :param image_path: path of the screenshot
    :param debug_dir: directory for an annotated debug image; nothing is
                      saved when ``None``
    :param debug_filename_prefix: prefix of the debug image file name; a
                                  timestamp is used when omitted
    :return: ``(x, y)`` centre of the chosen band, or ``None`` if not found
    """
    if not os.path.exists(image_path):
        return None

    frame = read_image(image_path)
    if frame is None:
        return None

    img_h, img_w = frame.shape[:2]

    edge_map = cv2.Canny(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), 50, 150)

    # Column boundaries of the left / middle / right thirds used below.
    col_a = int(img_w * 0.35)
    col_b = int(img_w * 0.65)

    def edge_pixels_per_row(strip):
        # Canny marks edges with 255, so dividing the row sum gives a count.
        return np.sum(strip, axis=1) / 255.0

    left_counts = edge_pixels_per_row(edge_map[:, :col_a])
    right_counts = edge_pixels_per_row(edge_map[:, col_b:])
    middle_counts = edge_pixels_per_row(edge_map[:, col_a:col_b])

    max_noise = 2     # tolerated stray edge pixels on each side
    min_content = 5   # required edge pixels in the middle

    row_matches = (
        (left_counts <= max_noise)
        & (right_counts <= max_noise)
        & (middle_counts >= min_content)
    )
    rows = np.where(row_matches)[0]

    if len(rows) == 0:
        return None

    # Split the sorted row indices wherever consecutive rows are more than
    # 5 px apart; each run becomes one (first_row, last_row) band.
    breaks = np.where(np.diff(rows) > 5)[0] + 1
    bands = [(run[0], run[-1]) for run in np.split(rows, breaks)]

    def acceptable(band):
        top, bottom = band
        span = bottom - top
        middle = (top + bottom) // 2
        # Height between 20 and 150 px, and not in the top 30% (title bar)
        # or the bottom 10% (bottom buttons) of the image.
        return 20 <= span <= 150 and img_h * 0.3 <= middle <= img_h * 0.9

    # The topmost acceptable band is used (it is expected to follow the
    # price table directly).
    chosen = next((band for band in bands if acceptable(band)), None)
    if chosen is None:
        return None

    band_top, band_bottom = chosen
    mid_y = (band_top + band_bottom) // 2
    mid_x = img_w // 2

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        annotated = frame.copy()
        # Green box for the detected band, red dot for the click point.
        cv2.rectangle(annotated, (0, band_top), (img_w, band_bottom), (0, 255, 0), 2)
        cv2.circle(annotated, (mid_x, mid_y), 10, (0, 0, 255), -1)

        if debug_filename_prefix:
            # Replace characters that are invalid in file names.
            import re
            cleaned_prefix = re.sub(r'[\\/*?:"<>|]', '_', str(debug_filename_prefix))
            out_name = f"{cleaned_prefix}_flag_expand.jpg"
        else:
            stamp = time.strftime("%Y%m%d_%H%M%S")
            out_name = f"{stamp}_flag_expand.jpg"

        out_path = os.path.join(debug_dir, out_name)
        save_image(out_path, annotated)
        logger.info(f"Saved debug image to {out_path}")

    return (int(mid_x), int(mid_y))


def get_row_stats(gray, strip_w=100, edge_w=20):
    """Compute per-row statistics of a grayscale image.

    Two vertical strips are analysed: a strip of ``strip_w`` columns centred
    horizontally (used to detect content) and the leftmost ``edge_w`` columns
    (used to detect the background / margin).

    :param gray: 2-D grayscale image array
    :param strip_w: width in pixels of the central strip
    :param edge_w: width in pixels of the left edge strip
    :return: ``(row_means, row_stds, edge_means)``, each a 1-D array with one
             value per image row: mean and standard deviation of the central
             strip, and mean of the edge strip
    """
    h, w = gray.shape

    center_x = w // 2
    half = strip_w // 2
    # Clamp the left bound: on images narrower than the strip a negative
    # start index would wrap around and select columns from the right edge.
    center_strip = gray[:, max(0, center_x - half) : center_x + half]

    edge_strip = gray[:, 0:edge_w]

    row_means = np.mean(center_strip, axis=1)
    row_stds = np.std(center_strip, axis=1)
    edge_means = np.mean(edge_strip, axis=1)

    return row_means, row_stds, edge_means

def crop_cards_from_image(img_path, output_dir=None, save_debug=True):
    """Locate station cards in a list screenshot and record their geometry.

    Rows are classified as "card" or "gap" from the statistics returned by
    ``get_row_stats``; consecutive card rows form vertical segments, which are
    grouped, given a common horizontal extent and filtered by height. No
    per-card image is written; with ``save_debug`` the function writes two
    annotated copies of the screenshot and a JSON file with the card
    rectangles and click points.

    :param img_path: path of the screenshot
    :param output_dir: directory for the output files; defaults to the
                       directory of ``img_path``
    :param save_debug: whether to write ``<stem>_flag<ext>``,
                       ``<stem>_vl<ext>`` and ``<stem>.json``
    :return: list with one ``(None, (click_x, click_y), (x1, y1, x2, y2))``
             tuple per detected card; empty if the image cannot be loaded
    """
    logger.info(f"Processing: {img_path}")
    if not os.path.exists(img_path):
        logger.info(f"Error: File not found {img_path}")
        return []

    img = read_image(img_path)
    if img is None:
        logger.info(f"Error: Failed to load image {img_path}")
        return []

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape

    row_means, row_stds, edge_means = get_row_stats(gray)

    # --- Tunable thresholds ---
    # Gray level range of the page background (about 242 per the original notes).
    BG_GRAY_MIN = 230
    BG_GRAY_MAX = 250

    # A gap row has a uniform central strip at the background gray level.
    # The upper bound is kept below the off-white (about 246-248) found
    # inside cards so that those rows are not mistaken for gaps.
    GAP_MEAN_MIN = 235
    GAP_MEAN_MAX = 244
    GAP_STD_MAX = 10.0

    # 1. Classify every row.
    # A card row has a gray left margin (which rules out full-width
    # header/footer rows) and a central strip that is not a uniform gap.
    has_gray_margin = (edge_means >= BG_GRAY_MIN) & (edge_means <= BG_GRAY_MAX)
    is_gap = (
        (row_means >= GAP_MEAN_MIN)
        & (row_means <= GAP_MEAN_MAX)
        & (row_stds < GAP_STD_MAX)
    )
    cleaned_row_is_card = (has_gray_margin & ~is_gap).tolist()

    # Close small cracks inside cards: a short run of non-card rows that is
    # enclosed by card rows is treated as card. The limit is kept small so
    # that the real gap between two cards is not filled in.
    MAX_GAP_FILL = 3

    i = 0
    while i < h:
        if cleaned_row_is_card[i]:
            i += 1
            continue

        gap_start = i
        while i < h and not cleaned_row_is_card[i]:
            i += 1
        gap_end = i
        gap_len = gap_end - gap_start

        prev_is_card = (gap_start > 0) and cleaned_row_is_card[gap_start - 1]
        next_is_card = (gap_end < h) and cleaned_row_is_card[gap_end]

        if prev_is_card and next_is_card and gap_len <= MAX_GAP_FILL:
            cleaned_row_is_card[gap_start:gap_end] = [True] * gap_len

    # Turn runs of card rows into (start_y, end_y) segments, dropping runs
    # that are not taller than 100 px.
    MIN_SEGMENT_HEIGHT = 100
    segments = []
    start_y = None
    for y in range(h):
        if cleaned_row_is_card[y]:
            if start_y is None:
                start_y = y
        elif start_y is not None:
            if y - start_y > MIN_SEGMENT_HEIGHT:
                segments.append((start_y, y))
            start_y = None

    # A run that is still open at the last row ends at the image bottom.
    if start_y is not None and h - start_y > MIN_SEGMENT_HEIGHT:
        segments.append((start_y, h))

    # Drop segments reaching the bottom of the image: they are usually the
    # navigation bar or a card cut off by the screen edge.
    BOTTOM_MARGIN = 50
    valid_segments_scan = []
    for y1, y2 in segments:
        if y2 >= h - BOTTOM_MARGIN:
            logger.info(f"  Dropping segment Y={y1}-{y2} because it touches the bottom (H={h}).")
            continue
        valid_segments_scan.append((y1, y2))
    segments = valid_segments_scan

    logger.info(f"  Found {len(segments)} potential segments based on vertical scan.")

    # 1.5 Group segments separated by large gaps (header / list / footer).
    # The filter bar usually creates a large gap (e.g. > 60 px).
    SECTION_GAP_MIN = 60
    groups = []
    if segments:
        current_group = [segments[0]]
        for prev_seg, seg in zip(segments, segments[1:]):
            if seg[0] - prev_seg[1] > SECTION_GAP_MIN:
                groups.append(current_group)
                current_group = []
            current_group.append(seg)
        groups.append(current_group)

    logger.info(f"  Found {len(groups)} segment groups.")

    # Pick the LAST group containing at least one substantial segment
    # (taller than 150 px); the station list is assumed to be the main,
    # lower content area.
    target_group = []
    for g in reversed(groups):
        if any((y2 - y1) > 150 for y1, y2 in g):
            target_group = g
            break

    if not target_group and segments:
        # Fallback: no substantial group, use every segment.
        target_group = segments

    logger.info(f"  Selected group with {len(target_group)} segments.")

    # 2. Horizontal extent: each segment votes for left/right card borders,
    # then one common extent is used for all cards.
    candidate_x1 = []
    candidate_x2 = []

    for y1, y2 in target_group:
        # Average a few rows around the vertical middle of the segment.
        mid_y = (y1 + y2) // 2
        sample_h = min(10, y2 - y1)
        sample_rows = gray[mid_y - sample_h//2 : mid_y + sample_h//2, :]
        col_means = np.mean(sample_rows, axis=0)

        # First non-background column scanning from the left ...
        x1 = 0
        for x in range(w // 2):
            if not (BG_GRAY_MIN <= col_means[x] <= BG_GRAY_MAX):
                x1 = x
                break

        # ... and from the right.
        x2 = w
        for x in range(w - 1, w // 2, -1):
            if not (BG_GRAY_MIN <= col_means[x] <= BG_GRAY_MAX):
                x2 = x
                break

        if x1 >= x2 or (x2 - x1) < w * 0.5:
            # Implausible width: the segment does not vote, but it is still
            # kept as a card candidate.
            logger.warning(f"  Warning: Segment {y1}-{y2} has weird width {x2-x1}.")
        else:
            candidate_x1.append(x1)
            candidate_x2.append(x2)

    # Every segment of the target group stays a candidate.
    temp_valid_segments = list(target_group)

    if not candidate_x1:
        logger.info("  No valid width detected. Using default.")
        final_x1 = 0
        final_x2 = w
    else:
        # Cards are expected to be aligned, so the median removes outliers.
        final_x1 = int(np.median(candidate_x1))
        final_x2 = int(np.median(candidate_x2))

        # Small padding, clamped to the image.
        final_x1 = max(0, final_x1 - 5)
        final_x2 = min(w, final_x2 + 5)

    logger.info(f"  Unified Width: X={final_x1}-{final_x2}, W={final_x2 - final_x1}")

    # 3. Height filtering: drop segments much shorter than the median.
    final_cards = []
    if not temp_valid_segments:
        logger.info("  No segments found.")
    else:
        heights = [y2 - y1 for y1, y2 in temp_valid_segments]
        # 70% of the median; the median is more robust than the maximum
        # against a single unusually tall card (e.g. an ad).
        threshold_h = np.median(heights) * 0.70

        # NOTE: the loop variable must not be called `h`; that name holds
        # the image height, which is written to the JSON metadata below.
        for (y1, y2), seg_h in zip(temp_valid_segments, heights):
            if seg_h < threshold_h:
                logger.info(f"  Filtering out segment Y={y1}-{y2} (H={seg_h}) because it's too short (Threshold={threshold_h:.1f}).")
            else:
                final_cards.append((y1, y2, final_x1, final_x2))
                logger.info(f"  Card: Y={y1}-{y2}, X={final_x1}-{final_x2}, H={seg_h}")

    # 4. Build the results and the annotated images.
    if output_dir is None:
        output_dir = os.path.dirname(img_path)

    base_name = os.path.basename(img_path)
    stem, ext = os.path.splitext(base_name)

    # debug_img gets boxes and click points; vl_img gets boxes only.
    debug_img = img.copy()
    vl_img = img.copy()

    results = []

    json_data = {
        "image": base_name,
        "width": w,
        "height": h,
        "cards": []
    }

    logger.info(f"  Step [2.1/VL] 准备在 VL 图片上绘制 {len(final_cards)} 个场站的绿色方框...")

    for idx, (y1, y2, x1, x2) in enumerate(final_cards):
        # Click point near the top-left of the card (15% / 20% inset) so it
        # is not covered by buttons at the bottom of the card.
        w_card = x2 - x1
        h_card = y2 - y1
        click_x = int(x1 + w_card * 0.15)
        click_y = int(y1 + h_card * 0.20)

        # Individual card crops are no longer saved; only metadata is kept.

        # Red filled dot at the click point (debug image only).
        cv2.circle(debug_img, (click_x, click_y), 10, (0, 0, 255), -1)
        # Green box on both images; per the original notes, the downstream
        # vision model relies on this box to delimit the card.
        cv2.rectangle(debug_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.rectangle(vl_img, (x1, y1), (x2, y2), (0, 255, 0), 2)

        json_data["cards"].append({
            "id": idx + 1,
            "rect": [x1, y1, x2, y2],
            "click_point": [click_x, click_y]
        })

        # First element is None because no per-card file is written.
        results.append((None, (click_x, click_y), (x1, y1, x2, y2)))

    if save_debug:
        # Make sure the destination exists; an empty output_dir means the
        # current working directory.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Annotated image with boxes and click points (_flag).
        flag_out_path = os.path.join(output_dir, f"{stem}_flag{ext}")
        save_image(flag_out_path, debug_img)
        logger.info(f"  Saved Debug Image: {flag_out_path}")

        # Annotated image with boxes only (_vl).
        vl_out_path = os.path.join(output_dir, f"{stem}_vl{ext}")
        save_image(vl_out_path, vl_img)
        logger.info(f"  Step [2.2/VL] 已保存带有绿色方框的图片: {vl_out_path}")

        # Card metadata (.json).
        import json
        json_out_path = os.path.join(output_dir, f"{stem}.json")
        with open(json_out_path, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, ensure_ascii=False, indent=4)
        logger.info(f"  Step [2.3/JSON] 已保存场站坐标元数据: {json_out_path}")

    return results