Files
aiData/Apps/XinDianTu/Kit.py
HuangHai ca23ebf606 'commit'
2026-01-12 08:09:32 +08:00

923 lines
35 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import logging
import os
import cv2
import numpy as np
import time
from Config.Config import BOTTOM_SAFE_EXCLUDE_RATIO, TEMP_IMAGE_DIR
# Configure the root logger when this module is imported: INFO level, with
# timestamp, logger name, level name and message in every record.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
# Module-level logger, named after this module, used by every helper below.
logger = logging.getLogger(__name__)
def read_image(path):
    """
    Read an image from disk, supporting non-ASCII (e.g. Chinese) paths.

    The file is read as raw bytes with numpy and decoded in memory, which
    avoids passing the path itself to OpenCV.

    :param path: path of the image file
    :return: the decoded image array (channels kept as stored in the file),
        or None if reading/decoding raised an error. ``cv2.imdecode`` may
        also return None by itself when the data is not a valid image.
    """
    try:
        raw = np.fromfile(path, dtype=np.uint8)
        # IMREAD_UNCHANGED (-1): keep the stored channel layout / alpha.
        return cv2.imdecode(raw, cv2.IMREAD_UNCHANGED)
    except Exception as e:
        # Logged at ERROR level, consistent with save_image.
        logger.error(f"Error reading image {path}: {e}")
        return None
def save_image(path, img):
    """
    Save an image to disk, supporting non-ASCII (e.g. Chinese) paths.

    The image is encoded in memory and the resulting bytes are written with
    numpy, which avoids passing the path itself to OpenCV.

    :param path: destination path; its extension selects the encoder
        (".jpg" is used when the path has no extension)
    :param img: image array to encode
    :return: True on success, False if encoding or writing failed
    """
    try:
        ext = os.path.splitext(path)[1] or ".jpg"
        ok, encoded = cv2.imencode(ext, img)
        if not ok:
            # imencode reports failure through its first return value;
            # do not write a bogus file in that case.
            logger.error(f"Error saving image {path}: encoding as {ext} failed")
            return False
        encoded.tofile(path)
        return True
    except Exception as e:
        logger.error(f"Error saving image {path}: {e}")
        return False
# Screenshot helper
def take_screenshot(d, image_uuid, save_dir=TEMP_IMAGE_DIR):
    """
    Capture the device screen into ``<save_dir>/<image_uuid>.jpg``.

    :param d: device object; its ``screenshot(path)`` method is called
    :param image_uuid: file name (without extension) for the screenshot
    :param save_dir: target directory, created if it does not exist
    :return: the path the screenshot was written to
    """
    os.makedirs(save_dir, exist_ok=True)
    path = f"{save_dir}/{image_uuid}.jpg"
    d.screenshot(path)
    return path
def click_image_template(d, template_path, timeout=5.0, threshold=0.8):
    """
    Find a template image on the device screen with OpenCV template matching
    and click its center.

    Screenshots are taken repeatedly until a match is found or ``timeout``
    elapses. Each screenshot is searched with the template resized to 11
    scales between 0.5 and 1.5; the first scale whose score reaches
    ``threshold`` is used.

    :param d: device object (a uiautomator2 device according to the original
        documentation); ``screenshot(path)`` and ``click(x, y)`` are called
    :param template_path: path of the template image
    :param timeout: maximum time to keep trying, in seconds
    :param threshold: match threshold (0.0 - 1.0)
    :return: True if the template was found and clicked, False otherwise
    """
    if not os.path.exists(template_path):
        logger.info(f"Template file not found: {template_path}")
        return False
    template = read_image(template_path)
    if template is None:
        logger.info(f"Failed to load template: {template_path}")
        return False
    t_h, t_w = template.shape[:2]
    # Monotonic clock: the timeout must not be affected by wall-clock changes.
    start_time = time.monotonic()
    best_val_overall = 0.0
    while time.monotonic() - start_time < timeout:
        # Temporary screenshot; it is decoded into memory and removed at once.
        screenshot_path = take_screenshot(d, "temp_click_check", save_dir=TEMP_IMAGE_DIR)
        target = read_image(screenshot_path)
        try:
            os.remove(screenshot_path)
        except OSError:
            # Best-effort cleanup: a missing or locked temp file is harmless.
            pass
        if target is None:
            time.sleep(0.5)
            continue
        # NOTE(review): cv2.matchTemplate needs the screenshot and template to
        # have the same channel count; read_image keeps the stored layout, so
        # a template with alpha or a grayscale template may fail — confirm.
        found = None
        # Multi-scale matching: scale factors 0.5 .. 1.5 in steps of 0.1.
        for scale in np.linspace(0.5, 1.5, 11):
            r_w, r_h = int(t_w * scale), int(t_h * scale)
            # Skip degenerate sizes (tiny template scaled down to 0 px).
            if r_w < 1 or r_h < 1:
                continue
            # Skip scales where the template is larger than the screenshot.
            if r_h > target.shape[0] or r_w > target.shape[1]:
                continue
            resized_template = cv2.resize(template, (r_w, r_h))
            result = cv2.matchTemplate(target, resized_template, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, max_loc = cv2.minMaxLoc(result)
            if max_val > best_val_overall:
                best_val_overall = max_val
            if max_val >= threshold:
                found = (max_val, max_loc, r_w, r_h)
                break  # first scale that satisfies the threshold wins
        if found:
            max_val, max_loc, r_w, r_h = found
            # Center of the matched rectangle.
            center_x = max_loc[0] + r_w // 2
            center_y = max_loc[1] + r_h // 2
            logger.info(f"Found image at ({center_x}, {center_y}) with confidence {max_val:.2f}")
            d.click(center_x, center_y)
            return True
        time.sleep(1.0)
    logger.info(f"Image not found after {timeout}s (Best confidence: {best_val_overall:.2f})")
    return False
def detect_black_agree_button(image_path, debug_dir=None):
    """
    Detect the black "agree" button with classic image processing.

    The button is looked for as a large dark contour between 40% and 80% of
    the image height that is wide (at least half the image width) and flat
    (aspect ratio between 2 and 10).

    :param image_path: path of the screenshot
    :param debug_dir: if given, an annotated image "debug_agree_btn.jpg" is
        written to this directory when a button is found
    :return: (x, y) center of the button, or None if nothing was found
    """
    if not os.path.exists(image_path):
        return None
    img = read_image(image_path)
    if img is None:
        return None
    h, w = img.shape[:2]

    # Black is easiest to isolate in HSV: any hue/saturation, V <= 40.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    dark_mask = cv2.inRange(hsv, np.array([0, 0, 0]), np.array([180, 255, 40]))

    # Keep only the horizontal band between 40% and 80% of the height.
    band_top, band_bottom = int(h * 0.4), int(h * 0.8)
    band_mask = np.zeros_like(dark_mask)
    band_mask[band_top:band_bottom, :] = dark_mask[band_top:band_bottom, :]

    # Close then open with a 5x5 kernel: join broken regions, drop speckles.
    kernel = np.ones((5, 5), np.uint8)
    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        band_mask = cv2.morphologyEx(band_mask, operation, kernel)

    contours, _ = cv2.findContours(band_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    min_area = w * h * 0.01   # at least 1% of the image area
    min_width = w * 0.5       # at least half of the image width
    best_cnt, best_area = None, 0
    for candidate in contours:
        area = cv2.contourArea(candidate)
        _, _, box_w, box_h = cv2.boundingRect(candidate)
        ratio = float(box_w) / box_h
        button_like = area >= min_area and 2.0 <= ratio <= 10.0 and box_w >= min_width
        # Strict ">" keeps the first contour among equally large ones.
        if button_like and area > best_area:
            best_cnt, best_area = candidate, area

    if best_cnt is None:
        return None

    x, y, cw, ch = cv2.boundingRect(best_cnt)
    center_x = x + cw // 2
    center_y = y + ch // 2
    logger.info(f"Found Black Agree Button at ({center_x}, {center_y}), Size: {cw}x{ch}")
    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        debug_img = img.copy()
        cv2.rectangle(debug_img, (x, y), (x + cw, y + ch), (0, 0, 255), 2)
        cv2.circle(debug_img, (center_x, center_y), 5, (0, 255, 0), -1)
        save_image(os.path.join(debug_dir, "debug_agree_btn.jpg"), debug_img)
    return (center_x, center_y)
def _save_ad_close_debug(debug_dir, image, top_left, size, color, label, filename, center=None):
    """Save a copy of `image` annotated with the match rectangle, a label and an optional center dot."""
    os.makedirs(debug_dir, exist_ok=True)
    debug_img = image.copy()
    left, top = top_left
    box_w, box_h = size
    cv2.rectangle(debug_img, top_left, (left + box_w, top + box_h), color, 2)
    if center is not None:
        cv2.circle(debug_img, center, 5, (0, 255, 0), -1)
    cv2.putText(debug_img, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
    save_image(os.path.join(debug_dir, filename), debug_img)


def detect_ad_close_x(image_path, template_path, debug_dir=None, threshold=0.7):
    """
    Detect an ad "close (X)" button by multi-scale template matching.

    The template is matched in grayscale at 20 scales between 0.5 and 1.5 and
    the best-scoring location is kept. That single match is then accepted
    only if it passes the position heuristics below and does not fall in the
    bottom safety zone defined by BOTTOM_SAFE_EXCLUDE_RATIO.

    :param image_path: path of the screenshot
    :param template_path: path of the template image
    :param debug_dir: if given, annotated images describing the outcome are
        written to this directory
    :param threshold: minimum match score for a detection
    :return: (x, y) center of the button, or None if not found
    """
    if not os.path.exists(image_path):
        return None
    if not os.path.exists(template_path):
        logger.warning(f"Template not found: {template_path}")
        return None
    target = read_image(image_path)
    template = read_image(template_path)
    if target is None or template is None:
        return None

    # Match in grayscale to reduce colour interference. read_image keeps the
    # stored channel layout, so an image may already be single-channel; only
    # convert when there is a channel axis.
    target_gray = target if target.ndim == 2 else cv2.cvtColor(target, cv2.COLOR_BGR2GRAY)
    template_gray = template if template.ndim == 2 else cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
    t_h, t_w = template_gray.shape[:2]
    target_h, target_w = target_gray.shape[:2]

    # Multi-scale matching: the template may be larger or smaller than the
    # on-screen button, so try scales 0.5 .. 1.5.
    best_match = None
    for scale in np.linspace(0.5, 1.5, 20):
        new_w = int(t_w * scale)
        new_h = int(t_h * scale)
        # Skip degenerate sizes (cv2.resize rejects a 0-pixel dimension).
        if new_w < 1 or new_h < 1:
            continue
        # Skip scales where the template would exceed the target image.
        if new_w > target_w or new_h > target_h:
            continue
        resized_template = cv2.resize(template_gray, (new_w, new_h))
        result = cv2.matchTemplate(target_gray, resized_template, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(result)
        if best_match is None or max_val > best_match[0]:
            best_match = (max_val, max_loc, scale, new_w, new_h)
    if best_match is None:
        return None

    max_val, max_loc, best_scale, best_w, best_h = best_match
    top_left = max_loc
    size = (best_w, best_h)
    center_x = top_left[0] + best_w // 2
    center_y = top_left[1] + best_h // 2

    # --- Position heuristics (on coordinates relative to the image size) ---
    rel_x = center_x / target_w
    rel_y = center_y / target_h
    logger.info(f"Ad Close Button Match: Confidence={max_val:.4f}, Scale={best_scale:.2f}, Pos=({center_x}, {center_y}), Rel=({rel_x:.2f}, {rel_y:.2f})")
    is_valid_pos = True
    # Rule 1: reject the upper-center area, X in (0.3, 0.7) and Y in
    # (0.15, 0.5) — usually ad title, icon or content.
    if 0.3 < rel_x < 0.7 and 0.15 < rel_y < 0.5:
        logger.warning(f"Ignored match at ({center_x}, {center_y}) - likely Ad Content/Title (Center-Top area).")
        is_valid_pos = False
    # Rule 2: unless the score is extremely high (>= 0.98), the match must be
    # in a typical region: top-right (X > 0.7, Y < 0.5) or lower part (Y > 0.6).
    if is_valid_pos and max_val < 0.98:
        if not ((rel_x > 0.7 and rel_y < 0.5) or (rel_y > 0.6)):
            logger.warning(f"Ignored match at ({center_x}, {center_y}) - not in typical Close Button regions (Top-Right or Bottom).")
            is_valid_pos = False

    if max_val >= threshold:
        if is_valid_pos:
            # [Safety Check] A match inside the bottom BOTTOM_SAFE_EXCLUDE_RATIO
            # share of the screen is treated as a false positive (e.g. a
            # bottom function button).
            if center_y > (target_h * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)):
                logger.warning(f"Ignored Ad Close Button at ({center_x}, {center_y}) - in Bottom Safety Zone ({int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}%).")
                if debug_dir:
                    # Dark red: ignored for safety.
                    _save_ad_close_debug(debug_dir, target, top_left, size, (0, 0, 128),
                                         "SAFETY IGNORED", "debug_ad_close_safety_ignored.jpg")
                return None
            logger.info(f"Found Ad Close Button at ({center_x}, {center_y})")
            if debug_dir:
                _save_ad_close_debug(debug_dir, target, top_left, size, (0, 0, 255),
                                     f"Conf: {max_val:.2f}, Sc: {best_scale:.2f}",
                                     "debug_ad_close_x.jpg", center=(center_x, center_y))
            return (center_x, center_y)
        # Score is high enough but the position is implausible: keep the
        # false positive (orange) for later analysis.
        if debug_dir:
            _save_ad_close_debug(debug_dir, target, top_left, size, (0, 165, 255),
                                 "IGNORED Pos", "debug_ad_close_ignored.jpg")

    # Nothing accepted; still record matches with some confidence (yellow).
    if max_val > 0.4 and debug_dir:
        _save_ad_close_debug(debug_dir, target, top_left, size, (0, 255, 255),
                             f"Failed Conf: {max_val:.2f}", "debug_ad_close_fail.jpg")
    return None
def detect_any_ad_close(image_path, template_dir, debug_dir=None):
    """
    Try every ``ad_close*.jpg`` template in a directory against a screenshot.

    Templates are tried in sorted file-name order (``os.listdir`` alone gives
    an arbitrary order), so the result is reproducible when several templates
    could match.

    :param image_path: path of the screenshot
    :param template_dir: directory containing the template images
    :param debug_dir: forwarded to detect_ad_close_x
    :return: the first position returned by detect_ad_close_x, or None if the
        directory does not exist or no template matched
    """
    if not os.path.exists(template_dir):
        return None
    for filename in sorted(os.listdir(template_dir)):
        if not (filename.startswith("ad_close") and filename.endswith(".jpg")):
            continue
        template_path = os.path.join(template_dir, filename)
        logger.info(f"Trying template: {filename}")
        pos = detect_ad_close_x(image_path, template_path, debug_dir=debug_dir)
        if pos is not None:
            return pos
    return None
def detect_bottom_close_circle(image_path, debug_dir=None):
    """
    Detect a round close button near the bottom of the screen using the
    Hough circle transform.

    A circle is accepted when:
    1. it is found in the bottom 25% of the image,
    2. its center is within 15% of the image width from the horizontal center,
    3. its center lies between 80% and 95% of the image height,
    4. it is not inside the bottom safety zone (BOTTOM_SAFE_EXCLUDE_RATIO).
    Among the accepted circles the one closest to the horizontal center wins.

    :param image_path: path of the screenshot
    :param debug_dir: if given, "debug_bottom_circle.jpg" is written there
        when a button is found
    :return: (x, y) center of the circle as Python ints, or None
    """
    if not os.path.exists(image_path):
        return None
    img = read_image(image_path)
    if img is None:
        return None
    h, w = img.shape[:2]

    # Region of interest: bottom quarter only, to stay clear of list content.
    roi_top = int(h * 0.75)
    gray = cv2.cvtColor(img[roi_top:h, :], cv2.COLOR_BGR2GRAY)
    gray_blurred = cv2.GaussianBlur(gray, (9, 9), 2)

    # Radius limited to 4%..9% of the width so that larger round buttons
    # are not picked up.
    circles = cv2.HoughCircles(
        gray_blurred, cv2.HOUGH_GRADIENT, dp=1.2, minDist=w/5,
        param1=100, param2=30, minRadius=int(w*0.04), maxRadius=int(w*0.09))
    if circles is None:
        return None

    half_w = w // 2
    accepted = []
    for (roi_cx, roi_cy, radius) in np.round(circles[0, :]).astype("int"):
        full_cy = roi_top + roi_cy  # back to full-image coordinates
        offset = abs(roi_cx - half_w)
        if offset > (w * 0.15):
            continue
        if not (0.80 <= full_cy / h <= 0.95):
            continue
        accepted.append((offset, roi_cx, full_cy, radius))
    if not accepted:
        return None

    # min() keeps the first of equally distant circles.
    _, cx, cy, r = min(accepted, key=lambda item: item[0])
    # Plain Python ints: numpy integers are not JSON serializable, which
    # matters when the coordinates are handed to the device click call.
    cx, cy, r = int(cx), int(cy), int(r)

    # [Safety Check] A circle inside the bottom safety zone is treated as a
    # false positive.
    if cy > (h * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)):
        logger.warning(f"Ignored Bottom Circle at ({cx}, {cy}) - in Bottom Safety Zone ({int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}%).")
        return None

    logger.info(f"Found Bottom Circle Button via Hough: ({cx}, {cy}), r={r}")
    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        debug_img = img.copy()
        cv2.circle(debug_img, (cx, cy), r, (0, 255, 0), 2)
        cv2.circle(debug_img, (cx, cy), 2, (0, 0, 255), 3)
        save_image(os.path.join(debug_dir, "debug_bottom_circle.jpg"), debug_img)
    return (cx, cy)
def find_expand_button_position(image_path, debug_dir=None, debug_filename_prefix=None):
    """
    Locate the "all time periods" expand button from its geometry.

    The button row is recognised as a band of rows whose left 35% and right
    35% contain almost no edges while the middle 30% does.

    :param image_path: path of the screenshot
    :param debug_dir: directory for the annotated debug image; nothing is
        saved when None
    :param debug_filename_prefix: prefix for the debug image file name; a
        timestamp is used when it is empty
    :return: (x, y) center of the button row, or None if not found
    """
    if not os.path.exists(image_path):
        return None
    img = read_image(image_path)
    if img is None:
        return None
    h, w = img.shape[:2]

    edges = cv2.Canny(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 50, 150)

    # Column boundaries of the left / center / right zones.
    left_w = int(w * 0.35)
    right_w = int(w * 0.65)

    # Edge pixels per row in each zone (Canny marks edges as 255).
    edge_count_left = np.sum(edges[:, :left_w], axis=1) / 255.0
    edge_count_right = np.sum(edges[:, right_w:], axis=1) / 255.0
    edge_count_center = np.sum(edges[:, left_w:right_w], axis=1) / 255.0

    noise_threshold = 2    # tolerate up to 2 stray edge pixels on the sides
    content_threshold = 5  # require at least 5 edge pixels in the middle
    row_matches = (
        (edge_count_left <= noise_threshold)
        & (edge_count_right <= noise_threshold)
        & (edge_count_center >= content_threshold)
    )
    y_indices = np.where(row_matches)[0]
    if len(y_indices) == 0:
        return None

    # Group matching rows into bands; a jump of more than 5 rows starts a
    # new band.
    split_points = np.flatnonzero(np.diff(y_indices) > 5) + 1
    bands = [(run[0], run[-1]) for run in np.split(y_indices, split_points)]

    # Take the topmost band that is 20..150 px tall and whose middle lies
    # between 30% and 90% of the image height (skips title bar and bottom
    # buttons).
    chosen = next(
        (
            (top, bottom)
            for top, bottom in bands
            if 20 <= bottom - top <= 150 and h * 0.3 <= (top + bottom) // 2 <= h * 0.9
        ),
        None,
    )
    if chosen is None:
        return None

    start, end = chosen
    center_y = (start + end) // 2
    center_x = w // 2

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        debug_img = img.copy()
        # Green box around the detected band, red dot on the click point.
        cv2.rectangle(debug_img, (0, start), (w, end), (0, 255, 0), 2)
        cv2.circle(debug_img, (center_x, center_y), 10, (0, 0, 255), -1)
        if debug_filename_prefix:
            # Replace characters that are invalid in file names.
            import re
            safe_prefix = re.sub(r'[\\/*?:"<>|]', '_', str(debug_filename_prefix))
            debug_name = f"{safe_prefix}_flag_expand.jpg"
        else:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            debug_name = f"{timestamp}_flag_expand.jpg"
        debug_path = os.path.join(debug_dir, debug_name)
        save_image(debug_path, debug_img)
        logger.info(f"Saved debug image to {debug_path}")

    return (int(center_x), int(center_y))
def get_row_stats(gray):
    """
    Compute per-row statistics of a grayscale image.

    :param gray: 2-D grayscale image array (rows x columns)
    :return: tuple ``(row_means, row_stds, edge_means)`` of 1-D arrays with
        one value per row:
        - row_means / row_stds: mean and standard deviation of a central
          vertical strip up to 100 px wide (used to detect content),
        - edge_means: mean of the leftmost 20 columns (used to detect the
          background margin).
    """
    _, w = gray.shape
    # Central strip (content detection). The bounds are clamped to the image
    # so that images narrower than the strip do not produce a negative start
    # index, which would wrap around and sample the wrong columns.
    strip_w = 100
    center_x = w // 2
    strip_left = max(0, center_x - strip_w // 2)
    strip_right = min(w, center_x + strip_w // 2)
    center_strip = gray[:, strip_left:strip_right]
    # Left edge strip (background / margin detection).
    edge_strip = gray[:, 0:20]
    row_means = np.mean(center_strip, axis=1)
    row_stds = np.std(center_strip, axis=1)
    edge_means = np.mean(edge_strip, axis=1)
    return row_means, row_stds, edge_means
def crop_cards_from_image(img_path, output_dir=None, save_debug=True):
    """
    Locate station cards in a list screenshot and write annotated outputs.

    Rows are classified as "card" or "gap" from per-row gray statistics,
    merged into vertical segments, grouped into sections, and the cards of
    the selected section are given a common left/right boundary. The function
    writes ``<stem>_vl<ext>`` (green boxes), ``<stem>.json`` (card metadata)
    and, when ``save_debug`` is true, ``<stem>_flag<ext>`` (boxes plus click
    points) into ``output_dir``.

    :param img_path: path of the screenshot
    :param output_dir: output directory; defaults to the directory of img_path
    :param save_debug: whether to save the debug image (_flag)
    :return: list with one entry per card:
        ``(None, (click_x, click_y), (x1, y1, x2, y2))``. The first element
        is always None because individual card crops are no longer saved.
        An empty list is returned when the image is missing or unreadable.
    """
    logger.info(f"Processing: {img_path}")
    if not os.path.exists(img_path):
        logger.info(f"Error: File not found {img_path}")
        return []
    img = read_image(img_path)
    if img is None:
        logger.info(f"Error: Failed to load image {img_path}")
        return []
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Named img_h / img_w so that no later loop variable can shadow them
    # (the image height is written to the JSON metadata at the end).
    img_h, img_w = gray.shape
    row_means, row_stds, edge_means = get_row_stats(gray)

    # --- Parameters ---
    # Gray range of the page background (about 242 according to the original
    # author's analysis).
    BG_GRAY_MIN = 230
    BG_GRAY_MAX = 250
    # A gap row has a uniform central strip in the background gray. The
    # pseudo-background inside cards is slightly brighter (about 246-248),
    # so the upper bound is kept low to tell the two apart.
    GAP_MEAN_MIN = 235
    GAP_MEAN_MAX = 244
    GAP_STD_MAX = 10.0

    # --- 1. Classify every row ---
    row_is_card = []
    for y in range(img_h):
        # A: the left edge is background gray, i.e. the row has a margin
        #    (excludes full-width header/footer rows).
        has_gray_margin = (BG_GRAY_MIN <= edge_means[y] <= BG_GRAY_MAX)
        # B: the center is NOT uniform background gray.
        is_gap = (GAP_MEAN_MIN <= row_means[y] <= GAP_MEAN_MAX) and (row_stds[y] < GAP_STD_MAX)
        row_is_card.append(has_gray_margin and not is_gap)

    # Simple 1-D closing: a short run of gap rows enclosed by card rows is a
    # crack inside a card, not a real gap. The limit is small (3 rows) so
    # that the true gap between two cards is never filled.
    MAX_GAP_FILL = 3
    cleaned_row_is_card = row_is_card[:]
    i = 0
    while i < img_h:
        if cleaned_row_is_card[i]:
            i += 1
            continue
        gap_start = i
        while i < img_h and not cleaned_row_is_card[i]:
            i += 1
        gap_end = i
        prev_is_card = (gap_start > 0) and cleaned_row_is_card[gap_start - 1]
        next_is_card = (gap_end < img_h) and cleaned_row_is_card[gap_end]
        if prev_is_card and next_is_card and (gap_end - gap_start) <= MAX_GAP_FILL:
            for k in range(gap_start, gap_end):
                cleaned_row_is_card[k] = True

    # Turn the row flags into (start_y, end_y) segments taller than 100 px.
    MIN_SEGMENT_HEIGHT = 100
    segments = []
    is_in_card = False
    start_y = 0
    for y in range(img_h):
        if cleaned_row_is_card[y]:
            if not is_in_card:
                is_in_card = True
                start_y = y
        elif is_in_card:
            is_in_card = False
            if y - start_y > MIN_SEGMENT_HEIGHT:
                segments.append((start_y, y))
    # A segment still open at the last row ends at the image bottom.
    if is_in_card and img_h - start_y > MIN_SEGMENT_HEIGHT:
        segments.append((start_y, img_h))

    # Drop segments reaching the bottom of the image: usually a navigation
    # bar or a card that is cut off.
    BOTTOM_MARGIN = 50
    valid_segments_scan = []
    for y1, y2 in segments:
        if y2 >= img_h - BOTTOM_MARGIN:
            logger.info(f" Dropping segment Y={y1}-{y2} because it touches the bottom (H={img_h}).")
            continue
        valid_segments_scan.append((y1, y2))
    segments = valid_segments_scan
    logger.info(f" Found {len(segments)} potential segments based on vertical scan.")

    # --- 1.5 Group segments separated by large gaps (header / list / footer) ---
    # The filter bar usually creates a large gap (e.g. > 60px).
    SECTION_GAP_MIN = 60
    groups = []
    if segments:
        current_group = [segments[0]]
        for prev_seg, curr_seg in zip(segments, segments[1:]):
            if curr_seg[0] - prev_seg[1] > SECTION_GAP_MIN:
                groups.append(current_group)
                current_group = []
            current_group.append(curr_seg)
        groups.append(current_group)
    logger.info(f" Found {len(groups)} segment groups.")

    # Target group: the LAST group containing a substantial segment
    # (H > 150); the station list is assumed to be the lowest main section.
    target_group = next(
        (g for g in reversed(groups) if any((y2 - y1) > 150 for y1, y2 in g)),
        [],
    )
    if not target_group and segments:
        # Fallback: no substantial group, use every segment.
        target_group = segments
    logger.info(f" Selected group with {len(target_group)} segments.")

    # --- 2. Left/right boundary of the cards (width refinement) ---
    # Every segment votes for a boundary; the medians are used for all cards.
    candidate_x1 = []
    candidate_x2 = []
    temp_valid_segments = []
    half_w = img_w // 2
    for y1, y2 in target_group:
        # Average a few rows around the middle of the segment.
        mid_y = (y1 + y2) // 2
        sample_h = min(10, y2 - y1)
        sample_rows = gray[mid_y - sample_h // 2: mid_y + sample_h // 2, :]
        col_means = np.mean(sample_rows, axis=0)
        col_is_gray = (col_means >= BG_GRAY_MIN) & (col_means <= BG_GRAY_MAX)
        # First non-gray column from the left, searching the left half only.
        left_hits = np.flatnonzero(~col_is_gray[:half_w])
        x1 = int(left_hits[0]) if left_hits.size else 0
        # First non-gray column from the right, searching columns > half_w.
        right_hits = np.flatnonzero(~col_is_gray[half_w + 1:])
        x2 = int(right_hits[-1]) + half_w + 1 if right_hits.size else img_w
        if x1 >= x2 or (x2 - x1) < img_w * 0.5:
            # Width could not be determined: probably not a real card, so it
            # neither votes nor is kept.
            logger.warning(f" Warning: Segment {y1}-{y2} has weird width {x2-x1}.")
            continue
        candidate_x1.append(x1)
        candidate_x2.append(x2)
        temp_valid_segments.append((y1, y2))

    if not candidate_x1:
        logger.info(" No valid width detected. Using default.")
        final_x1 = 0
        final_x2 = img_w
    else:
        # Cards are aligned, so the median removes outliers.
        final_x1 = int(np.median(candidate_x1))
        final_x2 = int(np.median(candidate_x2))
        # Small padding, clamped to the image.
        final_x1 = max(0, final_x1 - 5)
        final_x2 = min(img_w, final_x2 + 5)
    logger.info(f" Unified Width: X={final_x1}-{final_x2}, W={final_x2 - final_x1}")

    # --- 3. Drop cards that are much shorter than the others ---
    final_cards = []
    if not temp_valid_segments:
        logger.info(" No segments found.")
    else:
        heights = [y2 - y1 for y1, y2 in temp_valid_segments]
        # 70% of the median height; the median is robust against a single
        # very tall card such as an ad.
        threshold_h = np.median(heights) * 0.70
        for (y1, y2), card_h in zip(temp_valid_segments, heights):
            if card_h < threshold_h:
                logger.info(f" Filtering out segment Y={y1}-{y2} (H={card_h}) because it's too short (Threshold={threshold_h:.1f}).")
            else:
                final_cards.append((y1, y2, final_x1, final_x2))
                logger.info(f" Card: Y={y1}-{y2}, X={final_x1}-{final_x2}, H={card_h}")

    # --- 4. Save results ---
    if output_dir is None:
        output_dir = os.path.dirname(img_path)
    if output_dir:
        # A caller-supplied directory may not exist yet.
        os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.basename(img_path)
    stem, ext = os.path.splitext(base_name)

    debug_img = img.copy()  # boxes + click points (_flag)
    vl_img = img.copy()     # boxes only (_vl)
    results = []
    json_data = {
        "image": base_name,
        "width": img_w,
        "height": img_h,
        "cards": []
    }
    logger.info(f" Step [2.1/VL] 准备在 VL 图片上绘制 {len(final_cards)} 个场站的绿色方框...")
    for idx, (y1, y2, x1, x2) in enumerate(final_cards):
        # Click point near the top-left of the card (15% / 20% inset) so it
        # is not covered by buttons at the bottom of the card.
        click_x = int(x1 + (x2 - x1) * 0.15)
        click_y = int(y1 + (y2 - y1) * 0.20)
        # Debug image: red dot on the click point plus the green box.
        cv2.circle(debug_img, (click_x, click_y), 10, (0, 0, 255), -1)
        cv2.rectangle(debug_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # The green box is required on the _vl image: the downstream vision
        # model relies on it to identify the card area.
        cv2.rectangle(vl_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        json_data["cards"].append({
            "id": idx + 1,
            "rect": [x1, y1, x2, y2],
            "click_point": [click_x, click_y]
        })
        results.append((None, (click_x, click_y), (x1, y1, x2, y2)))

    if save_debug:
        flag_out_path = os.path.join(output_dir, f"{stem}_flag{ext}")
        save_image(flag_out_path, debug_img)
        logger.info(f" Saved Debug Image: {flag_out_path}")

    vl_out_path = os.path.join(output_dir, f"{stem}_vl{ext}")
    save_image(vl_out_path, vl_img)
    logger.info(f" Step [2.2/VL] 已保存带有绿色方框的图片: {vl_out_path}")

    import json
    json_out_path = os.path.join(output_dir, f"{stem}.json")
    with open(json_out_path, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, ensure_ascii=False, indent=4)
    logger.info(f" Step [2.3/JSON] 已保存场站坐标元数据: {json_out_path}")
    return results