923 lines
35 KiB
Python
923 lines
35 KiB
Python
import logging
|
||
import os
|
||
import cv2
|
||
import numpy as np
|
||
import time
|
||
from Config.Config import BOTTOM_SAFE_EXCLUDE_RATIO, TEMP_IMAGE_DIR
|
||
|
||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||
logger = logging.getLogger(__name__)
|
||
|
||
def read_image(path):
    """Read an image from disk, supporting non-ASCII (e.g. Chinese) paths.

    The file bytes are loaded with ``np.fromfile`` and decoded in memory
    instead of handing the path string to OpenCV.

    :param path: path of the image file
    :return: the decoded image with its channels unchanged, or None when the
             file cannot be read or decoded
    """
    try:
        raw = np.fromfile(path, dtype=np.uint8)
        image = cv2.imdecode(raw, cv2.IMREAD_UNCHANGED)
    except Exception as e:
        # Logged as an error, matching save_image.
        logger.error(f"Error reading image {path}: {e}")
        return None

    if image is None:
        # imdecode signals undecodable data by returning None, not by raising.
        logger.error(f"Error reading image {path}: could not decode image data")
    return image
|
||
|
||
def save_image(path, img):
    """Save an image to disk, supporting non-ASCII (e.g. Chinese) paths.

    The image is encoded in memory and the resulting buffer is written with
    ``ndarray.tofile``. The encoding is chosen from the extension of *path*;
    a path without extension is encoded as JPEG.

    :param path: destination file path
    :param img: image array to encode
    :return: True when the file was written, False on any failure
    """
    try:
        ext = os.path.splitext(path)[1] or ".jpg"
        ok, encoded = cv2.imencode(ext, img)
        if not ok:
            # Do not write a bogus file when the encoder reports failure.
            logger.error(f"Error saving image {path}: encoding as {ext} failed")
            return False
        encoded.tofile(path)
        return True
    except Exception as e:
        logger.error(f"Error saving image {path}: {e}")
        return False
|
||
|
||
def take_screenshot(d, image_uuid, save_dir=TEMP_IMAGE_DIR):
    """Take a screenshot with the device and store it as ``<image_uuid>.jpg``.

    :param d: device object exposing ``screenshot(path)``
    :param image_uuid: file name (without extension) for the screenshot
    :param save_dir: directory to save into; created if it does not exist
    :return: path of the saved screenshot
    """
    os.makedirs(save_dir, exist_ok=True)
    target_path = f"{save_dir}/{image_uuid}.jpg"
    d.screenshot(target_path)
    return target_path
|
||
|
||
def click_image_template(d, template_path, timeout=5.0, threshold=0.8):
    """Find a template image on screen with OpenCV template matching and click it.

    The screen is polled until *timeout* elapses. Each screenshot is matched
    against the template at 11 scales from 0.5 to 1.5; the first scale whose
    score reaches *threshold* wins and its centre is clicked.

    :param d: uiautomator2 device object
    :param template_path: path of the template image
    :param timeout: timeout in seconds
    :param threshold: match threshold (0.0 - 1.0)
    :return: True if the template was found and clicked, otherwise False
    """
    if not os.path.exists(template_path):
        logger.info(f"Template file not found: {template_path}")
        return False

    template = read_image(template_path)
    if template is None:
        logger.info(f"Failed to load template: {template_path}")
        return False

    t_h, t_w = template.shape[:2]

    # The scaled templates do not depend on the screenshot, so build them once
    # instead of on every poll. Scales that would collapse a tiny template to
    # zero pixels are skipped because cv2.resize rejects an empty target size.
    scaled_templates = []
    for scale in np.linspace(0.5, 1.5, 11):
        new_w, new_h = int(t_w * scale), int(t_h * scale)
        if new_w < 1 or new_h < 1:
            continue
        scaled_templates.append(cv2.resize(template, (new_w, new_h)))

    # A monotonic clock keeps the timeout immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    best_val_overall = 0.0

    while time.monotonic() < deadline:
        # Temporary screenshot for this poll.
        screenshot_path = take_screenshot(d, "temp_click_check", save_dir=TEMP_IMAGE_DIR)

        target = read_image(screenshot_path)
        if target is None:
            time.sleep(0.5)
            continue

        # Multi-scale matching.
        found = None
        for resized_template in scaled_templates:
            r_h, r_w = resized_template.shape[:2]

            # Skip scales where the template is larger than the screenshot.
            if r_h > target.shape[0] or r_w > target.shape[1]:
                continue

            result = cv2.matchTemplate(target, resized_template, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, max_loc = cv2.minMaxLoc(result)

            if max_val > best_val_overall:
                best_val_overall = max_val

            if max_val >= threshold:
                found = (max_val, max_loc, r_w, r_h)
                break  # First scale that reaches the threshold ends the search.

        # Best-effort cleanup of the temporary screenshot.
        try:
            os.remove(screenshot_path)
        except OSError as e:
            logger.debug(f"Could not remove temporary screenshot {screenshot_path}: {e}")

        if found:
            max_val, top_left, r_w, r_h = found
            # Click the centre of the matched rectangle.
            center_x = top_left[0] + r_w // 2
            center_y = top_left[1] + r_h // 2

            logger.info(f"Found image at ({center_x}, {center_y}) with confidence {max_val:.2f}")
            d.click(center_x, center_y)
            return True

        time.sleep(1.0)

    logger.info(f"Image not found after {timeout}s (Best confidence: {best_val_overall:.2f})")
    return False
|
||
|
||
def detect_black_agree_button(image_path, debug_dir=None):
    """Locate the black "agree" button in a screenshot (Image 1 scenario).

    Dark pixels (HSV value <= 40) between 40% and 80% of the image height are
    grouped into contours. A contour qualifies when it covers at least 1% of
    the image area, has a width/height ratio between 2 and 10, and spans at
    least half the image width. The qualifying contour with the largest area
    is chosen.

    :param image_path: path of the screenshot
    :param debug_dir: if given, an annotated image is saved there on success
    :return: (x, y) centre of the button, or None if nothing qualifies
    """
    if not os.path.exists(image_path):
        return None

    img = read_image(image_path)
    if img is None:
        return None

    h, w = img.shape[:2]

    # Black is easiest to isolate in HSV: any hue/saturation, low value.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    dark_mask = cv2.inRange(hsv, np.array([0, 0, 0]), np.array([180, 255, 40]))

    # Keep only the horizontal band where the button is searched for.
    band_top, band_bottom = int(h * 0.4), int(h * 0.8)
    band_mask = np.zeros_like(dark_mask)
    band_mask[band_top:band_bottom, :] = dark_mask[band_top:band_bottom, :]

    # Close small holes first, then remove isolated specks.
    kernel = np.ones((5, 5), np.uint8)
    band_mask = cv2.morphologyEx(band_mask, cv2.MORPH_CLOSE, kernel)
    band_mask = cv2.morphologyEx(band_mask, cv2.MORPH_OPEN, kernel)

    contours, _ = cv2.findContours(band_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    min_area = w * h * 0.01
    min_width = w * 0.5
    winner = None
    winner_area = 0

    for contour in contours:
        contour_area = cv2.contourArea(contour)
        _, _, box_w, box_h = cv2.boundingRect(contour)
        ratio = float(box_w) / box_h

        too_small = contour_area < min_area
        bad_shape = not (2.0 <= ratio <= 10.0)
        too_narrow = box_w < min_width
        if too_small or bad_shape or too_narrow:
            continue

        if contour_area > winner_area:
            winner, winner_area = contour, contour_area

    if winner is None:
        return None

    x, y, cw, ch = cv2.boundingRect(winner)
    center_x = x + cw // 2
    center_y = y + ch // 2
    logger.info(f"Found Black Agree Button at ({center_x}, {center_y}), Size: {cw}x{ch}")

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        annotated = img.copy()
        cv2.rectangle(annotated, (x, y), (x+cw, y+ch), (0, 0, 255), 2)
        cv2.circle(annotated, (center_x, center_y), 5, (0, 255, 0), -1)
        save_image(os.path.join(debug_dir, "debug_agree_btn.jpg"), annotated)

    return (center_x, center_y)
|
||
|
||
def _save_ad_close_debug(debug_dir, filename, image, top_left, box_size, color, label, center=None):
    """Save a copy of *image* annotated with the matched box, a label and an optional centre dot."""
    os.makedirs(debug_dir, exist_ok=True)
    debug_img = image.copy()
    box_w, box_h = box_size
    cv2.rectangle(debug_img, top_left, (top_left[0] + box_w, top_left[1] + box_h), color, 2)
    if center is not None:
        cv2.circle(debug_img, center, 5, (0, 255, 0), -1)
    cv2.putText(debug_img, label,
                (top_left[0], top_left[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
    save_image(os.path.join(debug_dir, filename), debug_img)


def detect_ad_close_x(image_path, template_path, debug_dir=None, threshold=0.7):
    """Detect an ad "close (X)" button by multi-scale template matching (Image 2 scenario).

    The template is matched in grayscale at 20 scales from 0.5 to 1.5 and the
    best-scoring match is kept. The match is accepted only when its score
    reaches *threshold*, its position passes the heuristics below, and it is
    not inside the bottom safety zone (``BOTTOM_SAFE_EXCLUDE_RATIO``).

    :param image_path: path of the screenshot
    :param template_path: path of the template image
    :param debug_dir: if given, annotated debug images are saved there
    :param threshold: match threshold
    :return: (x, y) centre of the button, or None if not found
    """
    if not os.path.exists(image_path):
        return None

    if not os.path.exists(template_path):
        logger.warning(f"Template not found: {template_path}")
        return None

    target = read_image(image_path)
    template = read_image(template_path)
    if target is None or template is None:
        return None

    # Match in grayscale to reduce colour interference. Images that were
    # decoded as single-channel are already gray and must not go through cvtColor.
    target_gray = target if target.ndim == 2 else cv2.cvtColor(target, cv2.COLOR_BGR2GRAY)
    template_gray = template if template.ndim == 2 else cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    target_h, target_w = target_gray.shape[:2]
    t_h, t_w = template_gray.shape[:2]

    # Multi-scale matching: the template may be larger or smaller than on screen.
    best_match = None
    for scale in np.linspace(0.5, 1.5, 20):
        new_w = int(t_w * scale)
        new_h = int(t_h * scale)

        # cv2.resize rejects an empty target size (tiny template, small scale).
        if new_w < 1 or new_h < 1:
            continue

        # The scaled template must fit inside the target image.
        if new_w > target_w or new_h > target_h:
            continue

        resized_template = cv2.resize(template_gray, (new_w, new_h))
        result = cv2.matchTemplate(target_gray, resized_template, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(result)

        if best_match is None or max_val > best_match[0]:
            best_match = (max_val, max_loc, scale, new_w, new_h)

    if best_match is None:
        return None

    max_val, top_left, best_scale, best_w, best_h = best_match
    box_size = (best_w, best_h)

    center_x = top_left[0] + best_w // 2
    center_y = top_left[1] + best_h // 2

    # --- Position heuristics, on coordinates relative to the image size ---
    rel_x = center_x / target_w
    rel_y = center_y / target_h

    logger.info(f"Ad Close Button Match: Confidence={max_val:.4f}, Scale={best_scale:.2f}, Pos=({center_x}, {center_y}), Rel=({rel_x:.2f}, {rel_y:.2f})")

    is_valid_pos = True

    # Rule 1: reject the upper-centre area (X in (0.3, 0.7), Y in (0.15, 0.5)),
    # which usually holds the ad title, icon or content.
    if 0.3 < rel_x < 0.7 and 0.15 < rel_y < 0.5:
        logger.warning(f"Ignored match at ({center_x}, {center_y}) - likely Ad Content/Title (Center-Top area).")
        is_valid_pos = False

    # Rule 2: unless the confidence is very high (>= 0.98), the match must lie
    # in a typical close-button region: top-right (X > 0.7, Y < 0.5) or the
    # lower part of the screen (Y > 0.6).
    if is_valid_pos and max_val < 0.98:
        if not ((rel_x > 0.7 and rel_y < 0.5) or (rel_y > 0.6)):
            logger.warning(f"Ignored match at ({center_x}, {center_y}) - not in typical Close Button regions (Top-Right or Bottom).")
            is_valid_pos = False

    if max_val >= threshold:
        if is_valid_pos:
            # [Safety Check] A match inside the bottom BOTTOM_SAFE_EXCLUDE_RATIO
            # of the screen is treated as a false positive (e.g. a bottom
            # function button) and ignored.
            if center_y > (target_h * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)):
                logger.warning(f"Ignored Ad Close Button at ({center_x}, {center_y}) - in Bottom Safety Zone ({int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}%).")
                if debug_dir:
                    # Dark red marks a match ignored by the safety zone.
                    _save_ad_close_debug(debug_dir, "debug_ad_close_safety_ignored.jpg", target,
                                         top_left, box_size, (0, 0, 128), "SAFETY IGNORED")
                return None

            logger.info(f"Found Ad Close Button at ({center_x}, {center_y})")

            if debug_dir:
                _save_ad_close_debug(debug_dir, "debug_ad_close_x.jpg", target,
                                     top_left, box_size, (0, 0, 255),
                                     f"Conf: {max_val:.2f}, Sc: {best_scale:.2f}",
                                     center=(center_x, center_y))

            return (center_x, center_y)

        # Confident match in an implausible position: keep it as a
        # false-positive sample (orange) for debugging.
        if debug_dir:
            _save_ad_close_debug(debug_dir, "debug_ad_close_ignored.jpg", target,
                                 top_left, box_size, (0, 165, 255), "IGNORED Pos")

    # Not accepted, but with some confidence: save a debug image for analysis.
    if max_val > 0.4 and debug_dir:
        _save_ad_close_debug(debug_dir, "debug_ad_close_fail.jpg", target,
                             top_left, box_size, (0, 255, 255), f"Failed Conf: {max_val:.2f}")

    return None
|
||
|
||
def detect_any_ad_close(image_path, template_dir, debug_dir=None):
    """Try every ``ad_close*.jpg`` template in *template_dir* against a screenshot.

    Templates are tried in sorted file-name order so that the result does not
    depend on the arbitrary order returned by ``os.listdir``.

    :param image_path: path of the screenshot
    :param template_dir: directory holding the template images
    :param debug_dir: debug directory forwarded to ``detect_ad_close_x``
    :return: the first position reported by ``detect_ad_close_x``, or None when
             the directory is missing (or is not a directory) or nothing matches
    """
    # isdir, not exists: a plain file here would make os.listdir raise.
    if not os.path.isdir(template_dir):
        return None

    for filename in sorted(os.listdir(template_dir)):
        if not (filename.startswith("ad_close") and filename.endswith(".jpg")):
            continue

        template_path = os.path.join(template_dir, filename)
        logger.info(f"Trying template: {filename}")
        pos = detect_ad_close_x(image_path, template_path, debug_dir=debug_dir)
        if pos:
            return pos

    return None
|
||
|
||
def detect_bottom_close_circle(image_path, debug_dir=None):
    """Detect a round close button near the bottom of a screenshot.

    Hough circle detection runs on the bottom 25% of the image, with radii
    between 4% and 9% of the image width. A circle is kept when:

    1. its centre is within 15% of the image width from the horizontal centre,
    2. its centre lies between 80% and 95% of the image height.

    The kept circle closest to the horizontal centre is selected. It is
    discarded if it falls inside the bottom safety zone
    (``BOTTOM_SAFE_EXCLUDE_RATIO``).

    :param image_path: path of the screenshot
    :param debug_dir: if given, an annotated image is saved there on success
    :return: (x, y) centre as plain Python ints, or None if not found
    """
    if not os.path.exists(image_path):
        return None

    img = read_image(image_path)
    if img is None:
        return None

    h, w = img.shape[:2]

    # Region of interest: bottom quarter only, to stay clear of list content.
    roi_top = int(h * 0.75)
    bottom_strip = img[roi_top:h, :]

    # Grayscale + Gaussian blur to suppress noise before the Hough transform.
    smoothed = cv2.GaussianBlur(cv2.cvtColor(bottom_strip, cv2.COLOR_BGR2GRAY), (9, 9), 2)

    # The maximum radius is kept small so large round buttons are not picked up.
    circles = cv2.HoughCircles(smoothed, cv2.HOUGH_GRADIENT, dp=1.2, minDist=w/5,
                               param1=100, param2=30, minRadius=int(w*0.04), maxRadius=int(w*0.09))
    if circles is None:
        return None

    circles = np.round(circles[0, :]).astype("int")

    screen_mid_x = w // 2
    kept = []
    for (roi_cx, roi_cy, radius) in circles:
        # Translate the ROI-relative row back to full-image coordinates.
        full_cy = roi_top + roi_cy
        offset = abs(roi_cx - screen_mid_x)

        # Filter 1: must be near the horizontal centre.
        if offset > (w * 0.15):
            continue

        # Filter 2: must sit between 80% and 95% of the image height.
        if not (0.80 <= full_cy / h <= 0.95):
            continue

        kept.append((offset, roi_cx, full_cy, radius))

    if not kept:
        return None

    # min() returns the first of equally distant candidates, in detection order.
    _, cx, cy, r = min(kept, key=lambda item: item[0])

    # Plain Python ints are required: uiautomator2's click may otherwise fail
    # with a JSON serialization error on numpy integers.
    cx, cy, r = int(cx), int(cy), int(r)

    # [Safety Check] Circles inside the bottom BOTTOM_SAFE_EXCLUDE_RATIO of the
    # screen are treated as false positives and ignored.
    if cy > (h * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)):
        logger.warning(f"Ignored Bottom Circle at ({cx}, {cy}) - in Bottom Safety Zone ({int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}%).")
        return None

    logger.info(f"Found Bottom Circle Button via Hough: ({cx}, {cy}), r={r}")

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        annotated = img.copy()
        cv2.circle(annotated, (cx, cy), r, (0, 255, 0), 2)
        cv2.circle(annotated, (cx, cy), 2, (0, 0, 255), 3)
        save_image(os.path.join(debug_dir, "debug_bottom_circle.jpg"), annotated)

    return (cx, cy)
|
||
|
||
def find_expand_button_position(image_path, debug_dir=None, debug_filename_prefix=None):
    """Locate the "all time periods" expand button from geometric features.

    A row is a candidate when the left 35% and the right 35% of the image hold
    almost no Canny edge pixels while the middle band holds some. Candidate
    rows are grouped into vertical segments (breaks of up to 5 px are
    tolerated). The topmost segment that is 20-150 px tall, with its middle
    between 30% and 90% of the image height, is selected.

    :param image_path: path of the screenshot
    :param debug_dir: directory for the annotated debug image; nothing is saved if None
    :param debug_filename_prefix: file name prefix for the debug image
    :return: (x, y) centre of the selected row band, or None if not found
    """
    if not os.path.exists(image_path):
        return None

    img = read_image(image_path)
    if img is None:
        return None

    h, w = img.shape[:2]

    # Edge map of the grayscale screenshot.
    edges = cv2.Canny(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 50, 150)

    # Column boundaries of the left / centre / right bands.
    left_w = int(w * 0.35)
    right_w = int(w * 0.65)

    # Number of edge pixels per row in each band (edge pixels have value 255).
    left_count = np.sum(edges[:, :left_w], axis=1) / 255.0
    right_count = np.sum(edges[:, right_w:], axis=1) / 255.0
    center_count = np.sum(edges[:, left_w:right_w], axis=1) / 255.0

    noise_threshold = 2    # tolerate up to 2 stray edge pixels on each side
    content_threshold = 5  # require at least 5 edge pixels in the middle

    sides_quiet = (left_count <= noise_threshold) & (right_count <= noise_threshold)
    candidates = sides_quiet & (center_count >= content_threshold)

    y_indices = np.where(candidates)[0]
    if len(y_indices) == 0:
        return None

    # Start a new segment wherever consecutive candidate rows are more than
    # 5 px apart.
    split_points = np.where(np.diff(y_indices) > 5)[0] + 1
    segments = [(run[0], run[-1]) for run in np.split(y_indices, split_points)]

    def _plausible(seg_start, seg_end):
        # Reject bands that are too thin or too tall, too close to the top
        # (possibly the title bar) or too close to the bottom (bottom buttons).
        seg_height = seg_end - seg_start
        seg_mid = (seg_start + seg_end) // 2
        return 20 <= seg_height <= 150 and h * 0.3 <= seg_mid <= h * 0.9

    # The topmost plausible segment is taken.
    chosen = next((seg for seg in segments if _plausible(*seg)), None)
    if chosen is None:
        return None

    start, end = chosen
    center_y = (start + end) // 2
    center_x = w // 2

    if debug_dir:
        os.makedirs(debug_dir, exist_ok=True)
        annotated = img.copy()
        # Outline the detected band and mark the click point.
        cv2.rectangle(annotated, (0, start), (w, end), (0, 255, 0), 2)
        cv2.circle(annotated, (center_x, center_y), 10, (0, 0, 255), -1)

        if debug_filename_prefix:
            # Replace characters that are invalid in file names.
            import re
            safe_prefix = re.sub(r'[\\/*?:"<>|]', '_', str(debug_filename_prefix))
            debug_name = f"{safe_prefix}_flag_expand.jpg"
        else:
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            debug_name = f"{timestamp}_flag_expand.jpg"

        debug_path = os.path.join(debug_dir, debug_name)
        save_image(debug_path, annotated)
        logger.info(f"Saved debug image to {debug_path}")

    return (int(center_x), int(center_y))
|
||
|
||
|
||
def get_row_stats(gray):
    """Compute per-row statistics of a grayscale image.

    Two vertical strips are sampled: a strip up to 100 px wide around the
    horizontal centre (used to detect content) and the leftmost 20 columns
    (used to detect background / margin).

    :param gray: 2-D grayscale image array
    :return: (row_means, row_stds, edge_means) - per-row mean and standard
             deviation of the centre strip, and per-row mean of the left edge strip
    """
    h, w = gray.shape

    # Centre strip. The bounds are clamped so that images narrower than the
    # strip do not produce a negative start index, which would wrap around
    # and sample the wrong columns.
    center_x = w // 2
    strip_w = 100
    half_strip = strip_w // 2
    strip_left = max(0, center_x - half_strip)
    strip_right = min(w, center_x + half_strip)
    center_strip = gray[:, strip_left:strip_right]

    # Left edge strip.
    edge_strip = gray[:, 0:20]

    row_means = np.mean(center_strip, axis=1)
    row_stds = np.std(center_strip, axis=1)
    edge_means = np.mean(edge_strip, axis=1)

    return row_means, row_stds, edge_means
|
||
|
||
def _fill_short_gaps(flags, max_gap):
    """Return a copy of *flags* where False runs of at most *max_gap* items, enclosed by True on both sides, become True."""
    filled = list(flags)
    n = len(filled)
    i = 0
    while i < n:
        if filled[i]:
            i += 1
            continue
        gap_start = i
        while i < n and not filled[i]:
            i += 1
        gap_end = i
        prev_is_card = gap_start > 0 and filled[gap_start - 1]
        next_is_card = gap_end < n and filled[gap_end]
        if prev_is_card and next_is_card and (gap_end - gap_start) <= max_gap:
            filled[gap_start:gap_end] = [True] * (gap_end - gap_start)
    return filled


def _true_runs(flags):
    """Return the half-open (start, end) index ranges of consecutive True items in *flags*."""
    runs = []
    run_start = None
    for idx, flag in enumerate(flags):
        if flag:
            if run_start is None:
                run_start = idx
        elif run_start is not None:
            runs.append((run_start, idx))
            run_start = None
    if run_start is not None:
        runs.append((run_start, len(flags)))
    return runs


def crop_cards_from_image(img_path, output_dir=None, save_debug=True):
    """Detect station cards in a screenshot and record their geometry.

    No per-card image is written. The function saves an annotated
    ``<stem>_vl<ext>`` image (green boxes), optionally a ``<stem>_flag<ext>``
    debug image (green boxes plus red click points), and a ``<stem>.json``
    file holding the card rectangles and click points.

    :param img_path: path of the screenshot
    :param output_dir: output directory; defaults to the directory of img_path
    :param save_debug: whether to save the debug image (_flag)
    :return: list with one ``(None, (click_x, click_y), (x1, y1, x2, y2))``
             tuple per detected card; empty list if the image cannot be loaded
    """
    logger.info(f"Processing: {img_path}")
    if not os.path.exists(img_path):
        logger.info(f"Error: File not found {img_path}")
        return []

    img = read_image(img_path)
    if img is None:
        logger.info(f"Error: Failed to load image {img_path}")
        return []

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    h, w = gray.shape

    row_means, row_stds, edge_means = get_row_stats(gray)

    # Gray-level range of the page background seen in the side margin.
    BG_GRAY_MIN = 230
    BG_GRAY_MAX = 250

    # A gap row has a uniform centre strip whose mean is in the background
    # range. The upper bound is kept at 244 so that off-white areas inside
    # cards are not mistaken for gaps.
    GAP_MEAN_MIN = 235
    GAP_MEAN_MAX = 244
    GAP_STD_MAX = 10.0

    # 1. Classify every row: a card row has a gray margin (which excludes
    #    full-width header/footer rows) and a centre strip that is not a gap.
    has_gray_margin = (edge_means >= BG_GRAY_MIN) & (edge_means <= BG_GRAY_MAX)
    is_gap = (row_means >= GAP_MEAN_MIN) & (row_means <= GAP_MEAN_MAX) & (row_stds < GAP_STD_MAX)
    row_is_card = (has_gray_margin & ~is_gap).tolist()

    # Bridge small cracks inside a card. The limit is kept small so the real
    # gap between two cards is never bridged.
    MAX_GAP_FILL = 3
    cleaned_row_is_card = _fill_short_gaps(row_is_card, MAX_GAP_FILL)

    # Vertical segments of consecutive card rows; short ones are dropped.
    segments = [(y1, y2) for y1, y2 in _true_runs(cleaned_row_is_card) if (y2 - y1) > 100]

    # Drop segments that reach the bottom of the image: usually the
    # navigation bar or a card cut off by the screen edge.
    BOTTOM_MARGIN = 50
    valid_segments_scan = []
    for y1, y2 in segments:
        if y2 >= h - BOTTOM_MARGIN:
            logger.info(f" Dropping segment Y={y1}-{y2} because it touches the bottom (H={h}).")
            continue
        valid_segments_scan.append((y1, y2))
    segments = valid_segments_scan

    logger.info(f" Found {len(segments)} potential segments based on vertical scan.")

    # 1.5 Group segments separated by large gaps (header / list / footer).
    SECTION_GAP_MIN = 60
    groups = []
    if segments:
        current_group = [segments[0]]
        for previous, current in zip(segments, segments[1:]):
            if current[0] - previous[1] > SECTION_GAP_MIN:
                groups.append(current_group)
                current_group = []
            current_group.append(current)
        groups.append(current_group)

    logger.info(f" Found {len(groups)} segment groups.")

    # Select the LAST group that has at least one substantial segment
    # (height > 150); fall back to all segments if there is none.
    target_group = []
    for group in reversed(groups):
        if any((y2 - y1) > 150 for y1, y2 in group):
            target_group = group
            break

    if not target_group and segments:
        target_group = segments

    logger.info(f" Selected group with {len(target_group)} segments.")

    # 2. Width refinement: collect a left/right boundary per segment, then
    #    use one unified width for all cards.
    candidate_x1 = []
    candidate_x2 = []
    temp_valid_segments = []
    half_w = w // 2

    for y1, y2 in target_group:
        # Analyse a few rows around the vertical middle of the segment.
        mid_y = (y1 + y2) // 2
        sample_h = min(10, y2 - y1)
        sample_rows = gray[mid_y - sample_h//2 : mid_y + sample_h//2, :]
        col_means = np.mean(sample_rows, axis=0)
        non_background = ~((col_means >= BG_GRAY_MIN) & (col_means <= BG_GRAY_MAX))

        # First non-background column scanning from the left over the left half.
        left_hits = np.flatnonzero(non_background[:half_w])
        x1 = int(left_hits[0]) if left_hits.size else 0

        # First non-background column scanning from the right edge towards
        # the centre (the centre column itself is not examined).
        right_hits = np.flatnonzero(non_background[half_w + 1:])
        x2 = int(right_hits[-1]) + half_w + 1 if right_hits.size else w

        if x1 >= x2 or (x2 - x1) < w * 0.5:
            # The width is implausible: the segment does not vote for the
            # unified width, but it is still kept as a card candidate.
            logger.warning(f" Warning: Segment {y1}-{y2} has weird width {x2-x1}.")
        else:
            candidate_x1.append(x1)
            candidate_x2.append(x2)

        temp_valid_segments.append((y1, y2))

    if not candidate_x1:
        logger.info(" No valid width detected. Using default.")
        final_x1 = 0
        final_x2 = w
    else:
        # The median suppresses outliers.
        final_x1 = int(np.median(candidate_x1))
        final_x2 = int(np.median(candidate_x2))

    # Small padding, clamped to the image bounds.
    final_x1 = max(0, final_x1 - 5)
    final_x2 = min(w, final_x2 + 5)

    logger.info(f" Unified Width: X={final_x1}-{final_x2}, W={final_x2 - final_x1}")

    # 3. Height filtering: drop segments shorter than 70% of the median
    #    height. The median is used rather than the maximum so that one
    #    unusually tall segment cannot skew the threshold.
    final_cards = []
    if not temp_valid_segments:
        logger.info(" No segments found.")
    else:
        heights = [y2 - y1 for y1, y2 in temp_valid_segments]
        threshold_h = np.median(heights) * 0.70

        # The loop variable must not be named `h`: that is the image height,
        # which is still needed for the JSON metadata below.
        for (y1, y2), card_h in zip(temp_valid_segments, heights):
            if card_h < threshold_h:
                logger.info(f" Filtering out segment Y={y1}-{y2} (H={card_h}) because it's too short (Threshold={threshold_h:.1f}).")
            else:
                final_cards.append((y1, y2, final_x1, final_x2))
                logger.info(f" Card: Y={y1}-{y2}, X={final_x1}-{final_x2}, H={card_h}")

    # 4. Save results.
    if output_dir is None:
        output_dir = os.path.dirname(img_path)
    if output_dir:
        # Make sure the image and JSON writes below have somewhere to go.
        os.makedirs(output_dir, exist_ok=True)

    base_name = os.path.basename(img_path)
    stem, ext = os.path.splitext(base_name)

    # Debug image: green boxes plus red click points.
    debug_img = img.copy()
    # _vl image: green boxes only, no click points.
    vl_img = img.copy()

    results = []
    json_data = {
        "image": base_name,
        "width": w,
        "height": h,
        "cards": []
    }

    logger.info(f" Step [2.1/VL] 准备在 VL 图片上绘制 {len(final_cards)} 个场站的绿色方框...")

    for idx, (y1, y2, x1, x2) in enumerate(final_cards):
        # Click point near the top-left of the card (15% across, 20% down),
        # to avoid buttons at the bottom of the card.
        w_card = x2 - x1
        h_card = y2 - y1
        click_x = int(x1 + w_card * 0.15)
        click_y = int(y1 + h_card * 0.20)

        # Red click point and green box on the debug image.
        cv2.circle(debug_img, (click_x, click_y), 10, (0, 0, 255), -1)
        cv2.rectangle(debug_img, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Green box only on the _vl image.
        cv2.rectangle(vl_img, (x1, y1), (x2, y2), (0, 255, 0), 2)

        json_data["cards"].append({
            "id": idx + 1,
            "rect": [x1, y1, x2, y2],
            "click_point": [click_x, click_y]
        })

        # Result format: (None, (click_x, click_y), (x1, y1, x2, y2)).
        results.append((None, (click_x, click_y), (x1, y1, x2, y2)))

    # Save the debug image (_flag).
    if save_debug:
        flag_out_path = os.path.join(output_dir, f"{stem}_flag{ext}")
        save_image(flag_out_path, debug_img)
        logger.info(f" Saved Debug Image: {flag_out_path}")

    # Save the _vl image.
    vl_out_path = os.path.join(output_dir, f"{stem}_vl{ext}")
    save_image(vl_out_path, vl_img)
    logger.info(f" Step [2.2/VL] 已保存带有绿色方框的图片: {vl_out_path}")

    # Save the JSON metadata.
    import json
    json_out_path = os.path.join(output_dir, f"{stem}.json")
    with open(json_out_path, 'w', encoding='utf-8') as f:
        json.dump(json_data, f, ensure_ascii=False, indent=4)
    logger.info(f" Step [2.3/JSON] 已保存场站坐标元数据: {json_out_path}")

    return results
|