'commit'

2026-01-17 08:49:42 +08:00
parent b12c9de620
commit a34083cd47
10 changed files with 74 additions and 89 deletions
--- a/Apps/XinDianTu/Config/Setting.py
+++ b/Apps/XinDianTu/Config/Setting.py
@@ -1,15 +1,15 @@

 # 采集配置
 SCROLL_DISTANCE_RATIO = 0.5
-MAX_STATIONS_COUNT = 1
-FIRST_RUN_ONLY_ONE_STATION = False
+MAX_STATIONS_COUNT = 2
+FIRST_RUN_ONLY_ONE_STATION = True
 REDIS_STATION_EXPIRE = 120
 DATA_RETENTION_DAYS = 365

 # 等待时间配置 (秒)
-WAIT_DETAIL_PAGE_LOAD = 1.5
+WAIT_DETAIL_PAGE_LOAD = 2.5
 WAIT_BACK_TO_LIST = 0.5
-WAIT_AFTER_SCROLL = 2.0
+WAIT_AFTER_SCROLL = 1.0

 # 坐标计算与安全防护
 SAFE_EXCLUDE_RATIO = 0.40
--- a/Apps/XinDianTu/Config/pycache/Setting.cpython-310.pyc
+++ b/Apps/XinDianTu/Config/pycache/Setting.cpython-310.pyc
--- a/Apps/XinDianTu/Crawler.py
+++ b/Apps/XinDianTu/Crawler.py
@@ -31,7 +31,8 @@ from Apps.XinDianTu.Config.Setting import (
    MAX_STATIONS_COUNT, REDIS_STATION_EXPIRE,
    WAIT_DETAIL_PAGE_LOAD, WAIT_BACK_TO_LIST, WAIT_AFTER_SCROLL,
    SAFE_EXCLUDE_RATIO,
-    BOTTOM_SAFE_EXCLUDE_RATIO
+    BOTTOM_SAFE_EXCLUDE_RATIO,
+    FIRST_RUN_ONLY_ONE_STATION,
 )

 # --- 用户配置区域 ---
@@ -49,19 +50,9 @@ TEST_CLEAR_REDIS = True
 # 请尝试减小 SCROLL_DISTANCE_RATIO（例如设置为 0.4 或 0.3）。
 # 这样每次滑动的距离变短，可以确保所有场站都能被完整显示并识别。

-# 配置日志输出
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler(sys.stdout)
-    ]
-)
-logger = logging.getLogger("StationList")
+from Apps.XinDianTu.Kit import setup_logger

-# 强制设置所有相关模块的日志级别为 INFO，防止被第三方库干扰
-logging.getLogger("OpenXinDianTu").setLevel(logging.INFO)
-logging.getLogger("FullProcess").setLevel(logging.INFO)
+logger = setup_logger("StationList")


 class XinDianTuCrawler(BaseCrawler):
@@ -168,6 +159,9 @@ async def get_station_list(d, service, uploader, max_stations_count=MAX_STATIONS
        # 【优化】不再在每页滚动时检查兔子广告，仅在 Opener 进入时检查一次
        # 如果后续发现有其它非兔子广告弹出，可在此处恢复非兔子广告的检测逻辑
        ad_top_y_norm = 0.78  # 默认的点击边界 (0.78)
+        # 新策略：滚动后的页面完全依赖“速度躲兔子”，不再因为靠近底部而丢弃卡片
+        if scroll_count > 1:
+            ad_top_y_norm = 1.0
        
        # 检查是否已经滚动到底部 (排除状态栏后，内容与上次一致)
        current_md5 = Kit.get_image_content_md5(
@@ -212,6 +206,11 @@ async def get_station_list(d, service, uploader, max_stations_count=MAX_STATIONS
        new_stations_processed_in_page = 0
        if json_metadata.get("cards") and stations:
            for idx, card in enumerate(json_metadata["cards"]):
+                # 首屏策略：只处理第一个场站，其余留待滚动后在安全窗口内处理
+                if FIRST_RUN_ONLY_ONE_STATION and scroll_count == 1 and idx > 0:
+                    logger.info("首屏仅处理第一个场站，跳过当前卡片。")
+                    continue
+
                # 检查是否已达到最大采集数量（按新采集的场站数量限制）
                if total_new_processed_count >= max_stations_count:
                    break
--- a/Apps/XinDianTu/Kit.py
+++ b/Apps/XinDianTu/Kit.py
@@ -810,6 +810,46 @@ def crop_cards_from_image(img_path, output_dir=None, save_debug=True):

        temp_valid_segments.append((y1, y2))

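+    # Scan every segment for interior runs of at least SPLIT_GAP_MIN gap-like rows and split at each run's midpoint so two cards are not merged into one; pieces of 100 px or less produced by a split are dropped.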
+    refined_segments = []
+    SPLIT_GAP_MIN = 8
+    SPLIT_MARGIN = 6
+    GAP_STD_STRICT = max(0.0, GAP_STD_MAX - 3.0)
+    for y1, y2 in temp_valid_segments:
+        split_points = []
+        run_len = 0
+        run_start = None
+        for yy in range(y1 + SPLIT_MARGIN, y2 - SPLIT_MARGIN):
+            rm = row_means[yy]
+            rs = row_stds[yy]
+            em = edge_means[yy]
+            is_gap_line = (GAP_MEAN_MIN <= rm <= GAP_MEAN_MAX) and (rs < GAP_STD_STRICT) and (BG_GRAY_MIN <= em <= BG_GRAY_MAX)
+            if is_gap_line:
+                if run_len == 0:
+                    run_start = yy
+                run_len += 1
+            else:
+                if run_len >= SPLIT_GAP_MIN:
+                    sp = (run_start + yy) // 2
+                    split_points.append(sp)
+                run_len = 0
+                run_start = None
+        if run_len >= SPLIT_GAP_MIN and run_start is not None:
+            sp = (run_start + (y2 - SPLIT_MARGIN)) // 2
+            split_points.append(sp)
+
+        if split_points:
+            prev = y1
+            for sp in split_points:
+                if sp - prev > 100:
+                    refined_segments.append((prev, sp))
+                prev = sp
+            if y2 - prev > 100:
+                refined_segments.append((prev, y2))
+        else:
+            refined_segments.append((y1, y2))
+    temp_valid_segments = refined_segments
+
    # 计算统一宽度
    if not candidate_x1:
        logger.info("  No valid width detected. Using default.")
@@ -877,67 +917,20 @@ def crop_cards_from_image(img_path, output_dir=None, save_debug=True):
    # 准备 _vl.jpg (只画框，不画红点)
    vl_img = img.copy()

-    # 预先计算“列表浮动区域”的蓝色矩形坐标
-    rabbit_x1 = int(w * 0.04)
-    rabbit_x2 = int(w * 0.96)
-    rabbit_y1 = int(h * 0.74)
-    rabbit_y2 = int(h * 0.86)
-
-    # 在调试图上画出蓝色矩形
-    try:
-        cv2.rectangle(debug_img, (rabbit_x1, rabbit_y1), (rabbit_x2, rabbit_y2), (255, 0, 0), 3)
-        cv2.rectangle(vl_img, (rabbit_x1, rabbit_y1), (rabbit_x2, rabbit_y2), (255, 0, 0), 3)
-        logger.info(f"  标记列表浮动蓝色区域: X={rabbit_x1}-{rabbit_x2}, Y={rabbit_y1}-{rabbit_y2}")
-    except Exception as e:
-        logger.warning(f"标记兔子广告蓝色区域失败: {e}")
-
-    # 基于蓝色矩形过滤与列表浮动区域有交集的场站卡片
-    if final_cards:
-        logger.info(f"  [蓝框过滤] 初始卡片数量: {len(final_cards)}")
-        logger.info(
-            f"  [蓝框过滤] 蓝框坐标: X={rabbit_x1}-{rabbit_x2}, Y={rabbit_y1}-{rabbit_y2}"
-        )
-
-        def _intersects(card):
-            y1, y2, x1, x2 = card
-            cx1, cy1, cx2, cy2 = x1, y1, x2, y2
-            ix1 = max(cx1, rabbit_x1)
-            iy1 = max(cy1, rabbit_y1)
-            ix2 = min(cx2, rabbit_x2)
-            iy2 = min(cy2, rabbit_y2)
-            return ix1 < ix2 and iy1 < iy2
-
-        original_cards = list(final_cards)
-        filtered_cards = []
-
-        for idx, card in enumerate(original_cards, start=1):
-            y1, y2, x1, x2 = card
-            if _intersects(card):
-                logger.info(
-                    f"  [蓝框过滤] 丢弃卡片#{idx}: X={x1}-{x2}, Y={y1}-{y2} (与蓝框有交集)"
-                )
-            else:
-                logger.info(
-                    f"  [蓝框过滤] 保留卡片#{idx}: X={x1}-{x2}, Y={y1}-{y2} (与蓝框无交集)"
-                )
-                filtered_cards.append(card)
-
-        logger.info(
-            f"  [蓝框过滤] 最终保留 {len(filtered_cards)} 个卡片, 丢弃 {len(original_cards) - len(filtered_cards)} 个"
-        )
-        final_cards = filtered_cards
-    else:
-        logger.info("  [蓝框过滤] 没有可过滤的卡片。")
-
    logger.info(f"  Step [2.1/VL] 准备在 VL 图片上绘制 {len(final_cards)} 个场站的绿色方框...")

    for idx, (y1, y2, x1, x2) in enumerate(final_cards):
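+        # Extend the card's top edge upward by PAD_TOP px (clamped at 0) so the title area is not cut off; the padded bounds are also used for the click point and the exported rect.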
+        PAD_TOP = 5
+        draw_y1 = max(0, y1 - PAD_TOP)
+        draw_y2 = y2
+
        # 计算点击点 (左上角，避免被底部按钮遮挡)
        # 策略：X偏移 15%, Y偏移 20%
        w_card = x2 - x1
-        h_card = y2 - y1
+        h_card = draw_y2 - draw_y1
        click_x = int(x1 + w_card * 0.15)
-        click_y = int(y1 + h_card * 0.20)
+        click_y = int(draw_y1 + h_card * 0.20)

        # [修改] 不再保存单张子图，只记录元数据
        # card = img[y1:y2, x1:x2]
@@ -952,28 +945,28 @@ def crop_cards_from_image(img_path, output_dir=None, save_debug=True):
        # 在标记图上画红点 (实心圆, 半径10, 红色BGR)
        cv2.circle(debug_img, (click_x, click_y), 10, (0, 0, 255), -1)
        # [修改] 必须画绿框，因为后续视觉模型依赖这个框来识别范围
-        cv2.rectangle(debug_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        cv2.rectangle(debug_img, (x1, draw_y1), (x2, draw_y2), (0, 255, 0), 2)

        # 在 _vl 图上只画绿框
-        cv2.rectangle(vl_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
+        cv2.rectangle(vl_img, (x1, draw_y1), (x2, draw_y2), (0, 255, 0), 2)

        # 收集 JSON 数据
        card_info = {
            "id": idx + 1,
-            "rect": [x1, y1, x2, y2],
+            "rect": [x1, draw_y1, x2, draw_y2],
            "bounds_norm": {
                "left": x1 / w,
-                "top": y1 / h,
+                "top": draw_y1 / h,
                "right": x2 / w,
-                "bottom": y2 / h
+                "bottom": draw_y2 / h
            },
            "click_point": [click_x, click_y]
        }
        json_data["cards"].append(card_info)

        # 记录区域信息供调用者使用 (如果需要)
-        # 格式: (None, (click_x, click_y), (x1, y1, x2, y2))
-        results.append((None, (click_x, click_y), (x1, y1, x2, y2)))
+        # 格式: (None, (click_x, click_y), (x1, draw_y1, x2, draw_y2))
+        results.append((None, (click_x, click_y), (x1, draw_y1, x2, draw_y2)))

        # [删除] 之前生成的单张 _for_vl.jpg 逻辑已移除

--- a/Apps/XinDianTu/Opener.py
+++ b/Apps/XinDianTu/Opener.py
@@ -1,23 +1,17 @@
 # coding=utf-8
 import asyncio
-import logging
 import os
 import time
 import uuid

 import uiautomator2 as u2

-from Apps.XinDianTu.Kit import take_screenshot
+from Apps.XinDianTu.Kit import take_screenshot, setup_logger
 from Apps.XinDianTu.ReadImageKit import ReadImageKit
 from Config.Config import TEMP_IMAGE_DIR

 # pip install adbutils
-# 配置日志输出，方便调试和监控
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger("OpenXinDianTu")
+logger = setup_logger("OpenXinDianTu")

 # 获取当前脚本所在目录
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
--- a/Apps/XinDianTu/Service.py
+++ b/Apps/XinDianTu/Service.py
@@ -1,5 +1,4 @@
 import hashlib
-import logging
 import os
 import re
 import sys
@@ -10,6 +9,7 @@ project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(_
 if project_root not in sys.path:
    sys.path.append(project_root)

+from Apps.XinDianTu.Kit import setup_logger
 from Apps.XinDianTu.ReadImageKit import ReadImageKit
 from DbKit.Db import Db
 from Config.Config import DB_URL, PRICE_FLATTEN_TO_24H_GLOBAL
@@ -18,8 +18,7 @@ from Model.StationStatus import StationStatus
 from Model.StationPriceSchedule import StationPriceSchedule
 from Apps.XinDianTu.Config.Setting import PRICE_FLATTEN_TO_24H

-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
-logger = logging.getLogger(__name__)
+logger = setup_logger("Service")

 class XinDianTuService:
    def __init__(self):
--- a/Apps/XinDianTu/pycache/Crawler.cpython-310.pyc
+++ b/Apps/XinDianTu/pycache/Crawler.cpython-310.pyc
--- a/Apps/XinDianTu/pycache/Kit.cpython-310.pyc
+++ b/Apps/XinDianTu/pycache/Kit.cpython-310.pyc
--- a/Apps/XinDianTu/pycache/Opener.cpython-310.pyc
+++ b/Apps/XinDianTu/pycache/Opener.cpython-310.pyc
--- a/Apps/XinDianTu/pycache/Service.cpython-310.pyc
+++ b/Apps/XinDianTu/pycache/Service.cpython-310.pyc