'commit'
This commit is contained in:
@@ -1,15 +1,15 @@
|
||||
|
||||
# 采集配置
|
||||
SCROLL_DISTANCE_RATIO = 0.5
|
||||
MAX_STATIONS_COUNT = 1
|
||||
FIRST_RUN_ONLY_ONE_STATION = False
|
||||
MAX_STATIONS_COUNT = 2
|
||||
FIRST_RUN_ONLY_ONE_STATION = True
|
||||
REDIS_STATION_EXPIRE = 120
|
||||
DATA_RETENTION_DAYS = 365
|
||||
|
||||
# 等待时间配置 (秒)
|
||||
WAIT_DETAIL_PAGE_LOAD = 1.5
|
||||
WAIT_DETAIL_PAGE_LOAD = 2.5
|
||||
WAIT_BACK_TO_LIST = 0.5
|
||||
WAIT_AFTER_SCROLL = 2.0
|
||||
WAIT_AFTER_SCROLL = 1.0
|
||||
|
||||
# 坐标计算与安全防护
|
||||
SAFE_EXCLUDE_RATIO = 0.40
|
||||
|
||||
Binary file not shown.
@@ -31,7 +31,8 @@ from Apps.XinDianTu.Config.Setting import (
|
||||
MAX_STATIONS_COUNT, REDIS_STATION_EXPIRE,
|
||||
WAIT_DETAIL_PAGE_LOAD, WAIT_BACK_TO_LIST, WAIT_AFTER_SCROLL,
|
||||
SAFE_EXCLUDE_RATIO,
|
||||
BOTTOM_SAFE_EXCLUDE_RATIO
|
||||
BOTTOM_SAFE_EXCLUDE_RATIO,
|
||||
FIRST_RUN_ONLY_ONE_STATION,
|
||||
)
|
||||
|
||||
# --- 用户配置区域 ---
|
||||
@@ -49,19 +50,9 @@ TEST_CLEAR_REDIS = True
|
||||
# 请尝试减小 SCROLL_DISTANCE_RATIO(例如设置为 0.4 或 0.3)。
|
||||
# 这样每次滑动的距离变短,可以确保所有场站都能被完整显示并识别。
|
||||
|
||||
# 配置日志输出
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
handlers=[
|
||||
logging.StreamHandler(sys.stdout)
|
||||
]
|
||||
)
|
||||
logger = logging.getLogger("StationList")
|
||||
from Apps.XinDianTu.Kit import setup_logger
|
||||
|
||||
# 强制设置所有相关模块的日志级别为 INFO,防止被第三方库干扰
|
||||
logging.getLogger("OpenXinDianTu").setLevel(logging.INFO)
|
||||
logging.getLogger("FullProcess").setLevel(logging.INFO)
|
||||
logger = setup_logger("StationList")
|
||||
|
||||
|
||||
class XinDianTuCrawler(BaseCrawler):
|
||||
@@ -168,6 +159,9 @@ async def get_station_list(d, service, uploader, max_stations_count=MAX_STATIONS
|
||||
# 【优化】不再在每页滚动时检查兔子广告,仅在 Opener 进入时检查一次
|
||||
# 如果后续发现有其它非兔子广告弹出,可在此处恢复非兔子广告的检测逻辑
|
||||
ad_top_y_norm = 0.78 # 默认的点击边界 (0.78)
|
||||
# 新策略:滚动后的页面完全依赖“速度躲兔子”,不再因为靠近底部而丢弃卡片
|
||||
if scroll_count > 1:
|
||||
ad_top_y_norm = 1.0
|
||||
|
||||
# 检查是否已经滚动到底部 (排除状态栏后,内容与上次一致)
|
||||
current_md5 = Kit.get_image_content_md5(
|
||||
@@ -212,6 +206,11 @@ async def get_station_list(d, service, uploader, max_stations_count=MAX_STATIONS
|
||||
new_stations_processed_in_page = 0
|
||||
if json_metadata.get("cards") and stations:
|
||||
for idx, card in enumerate(json_metadata["cards"]):
|
||||
# 首屏策略:只处理第一个场站,其余留待滚动后在安全窗口内处理
|
||||
if FIRST_RUN_ONLY_ONE_STATION and scroll_count == 1 and idx > 0:
|
||||
logger.info("首屏仅处理第一个场站,跳过当前卡片。")
|
||||
continue
|
||||
|
||||
# 检查是否已达到最大采集数量(按新采集的场站数量限制)
|
||||
if total_new_processed_count >= max_stations_count:
|
||||
break
|
||||
|
||||
@@ -810,6 +810,46 @@ def crop_cards_from_image(img_path, output_dir=None, save_debug=True):
|
||||
|
||||
temp_valid_segments.append((y1, y2))
|
||||
|
||||
# 对过长的段进行内部切分,避免将两个卡片合并为一个
|
||||
refined_segments = []
|
||||
SPLIT_GAP_MIN = 8
|
||||
SPLIT_MARGIN = 6
|
||||
GAP_STD_STRICT = max(0.0, GAP_STD_MAX - 3.0)
|
||||
for y1, y2 in temp_valid_segments:
|
||||
split_points = []
|
||||
run_len = 0
|
||||
run_start = None
|
||||
for yy in range(y1 + SPLIT_MARGIN, y2 - SPLIT_MARGIN):
|
||||
rm = row_means[yy]
|
||||
rs = row_stds[yy]
|
||||
em = edge_means[yy]
|
||||
is_gap_line = (GAP_MEAN_MIN <= rm <= GAP_MEAN_MAX) and (rs < GAP_STD_STRICT) and (BG_GRAY_MIN <= em <= BG_GRAY_MAX)
|
||||
if is_gap_line:
|
||||
if run_len == 0:
|
||||
run_start = yy
|
||||
run_len += 1
|
||||
else:
|
||||
if run_len >= SPLIT_GAP_MIN:
|
||||
sp = (run_start + yy) // 2
|
||||
split_points.append(sp)
|
||||
run_len = 0
|
||||
run_start = None
|
||||
if run_len >= SPLIT_GAP_MIN and run_start is not None:
|
||||
sp = (run_start + (y2 - SPLIT_MARGIN)) // 2
|
||||
split_points.append(sp)
|
||||
|
||||
if split_points:
|
||||
prev = y1
|
||||
for sp in split_points:
|
||||
if sp - prev > 100:
|
||||
refined_segments.append((prev, sp))
|
||||
prev = sp
|
||||
if y2 - prev > 100:
|
||||
refined_segments.append((prev, y2))
|
||||
else:
|
||||
refined_segments.append((y1, y2))
|
||||
temp_valid_segments = refined_segments
|
||||
|
||||
# 计算统一宽度
|
||||
if not candidate_x1:
|
||||
logger.info(" No valid width detected. Using default.")
|
||||
@@ -877,67 +917,20 @@ def crop_cards_from_image(img_path, output_dir=None, save_debug=True):
|
||||
# 准备 _vl.jpg (只画框,不画红点)
|
||||
vl_img = img.copy()
|
||||
|
||||
# 预先计算“列表浮动区域”的蓝色矩形坐标
|
||||
rabbit_x1 = int(w * 0.04)
|
||||
rabbit_x2 = int(w * 0.96)
|
||||
rabbit_y1 = int(h * 0.74)
|
||||
rabbit_y2 = int(h * 0.86)
|
||||
|
||||
# 在调试图上画出蓝色矩形
|
||||
try:
|
||||
cv2.rectangle(debug_img, (rabbit_x1, rabbit_y1), (rabbit_x2, rabbit_y2), (255, 0, 0), 3)
|
||||
cv2.rectangle(vl_img, (rabbit_x1, rabbit_y1), (rabbit_x2, rabbit_y2), (255, 0, 0), 3)
|
||||
logger.info(f" 标记列表浮动蓝色区域: X={rabbit_x1}-{rabbit_x2}, Y={rabbit_y1}-{rabbit_y2}")
|
||||
except Exception as e:
|
||||
logger.warning(f"标记兔子广告蓝色区域失败: {e}")
|
||||
|
||||
# 基于蓝色矩形过滤与列表浮动区域有交集的场站卡片
|
||||
if final_cards:
|
||||
logger.info(f" [蓝框过滤] 初始卡片数量: {len(final_cards)}")
|
||||
logger.info(
|
||||
f" [蓝框过滤] 蓝框坐标: X={rabbit_x1}-{rabbit_x2}, Y={rabbit_y1}-{rabbit_y2}"
|
||||
)
|
||||
|
||||
def _intersects(card):
|
||||
y1, y2, x1, x2 = card
|
||||
cx1, cy1, cx2, cy2 = x1, y1, x2, y2
|
||||
ix1 = max(cx1, rabbit_x1)
|
||||
iy1 = max(cy1, rabbit_y1)
|
||||
ix2 = min(cx2, rabbit_x2)
|
||||
iy2 = min(cy2, rabbit_y2)
|
||||
return ix1 < ix2 and iy1 < iy2
|
||||
|
||||
original_cards = list(final_cards)
|
||||
filtered_cards = []
|
||||
|
||||
for idx, card in enumerate(original_cards, start=1):
|
||||
y1, y2, x1, x2 = card
|
||||
if _intersects(card):
|
||||
logger.info(
|
||||
f" [蓝框过滤] 丢弃卡片#{idx}: X={x1}-{x2}, Y={y1}-{y2} (与蓝框有交集)"
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f" [蓝框过滤] 保留卡片#{idx}: X={x1}-{x2}, Y={y1}-{y2} (与蓝框无交集)"
|
||||
)
|
||||
filtered_cards.append(card)
|
||||
|
||||
logger.info(
|
||||
f" [蓝框过滤] 最终保留 {len(filtered_cards)} 个卡片, 丢弃 {len(original_cards) - len(filtered_cards)} 个"
|
||||
)
|
||||
final_cards = filtered_cards
|
||||
else:
|
||||
logger.info(" [蓝框过滤] 没有可过滤的卡片。")
|
||||
|
||||
logger.info(f" Step [2.1/VL] 准备在 VL 图片上绘制 {len(final_cards)} 个场站的绿色方框...")
|
||||
|
||||
for idx, (y1, y2, x1, x2) in enumerate(final_cards):
|
||||
# 轻微向上扩展卡片上边界,避免漏掉标题区域
|
||||
PAD_TOP = 5
|
||||
draw_y1 = max(0, y1 - PAD_TOP)
|
||||
draw_y2 = y2
|
||||
|
||||
# 计算点击点 (左上角,避免被底部按钮遮挡)
|
||||
# 策略:X偏移 15%, Y偏移 20%
|
||||
w_card = x2 - x1
|
||||
h_card = y2 - y1
|
||||
h_card = draw_y2 - draw_y1
|
||||
click_x = int(x1 + w_card * 0.15)
|
||||
click_y = int(y1 + h_card * 0.20)
|
||||
click_y = int(draw_y1 + h_card * 0.20)
|
||||
|
||||
# [修改] 不再保存单张子图,只记录元数据
|
||||
# card = img[y1:y2, x1:x2]
|
||||
@@ -952,28 +945,28 @@ def crop_cards_from_image(img_path, output_dir=None, save_debug=True):
|
||||
# 在标记图上画红点 (实心圆, 半径10, 红色BGR)
|
||||
cv2.circle(debug_img, (click_x, click_y), 10, (0, 0, 255), -1)
|
||||
# [修改] 必须画绿框,因为后续视觉模型依赖这个框来识别范围
|
||||
cv2.rectangle(debug_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
cv2.rectangle(debug_img, (x1, draw_y1), (x2, draw_y2), (0, 255, 0), 2)
|
||||
|
||||
# 在 _vl 图上只画绿框
|
||||
cv2.rectangle(vl_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
|
||||
cv2.rectangle(vl_img, (x1, draw_y1), (x2, draw_y2), (0, 255, 0), 2)
|
||||
|
||||
# 收集 JSON 数据
|
||||
card_info = {
|
||||
"id": idx + 1,
|
||||
"rect": [x1, y1, x2, y2],
|
||||
"rect": [x1, draw_y1, x2, draw_y2],
|
||||
"bounds_norm": {
|
||||
"left": x1 / w,
|
||||
"top": y1 / h,
|
||||
"top": draw_y1 / h,
|
||||
"right": x2 / w,
|
||||
"bottom": y2 / h
|
||||
"bottom": draw_y2 / h
|
||||
},
|
||||
"click_point": [click_x, click_y]
|
||||
}
|
||||
json_data["cards"].append(card_info)
|
||||
|
||||
# 记录区域信息供调用者使用 (如果需要)
|
||||
# 格式: (None, (click_x, click_y), (x1, y1, x2, y2))
|
||||
results.append((None, (click_x, click_y), (x1, y1, x2, y2)))
|
||||
# 格式: (None, (click_x, click_y), (x1, draw_y1, x2, draw_y2))
|
||||
results.append((None, (click_x, click_y), (x1, draw_y1, x2, draw_y2)))
|
||||
|
||||
# [删除] 之前生成的单张 _for_vl.jpg 逻辑已移除
|
||||
|
||||
|
||||
@@ -1,23 +1,17 @@
|
||||
# coding=utf-8
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
|
||||
import uiautomator2 as u2
|
||||
|
||||
from Apps.XinDianTu.Kit import take_screenshot
|
||||
from Apps.XinDianTu.Kit import take_screenshot, setup_logger
|
||||
from Apps.XinDianTu.ReadImageKit import ReadImageKit
|
||||
from Config.Config import TEMP_IMAGE_DIR
|
||||
|
||||
# pip install adbutils
|
||||
# 配置日志输出,方便调试和监控
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger("OpenXinDianTu")
|
||||
logger = setup_logger("OpenXinDianTu")
|
||||
|
||||
# 获取当前脚本所在目录
|
||||
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import hashlib
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
@@ -10,6 +9,7 @@ project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(_
|
||||
if project_root not in sys.path:
|
||||
sys.path.append(project_root)
|
||||
|
||||
from Apps.XinDianTu.Kit import setup_logger
|
||||
from Apps.XinDianTu.ReadImageKit import ReadImageKit
|
||||
from DbKit.Db import Db
|
||||
from Config.Config import DB_URL, PRICE_FLATTEN_TO_24H_GLOBAL
|
||||
@@ -18,8 +18,7 @@ from Model.StationStatus import StationStatus
|
||||
from Model.StationPriceSchedule import StationPriceSchedule
|
||||
from Apps.XinDianTu.Config.Setting import PRICE_FLATTEN_TO_24H
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
logger = setup_logger("Service")
|
||||
|
||||
class XinDianTuService:
|
||||
def __init__(self):
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user