This commit is contained in:
HuangHai
2026-01-13 19:14:24 +08:00
parent 0d4f892ead
commit 62495ab39d
2 changed files with 14 additions and 14 deletions

View File

@@ -3,10 +3,8 @@
# 滑动距离比例 (0.1 ~ 0.9),数值越大滑动幅度越大
# 调大步长 (从 0.25 调至 0.5),提高采集效率
SCROLL_DISTANCE_RATIO = 0.5
# 最大滑动/翻页次数,达到此次数后停止采集
MAX_SCROLLS = 100
# 默认抓取半径(公里),当检测到场站距离超过此值时停止采集
MAX_CRAWL_DISTANCE = 50
# 最大采集场站数量,达到此数量后停止采集
MAX_STATIONS_COUNT = 100
# 场站去重过期时间(秒),在此时间内重复出现的场站不会再次点击进入详情页
REDIS_STATION_EXPIRE = 120
@@ -15,11 +13,11 @@ DATA_RETENTION_DAYS = 365
# 等待时间配置 (秒)
# 点击进入详情页后等待加载的时间
WAIT_DETAIL_PAGE_LOAD = 2.5
WAIT_DETAIL_PAGE_LOAD = 1.5
# 从详情页返回列表页后等待页面刷新的时间
WAIT_BACK_TO_LIST = 1.5
WAIT_BACK_TO_LIST = 0.5
# 执行滑动操作后等待页面内容加载和稳定的时间
WAIT_AFTER_SCROLL = 3.0
WAIT_AFTER_SCROLL = 2.0
# 坐标计算与安全防护
# 屏幕顶部安全排除比例 (0.0~1.0),此比例区域内不进行点击(避开状态栏、顶部菜单、横幅广告等)

View File

@@ -28,9 +28,8 @@ from Config.Config import (
)
from Apps.XinDianTu.Config.Setting import (
SCROLL_DISTANCE_RATIO,
MAX_SCROLLS, REDIS_STATION_EXPIRE,
MAX_STATIONS_COUNT, REDIS_STATION_EXPIRE,
WAIT_DETAIL_PAGE_LOAD, WAIT_BACK_TO_LIST, WAIT_AFTER_SCROLL,
MAX_CRAWL_DISTANCE,
SAFE_EXCLUDE_RATIO,
BOTTOM_SAFE_EXCLUDE_RATIO
)
@@ -73,7 +72,7 @@ class XinDianTuCrawler(BaseCrawler):
def __init__(self, service=None):
super().__init__(service)
# 初始化配置参数
self.max_scrolls = MAX_SCROLLS
self.max_stations_count = MAX_STATIONS_COUNT
self.uploader = ObsUploader()
self.redis_kit = RedisKit()
@@ -139,7 +138,7 @@ async def analyze_prices_background(service, station_name, image_paths, device_i
return True
async def get_station_list(d, service, uploader, max_scrolls=MAX_SCROLLS):
async def get_station_list(d, service, uploader, max_stations_count=MAX_STATIONS_COUNT):
"""
获取场站列表并处理翻页 (异步优化版)
"""
@@ -151,14 +150,17 @@ async def get_station_list(d, service, uploader, max_scrolls=MAX_SCROLLS):
device_info['width'] = w
device_info['height'] = h
logger.info(f"开始爬取列表,设备: {device_info.get('productName')} | 分辨率: {w}x{h}")
logger.info(f"开始爬取列表,设备: {device_info.get('productName')} | 分辨率: {w}x{h} | 目标数量: {max_stations_count}")
background_tasks = []
last_list_md5 = None
no_new_data_count = 0
total_processed_count = 0
scroll_count = 0
for i in range(max_scrolls + 1):
logger.info(f"正在处理第 {i + 1} 页...")
while total_processed_count < max_stations_count:
scroll_count += 1
logger.info(f"正在处理第 {scroll_count} 次滚动 (已采集: {total_processed_count}/{max_stations_count})...")
# 1. 拍摄截图
image_uuid = str(uuid.uuid4())