'commit'
This commit is contained in:
@@ -3,10 +3,8 @@
|
||||
# 滑动距离比例 (0.1 ~ 0.9),数值越大滑动幅度越大
|
||||
# 调大步长 (从 0.25 调至 0.5),提高采集效率
|
||||
SCROLL_DISTANCE_RATIO = 0.5
|
||||
# 最大滑动/翻页次数,达到此次数后停止采集
|
||||
MAX_SCROLLS = 100
|
||||
# 默认抓取半径(公里),当检测到场站距离超过此值时停止采集
|
||||
MAX_CRAWL_DISTANCE = 50
|
||||
# 最大采集场站数量,达到此数量后停止采集
|
||||
MAX_STATIONS_COUNT = 100
|
||||
# 场站去重过期时间(秒),在此时间内重复出现的场站不会再次点击进入详情页
|
||||
REDIS_STATION_EXPIRE = 120
|
||||
|
||||
@@ -15,11 +13,11 @@ DATA_RETENTION_DAYS = 365
|
||||
|
||||
# 等待时间配置 (秒)
|
||||
# 点击进入详情页后等待加载的时间
|
||||
WAIT_DETAIL_PAGE_LOAD = 2.5
|
||||
WAIT_DETAIL_PAGE_LOAD = 1.5
|
||||
# 从详情页返回列表页后等待页面刷新的时间
|
||||
WAIT_BACK_TO_LIST = 1.5
|
||||
WAIT_BACK_TO_LIST = 0.5
|
||||
# 执行滑动操作后等待页面内容加载和稳定的时间
|
||||
WAIT_AFTER_SCROLL = 3.0
|
||||
WAIT_AFTER_SCROLL = 2.0
|
||||
|
||||
# 坐标计算与安全防护
|
||||
# 屏幕顶部安全排除比例 (0.0~1.0),此比例区域内不进行点击(避开状态栏、顶部菜单、横幅广告等)
|
||||
|
||||
@@ -28,9 +28,8 @@ from Config.Config import (
|
||||
)
|
||||
from Apps.XinDianTu.Config.Setting import (
|
||||
SCROLL_DISTANCE_RATIO,
|
||||
MAX_SCROLLS, REDIS_STATION_EXPIRE,
|
||||
MAX_STATIONS_COUNT, REDIS_STATION_EXPIRE,
|
||||
WAIT_DETAIL_PAGE_LOAD, WAIT_BACK_TO_LIST, WAIT_AFTER_SCROLL,
|
||||
MAX_CRAWL_DISTANCE,
|
||||
SAFE_EXCLUDE_RATIO,
|
||||
BOTTOM_SAFE_EXCLUDE_RATIO
|
||||
)
|
||||
@@ -73,7 +72,7 @@ class XinDianTuCrawler(BaseCrawler):
|
||||
def __init__(self, service=None):
|
||||
super().__init__(service)
|
||||
# 初始化配置参数
|
||||
self.max_scrolls = MAX_SCROLLS
|
||||
self.max_stations_count = MAX_STATIONS_COUNT
|
||||
self.uploader = ObsUploader()
|
||||
self.redis_kit = RedisKit()
|
||||
|
||||
@@ -139,7 +138,7 @@ async def analyze_prices_background(service, station_name, image_paths, device_i
|
||||
return True
|
||||
|
||||
|
||||
async def get_station_list(d, service, uploader, max_scrolls=MAX_SCROLLS):
|
||||
async def get_station_list(d, service, uploader, max_stations_count=MAX_STATIONS_COUNT):
|
||||
"""
|
||||
获取场站列表并处理翻页 (异步优化版)
|
||||
"""
|
||||
@@ -151,14 +150,17 @@ async def get_station_list(d, service, uploader, max_scrolls=MAX_SCROLLS):
|
||||
device_info['width'] = w
|
||||
device_info['height'] = h
|
||||
|
||||
logger.info(f"开始爬取列表,设备: {device_info.get('productName')} | 分辨率: {w}x{h}")
|
||||
logger.info(f"开始爬取列表,设备: {device_info.get('productName')} | 分辨率: {w}x{h} | 目标数量: {max_stations_count}")
|
||||
|
||||
background_tasks = []
|
||||
last_list_md5 = None
|
||||
no_new_data_count = 0
|
||||
total_processed_count = 0
|
||||
scroll_count = 0
|
||||
|
||||
for i in range(max_scrolls + 1):
|
||||
logger.info(f"正在处理第 {i + 1} 页...")
|
||||
while total_processed_count < max_stations_count:
|
||||
scroll_count += 1
|
||||
logger.info(f"正在处理第 {scroll_count} 次滚动 (已采集: {total_processed_count}/{max_stations_count})...")
|
||||
|
||||
# 1. 拍摄截图
|
||||
image_uuid = str(uuid.uuid4())
|
||||
|
||||
Reference in New Issue
Block a user