'commit'
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
|
||||
# 采集配置
|
||||
SCROLL_DISTANCE_RATIO = 0.4
|
||||
SCROLL_DISTANCE_RATIO = 0.6
|
||||
MAX_SCROLLS = 100
|
||||
MAX_CRAWL_DISTANCE = 50
|
||||
REDIS_STATION_EXPIRE = 120
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
|
||||
# 采集配置
|
||||
# 滑动距离比例 (0.1 ~ 0.9),数值越大滑动幅度越大
|
||||
# 稍微调大步长 (从 0.15 调至 0.25),兼顾避开广告与采集效率
|
||||
SCROLL_DISTANCE_RATIO = 0.25
|
||||
# 调大步长 (从 0.25 调至 0.5),提高采集效率
|
||||
SCROLL_DISTANCE_RATIO = 0.5
|
||||
# 最大滑动/翻页次数,达到此次数后停止采集
|
||||
MAX_SCROLLS = 60
|
||||
# 默认抓取半径(公里),当检测到场站距离超过此值时停止采集
|
||||
|
||||
@@ -417,9 +417,9 @@ async def get_station_list(d, service, uploader, max_scrolls=MAX_SCROLLS):
|
||||
if stations and new_stations_processed == 0:
|
||||
no_new_data_count += 1
|
||||
logger.info(f"本页所有场站均已处理过,连续 {no_new_data_count} 页无新数据。")
|
||||
# 【优化】由于滑动步长较小 (0.15),连续多页重复是正常的,将阈值调大到 10
|
||||
if no_new_data_count >= 10:
|
||||
logger.info("连续 10 页无新数据,判定为已到底或重复循环,提前结束。")
|
||||
# 【优化】由于滑动步长已调大 (0.5),连续多页重复的可能性降低
|
||||
if no_new_data_count >= 5:
|
||||
logger.info("连续 5 页无新数据,判定为已到底或重复循环,提前结束。")
|
||||
break
|
||||
else:
|
||||
no_new_data_count = 0
|
||||
|
||||
BIN
Apps/XinDianTu/Report/Excel/新电途_export_20260113_120046.xlsx
Normal file
BIN
Apps/XinDianTu/Report/Excel/新电途_export_20260113_120046.xlsx
Normal file
Binary file not shown.
Reference in New Issue
Block a user