'commit'
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
|
||||
# 采集配置
|
||||
# 滑动距离比例 (0.1 ~ 0.9),数值越大滑动幅度越大
|
||||
# 使用“小步快跑”策略,设置为 0.15 以避开广告并防止跳过场站
|
||||
SCROLL_DISTANCE_RATIO = 0.15
|
||||
# 稍微调大步长 (从 0.15 调至 0.25),兼顾避开广告与采集效率
|
||||
SCROLL_DISTANCE_RATIO = 0.25
|
||||
# 最大滑动/翻页次数,达到此次数后停止采集
|
||||
MAX_SCROLLS = 100
|
||||
# 默认抓取半径(公里),当检测到场站距离超过此值时停止采集
|
||||
|
||||
Binary file not shown.
@@ -270,7 +270,7 @@ async def get_station_list(d, service, uploader, max_scrolls=MAX_SCROLLS):
|
||||
if detail_ad_res:
|
||||
dad_type = detail_ad_res.get("ad_type")
|
||||
if dad_type == "rabbit":
|
||||
logger.info(">>> 详情页检测到 rabbit 广告,判定为误报,忽略。")
|
||||
logger.info(">>> 详情页检测到 rabbit 广告。根据策略,我们不再尝试关闭它,直接继续。")
|
||||
else:
|
||||
dx, dy = detail_ad_res["x"], detail_ad_res["y"]
|
||||
logger.info(f"检测到详情页弹窗: {dad_type},正在点击关闭 ({dx}, {dy})...")
|
||||
@@ -395,8 +395,9 @@ async def get_station_list(d, service, uploader, max_scrolls=MAX_SCROLLS):
|
||||
if stations and new_stations_processed == 0:
|
||||
no_new_data_count += 1
|
||||
logger.info(f"本页所有场站均已处理过,连续 {no_new_data_count} 页无新数据。")
|
||||
if no_new_data_count >= 3:
|
||||
logger.info("连续 3 页无新数据,判定为已到底或重复循环,提前结束。")
|
||||
# 【优化】由于滑动步长较小 (SCROLL_DISTANCE_RATIO,当前为 0.25),连续多页重复是正常的,将阈值调大到 10
|
||||
if no_new_data_count >= 10:
|
||||
logger.info("连续 10 页无新数据,判定为已到底或重复循环,提前结束。")
|
||||
break
|
||||
else:
|
||||
no_new_data_count = 0
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user