'commit'
This commit is contained in:
@@ -1,9 +1,11 @@
|
||||
|
||||
# 采集配置
|
||||
# 滑动距离比例 (0.1 ~ 0.9),数值越大滑动幅度越大
|
||||
SCROLL_DISTANCE_RATIO = 0.5
|
||||
# 最大采集场站数量,达到此次数后停止采集
|
||||
MAX_STATIONS_COUNT = 100
|
||||
# 采集配置
|
||||
# 滑动距离比例 (0.1 ~ 0.9),数值越大滑动幅度越大
|
||||
SCROLL_DISTANCE_RATIO = 0.5
|
||||
# 最大采集场站数量,达到此次数后停止采集
|
||||
MAX_STATIONS_COUNT = 100
|
||||
FIRST_RUN_ONLY_ONE_STATION = False
|
||||
FIRST_RUN_ONLY_ONE_STATION = False
|
||||
# 场站去重过期时间(秒),在此时间内重复出现的场站不会再次点击进入详情页
|
||||
REDIS_STATION_EXPIRE = 120
|
||||
# 数据库数据保留时长(天),超过此时长的历史数据(is_current=0)将被删除
|
||||
|
||||
@@ -26,7 +26,8 @@ from Apps.AiTeJiYiChong.Config.Setting import (
|
||||
WAIT_AFTER_SCROLL,
|
||||
SAFE_EXCLUDE_RATIO,
|
||||
BOTTOM_SAFE_EXCLUDE_RATIO,
|
||||
TEST_CLEAR_REDIS
|
||||
TEST_CLEAR_REDIS,
|
||||
FIRST_RUN_ONLY_ONE_STATION
|
||||
)
|
||||
|
||||
logger = logging.getLogger("AiTeJiYiChongCrawler")
|
||||
@@ -289,6 +290,10 @@ async def get_station_list(d, service, max_stations_count=MAX_STATIONS_COUNT):
|
||||
full_name_key = f"crawled:aite:{Kit.clean_station_name(station_name)}"
|
||||
await redis_kit.set_data(full_name_key, "1", expire=REDIS_STATION_EXPIRE)
|
||||
|
||||
if FIRST_RUN_ONLY_ONE_STATION:
|
||||
logger.info("已完成首个场站的全流程采集,根据配置退出艾特吉易充爬取任务。")
|
||||
break
|
||||
|
||||
# 如果内层循环已达上限,外层循环也应退出,避免不必要的滑动
|
||||
if total_encountered_count >= max_stations_count:
|
||||
break
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
|
||||
# 采集配置
|
||||
# 滑动距离比例 (0.1 ~ 0.9),数值越大滑动幅度越大
|
||||
# 调大步长 (从 0.25 调至 0.5),提高采集效率
|
||||
SCROLL_DISTANCE_RATIO = 0.5
|
||||
# 最大采集场站数量,达到此次数后停止采集
|
||||
MAX_STATIONS_COUNT = 100
|
||||
# 采集配置
|
||||
# 滑动距离比例 (0.1 ~ 0.9),数值越大滑动幅度越大
|
||||
# 调大步长 (从 0.25 调至 0.5),提高采集效率
|
||||
SCROLL_DISTANCE_RATIO = 0.5
|
||||
# 最大采集场站数量,达到此次数后停止采集
|
||||
MAX_STATIONS_COUNT = 100
|
||||
FIRST_RUN_ONLY_ONE_STATION = False
|
||||
# 场站去重过期时间(秒),在此时间内重复出现的场站不会再次点击进入详情页
|
||||
REDIS_STATION_EXPIRE = 120
|
||||
|
||||
@@ -27,4 +28,4 @@ BOTTOM_SAFE_EXCLUDE_RATIO = 0.12
|
||||
# 默认回退屏幕宽度,当无法自动获取设备信息时使用
|
||||
FALLBACK_WIDTH = 1080
|
||||
# 默认回退屏幕高度,当无法自动获取设备信息时使用
|
||||
FALLBACK_HEIGHT = 2400
|
||||
FALLBACK_HEIGHT = 2400
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
|
||||
# 采集配置
|
||||
# 滑动距离比例 (0.1 ~ 0.9),数值越大滑动幅度越大
|
||||
SCROLL_DISTANCE_RATIO = 0.5
|
||||
# 最大采集场站数量,达到此次数后停止采集
|
||||
MAX_STATIONS_COUNT = 100
|
||||
# 采集配置
|
||||
# 滑动距离比例 (0.1 ~ 0.9),数值越大滑动幅度越大
|
||||
SCROLL_DISTANCE_RATIO = 0.5
|
||||
# 最大采集场站数量,达到此次数后停止采集
|
||||
MAX_STATIONS_COUNT = 100
|
||||
FIRST_RUN_ONLY_ONE_STATION = False
|
||||
# 场站去重过期时间(秒),在此时间内重复出现的场站不会再次点击进入详情页
|
||||
REDIS_STATION_EXPIRE = 120
|
||||
# 数据库数据保留时长(天),超过此时长的历史数据(is_current=0)将被删除
|
||||
@@ -25,14 +26,15 @@ WAIT_DETAIL_PAGE_LOAD = 2.0
|
||||
# 从详情页返回列表页后等待页面刷新的时间
|
||||
WAIT_BACK_TO_LIST = 0.8
|
||||
# 执行滑动操作后等待页面内容加载和稳定的时间
|
||||
WAIT_AFTER_SCROLL = 2.0
|
||||
|
||||
# 坐标计算与安全防护
|
||||
WAIT_AFTER_SCROLL = 2.0
|
||||
|
||||
# 坐标计算与安全防护
|
||||
# 屏幕顶部安全排除比例 (0.0~1.0),此比例区域内不进行点击(避开状态栏、顶部菜单、横幅广告等)
|
||||
SAFE_EXCLUDE_RATIO = 0.15
|
||||
# 屏幕底部安全排除比例 (0.0~1.0),此比例区域内不进行点击(避开底部导航栏、功能按钮等)
|
||||
BOTTOM_SAFE_EXCLUDE_RATIO = 0.10
|
||||
# 默认回退屏幕宽度,当无法自动获取设备信息时使用
|
||||
FALLBACK_WIDTH = 1080
|
||||
# 默认回退屏幕高度,当无法自动获取设备信息时使用
|
||||
FALLBACK_HEIGHT = 2400
|
||||
# 默认回退屏幕高度,当无法自动获取设备信息时使用
|
||||
FALLBACK_HEIGHT = 2400
|
||||
FIRST_RUN_ONLY_ONE_STATION = False
|
||||
|
||||
@@ -12,7 +12,8 @@ from Apps.YeLiTe.Service import YiLaiTeService
|
||||
from Apps.YeLiTe.Config.Setting import (
|
||||
SCROLL_DISTANCE_RATIO, WAIT_AFTER_SCROLL, MAX_STATIONS_COUNT,
|
||||
WAIT_DETAIL_PAGE_LOAD, WAIT_BACK_TO_LIST, TEST_CLEAR_REDIS,
|
||||
SAFE_EXCLUDE_RATIO, BOTTOM_SAFE_EXCLUDE_RATIO
|
||||
SAFE_EXCLUDE_RATIO, BOTTOM_SAFE_EXCLUDE_RATIO,
|
||||
FIRST_RUN_ONLY_ONE_STATION
|
||||
)
|
||||
from Util.RedisKit import RedisKit
|
||||
from Core.BaseCrawler import BaseCrawler
|
||||
@@ -238,6 +239,10 @@ class YiLaiTeCrawler(BaseCrawler):
|
||||
logger.info("似乎还在二级页面,尝试再次返回...")
|
||||
d.press("back")
|
||||
await asyncio.sleep(WAIT_BACK_TO_LIST)
|
||||
|
||||
if FIRST_RUN_ONLY_ONE_STATION:
|
||||
logger.info("已完成首个场站的全流程采集,根据配置退出驿来特爬取任务。")
|
||||
return processed_count
|
||||
else:
|
||||
logger.warning(f"点击场站 {name} 后页面似乎未跳转,跳过返回操作")
|
||||
if os.path.exists(detail_shot): os.remove(detail_shot)
|
||||
|
||||
Reference in New Issue
Block a user