'commit'
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
|
||||
|
||||
# 采集配置
|
||||
SCROLL_DISTANCE_RATIO = 0.5
|
||||
MAX_STATIONS_COUNT = 30
|
||||
MAX_STATIONS_COUNT = 20
|
||||
FIRST_RUN_ONLY_ONE_STATION = False
|
||||
REDIS_STATION_EXPIRE = 120
|
||||
DATA_RETENTION_DAYS = 365
|
||||
|
||||
@@ -180,52 +180,49 @@ class YeLiTeCrawler(BaseCrawler):
|
||||
if dqdf_pos:
|
||||
logger.info(f"发现价格入口按钮 (阶段性电价/当前电费) {dqdf_pos},点击进入...")
|
||||
d.click(dqdf_pos[0], dqdf_pos[1])
|
||||
await asyncio.sleep(0.5) # 等待列表加载
|
||||
# 删除旧的详情页截图
|
||||
await asyncio.sleep(0.5)
|
||||
if os.path.exists(detail_shot): os.remove(detail_shot)
|
||||
|
||||
# 1. 向下滚动到底 (根据用户反馈:只有不断向下滚动,才能看到00点的)
|
||||
logger.info("正在向下滚动价格列表到底部 (快速多次滚动以尽快看到 00:00)...")
|
||||
max_scroll_down = 10
|
||||
for i in range(max_scroll_down):
|
||||
before_scroll_path = take_screenshot(d, f"scroll_dn_{i}")
|
||||
scroll_x = int(w * 0.5)
|
||||
scroll_top_y = int(h * 0.5)
|
||||
scroll_bottom_y = int(h * 0.8)
|
||||
|
||||
logger.info("正在向上滚动价格列表到顶部 (快速多次滚动以尽快看到 00:00)...")
|
||||
max_scroll_up_to_top = 10
|
||||
for i in range(max_scroll_up_to_top):
|
||||
before_scroll_path = take_screenshot(d, f"scroll_up_{i}")
|
||||
before_scroll_md5 = get_image_content_md5(before_scroll_path)
|
||||
|
||||
d.swipe_ext("up", scale=0.8)
|
||||
d.swipe(scroll_x, scroll_top_y, scroll_x, scroll_bottom_y, 0.2)
|
||||
await asyncio.sleep(0.3)
|
||||
|
||||
after_scroll_path = take_screenshot(d, f"scroll_dn_after_{i}")
|
||||
after_scroll_path = take_screenshot(d, f"scroll_up_after_{i}")
|
||||
after_scroll_md5 = get_image_content_md5(after_scroll_path)
|
||||
|
||||
# 清理临时截图
|
||||
if os.path.exists(before_scroll_path): os.remove(before_scroll_path)
|
||||
if os.path.exists(after_scroll_path): os.remove(after_scroll_path)
|
||||
|
||||
if before_scroll_md5 == after_scroll_md5:
|
||||
logger.info(f"价格列表已到达底部 (滚动次数: {i})")
|
||||
logger.info(f"价格列表已到达顶部 (滚动次数: {i})")
|
||||
break
|
||||
|
||||
# 2. 向上滚动并逐页截图 (从底向上抓取)
|
||||
logger.info("正在向上滚动价格列表并逐页截图...")
|
||||
max_scroll_up = 10
|
||||
for p_idx in range(1, max_scroll_up + 1):
|
||||
# 截图当前页
|
||||
logger.info("正在从顶部开始向下逐页截图...")
|
||||
max_scroll_down_pages = 10
|
||||
for p_idx in range(1, max_scroll_down_pages + 1):
|
||||
p_shot = take_screenshot(d, f"detail_price_{clean_station_name(name)}_{int(time.time())}_{p_idx}")
|
||||
|
||||
# 检查是否还能向上滚动
|
||||
before_up_md5 = get_image_content_md5(p_shot)
|
||||
d.swipe_ext("down", scale=0.85)
|
||||
before_dn_md5 = get_image_content_md5(p_shot)
|
||||
d.swipe(scroll_x, scroll_bottom_y, scroll_x, scroll_top_y, 0.2)
|
||||
await asyncio.sleep(0.3)
|
||||
|
||||
# 检查是否还有新内容
|
||||
check_up_shot = take_screenshot(d, f"check_up_{p_idx}")
|
||||
after_up_md5 = get_image_content_md5(check_up_shot)
|
||||
if os.path.exists(check_up_shot): os.remove(check_up_shot)
|
||||
check_dn_shot = take_screenshot(d, f"check_dn_{p_idx}")
|
||||
after_dn_md5 = get_image_content_md5(check_dn_shot)
|
||||
if os.path.exists(check_dn_shot): os.remove(check_dn_shot)
|
||||
|
||||
detail_shots.append(p_shot)
|
||||
|
||||
if before_up_md5 == after_up_md5:
|
||||
logger.info(f"价格列表已到达顶部 (共抓取页数: {p_idx})")
|
||||
if before_dn_md5 == after_dn_md5:
|
||||
logger.info(f"价格列表已到达底部 (共抓取页数: {p_idx})")
|
||||
break
|
||||
|
||||
# 关闭分时段定价列表 (点击屏幕最顶部空白处)
|
||||
@@ -367,7 +364,7 @@ async def main(service=None):
|
||||
finally:
|
||||
await service.log_task_end(task_id, total_count, status, error_msg)
|
||||
# 如果是内部初始化的 service,则关闭
|
||||
if service and not isinstance(service, YiLaiTeService):
|
||||
if service and not isinstance(service, YeLiTeService):
|
||||
await service.close_db()
|
||||
|
||||
async def get_image_md5_async(path):
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user