This commit is contained in:
HuangHai
2026-01-16 20:39:49 +08:00
parent 565489eadd
commit 07741bcf4b
4 changed files with 25 additions and 28 deletions

View File

@@ -1,7 +1,7 @@
# 采集配置
SCROLL_DISTANCE_RATIO = 0.5
MAX_STATIONS_COUNT = 30
MAX_STATIONS_COUNT = 20
FIRST_RUN_ONLY_ONE_STATION = False
REDIS_STATION_EXPIRE = 120
DATA_RETENTION_DAYS = 365

View File

@@ -180,52 +180,49 @@ class YeLiTeCrawler(BaseCrawler):
if dqdf_pos:
logger.info(f"发现价格入口按钮 (阶段性电价/当前电费) {dqdf_pos},点击进入...")
d.click(dqdf_pos[0], dqdf_pos[1])
await asyncio.sleep(0.5) # 等待列表加载
# 删除旧的详情页截图
await asyncio.sleep(0.5)
if os.path.exists(detail_shot): os.remove(detail_shot)
# 1. 向下滚动到底 (根据用户反馈只有不断向下滚动才能看到00点的)
logger.info("正在向下滚动价格列表到底部 (快速多次滚动以尽快看到 00:00)...")
max_scroll_down = 10
for i in range(max_scroll_down):
before_scroll_path = take_screenshot(d, f"scroll_dn_{i}")
scroll_x = int(w * 0.5)
scroll_top_y = int(h * 0.5)
scroll_bottom_y = int(h * 0.8)
logger.info("正在向上滚动价格列表到顶部 (快速多次滚动以尽快看到 00:00)...")
max_scroll_up_to_top = 10
for i in range(max_scroll_up_to_top):
before_scroll_path = take_screenshot(d, f"scroll_up_{i}")
before_scroll_md5 = get_image_content_md5(before_scroll_path)
d.swipe_ext("up", scale=0.8)
d.swipe(scroll_x, scroll_top_y, scroll_x, scroll_bottom_y, 0.2)
await asyncio.sleep(0.3)
after_scroll_path = take_screenshot(d, f"scroll_dn_after_{i}")
after_scroll_path = take_screenshot(d, f"scroll_up_after_{i}")
after_scroll_md5 = get_image_content_md5(after_scroll_path)
# 清理临时截图
if os.path.exists(before_scroll_path): os.remove(before_scroll_path)
if os.path.exists(after_scroll_path): os.remove(after_scroll_path)
if before_scroll_md5 == after_scroll_md5:
logger.info(f"价格列表已到达底部 (滚动次数: {i})")
logger.info(f"价格列表已到达顶部 (滚动次数: {i})")
break
# 2. 向上滚动并逐页截图 (从底向上抓取)
logger.info("正在向上滚动价格列表并逐页截图...")
max_scroll_up = 10
for p_idx in range(1, max_scroll_up + 1):
# 截图当前页
logger.info("正在从顶部开始向下逐页截图...")
max_scroll_down_pages = 10
for p_idx in range(1, max_scroll_down_pages + 1):
p_shot = take_screenshot(d, f"detail_price_{clean_station_name(name)}_{int(time.time())}_{p_idx}")
# 检查是否还能向上滚动
before_up_md5 = get_image_content_md5(p_shot)
d.swipe_ext("down", scale=0.85)
before_dn_md5 = get_image_content_md5(p_shot)
d.swipe(scroll_x, scroll_bottom_y, scroll_x, scroll_top_y, 0.2)
await asyncio.sleep(0.3)
# 检查是否还有新内容
check_up_shot = take_screenshot(d, f"check_up_{p_idx}")
after_up_md5 = get_image_content_md5(check_up_shot)
if os.path.exists(check_up_shot): os.remove(check_up_shot)
check_dn_shot = take_screenshot(d, f"check_dn_{p_idx}")
after_dn_md5 = get_image_content_md5(check_dn_shot)
if os.path.exists(check_dn_shot): os.remove(check_dn_shot)
detail_shots.append(p_shot)
if before_up_md5 == after_up_md5:
logger.info(f"价格列表已到达顶部 (共抓取页数: {p_idx})")
if before_dn_md5 == after_dn_md5:
logger.info(f"价格列表已到达底部 (共抓取页数: {p_idx})")
break
# 关闭分时段定价列表 (点击屏幕最顶部空白处)
@@ -367,7 +364,7 @@ async def main(service=None):
finally:
await service.log_task_end(task_id, total_count, status, error_msg)
# 如果是内部初始化的 service则关闭
if service and not isinstance(service, YiLaiTeService):
if service and not isinstance(service, YeLiTeService):
await service.close_db()
async def get_image_md5_async(path):