566 lines
26 KiB
Python
566 lines
26 KiB
Python
# coding=utf-8
import asyncio
import os
import sys
import time
import json

import cv2
import uiautomator2 as u2

# Put the project root (three directory levels above this file) on sys.path
# BEFORE the project-local imports below. Doing it after them is useless: when
# this file is run directly as a script, those imports would already have
# failed with ModuleNotFoundError.
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
if project_root not in sys.path:
    sys.path.append(project_root)

from Apps.TeLaiDian.Kit import (  # noqa: E402
    take_screenshot, get_image_content_md5, clean_station_name,
    setup_logger, read_image, save_image, detect_warm_popup_xczs_cv
)
from Apps.TeLaiDian.ReadImageKit import ReadImageKit  # noqa: E402
from Apps.TeLaiDian.Service import TeLaiDianService  # noqa: E402
from Apps.TeLaiDian.Config.Setting import (  # noqa: E402
    SCROLL_DISTANCE_RATIO, WAIT_AFTER_SCROLL, MAX_STATIONS_COUNT,
    SAFE_EXCLUDE_RATIO, BOTTOM_SAFE_EXCLUDE_RATIO, WAIT_DETAIL_PAGE_LOAD,
    WAIT_BACK_TO_LIST, DETAIL_SCROLL_DISTANCE_RATIO, FIRST_RUN_ONLY_ONE_STATION,
    REDIS_STATION_EXPIRE
)
from Core.BaseCrawler import BaseCrawler  # noqa: E402
from Util.RedisKit import RedisKit  # noqa: E402

# Module-wide logger used by every method of the crawler.
logger = setup_logger("TeLaiDianCrawler")
|
||
|
||
class TeLaiDianCrawler(BaseCrawler):
    """Crawler that drives the TeLaiDian mini program through uiautomator2.

    The crawler walks the station list screen by screen, opens each station's
    detail page, screenshots the price table and hands those screenshots to
    background asyncio tasks that recognise the prices and persist them through
    the service object. Stations that were already handled are remembered in
    Redis so that later runs skip them.

    Coordinates returned by the OCR/VLM helpers for popups, ad close buttons
    and the price entrance are scaled by ``window_size / 1000`` before being
    clicked, i.e. they are treated as normalised to a 0-1000 range.
    """

    def __init__(self, service=None):
        """Create the crawler.

        Args:
            service: Persistence service; a new ``TeLaiDianService`` is used
                when omitted. It is handed to ``BaseCrawler.__init__`` and read
                back as ``self.service`` (presumably stored there by the base
                class -- confirm against ``Core.BaseCrawler``).
        """
        super().__init__(service or TeLaiDianService())
        self.read_image_kit = ReadImageKit()
        self.redis_kit = RedisKit()
        # Background price-recognition tasks that must be awaited before exit.
        self.pending_price_tasks = []
        # Maximum number of price screenshots analysed concurrently per station.
        self.vlm_concurrency = 3

    # ------------------------------------------------------------------
    # Small private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _remove_file(path):
        """Delete a temporary screenshot, never raising on failure."""
        if not path:
            return
        try:
            if os.path.exists(path):
                os.remove(path)
        except OSError as e:
            # Cleanup is best effort; a leftover file must not abort the crawl.
            logger.warning(f"临时文件删除失败: {path} ({e})")

    @staticmethod
    def _dedupe_and_sort_prices(prices):
        """Drop duplicated ``start``-``end`` periods and sort by start time.

        The first occurrence of every period wins. A missing or ``None``
        ``start`` sorts as ``"00:00"`` instead of raising ``TypeError``.
        """
        unique_prices = []
        seen_periods = set()
        for item in prices:
            key = f"{item.get('start')}-{item.get('end')}"
            if key not in seen_periods:
                seen_periods.add(key)
                unique_prices.append(item)
        unique_prices.sort(key=lambda x: str(x.get("start") or "00:00"))
        return unique_prices

    async def _drain_pending_price_tasks(self):
        """Wait for every background price task, then forget them."""
        if not self.pending_price_tasks:
            return
        logger.info(f"[收尾] 等待后台价格任务完成,共 {len(self.pending_price_tasks)} 个...")
        try:
            # return_exceptions=True: one failed station must not hide the rest.
            await asyncio.gather(*self.pending_price_tasks, return_exceptions=True)
        finally:
            self.pending_price_tasks.clear()
        logger.info("[收尾] 后台价格任务已全部完成")

    # ------------------------------------------------------------------
    # Entry points
    # ------------------------------------------------------------------
    async def start(self):
        """Start crawling (BaseCrawler entry point).

        In full-pipeline scripts the mini program has already been opened by
        the Opener, so this connects to the device and crawls straight away.
        """
        d = u2.connect()
        await self.crawl_list_logic(d)

    async def open_app(self):
        """Open the TeLaiDian mini program and return the Opener's result."""
        # Imported lazily, exactly as in the original code.
        from Apps.TeLaiDian import Opener
        return await Opener.open_mini_program()

    async def clear_ads(self, d, max_rounds=3):
        """Detect and close ad popups, re-checking for up to ``max_rounds`` rounds.

        Args:
            d: Connected uiautomator2 device.
            max_rounds: Maximum number of screenshot/analyse/click rounds.
        """
        logger.info(f"--- [广告清理] 开始检测,最多尝试 {max_rounds} 轮 ---")
        for round_idx in range(1, max_rounds + 1):
            logger.info(f"[广告清理] 第 {round_idx} 轮:正在截屏分析...")
            ad_screen = take_screenshot(d, f"tld_ad_check_r{round_idx}_{int(time.time())}.jpg")

            try:
                res = await self.read_image_kit.find_close_button_vlm(ad_screen)

                if not (res.get("has_ad") and res.get("close_point")):
                    logger.info(f"[广告清理] 第 {round_idx} 轮:未发现广告弹窗。VLM 理由: {res.get('reason', '无')}")
                    break

                close_point = res.get("close_point")
                reason = res.get("reason", "未提供原因")
                w, h = d.window_size()
                target_x = int(close_point[0] * w / 1000)
                target_y = int(close_point[1] * h / 1000)

                logger.info(f"[广告清理] 第 {round_idx} 轮:VLM 发现广告!原因: {reason}")
                logger.info(f"[广告清理] 计划点击坐标: ({target_x}, {target_y}),归一化坐标: {close_point}")

                # Safety check: never click the WeChat capsule button area
                # in the top-right corner.
                if target_x > w * 0.75 and target_y < 150:
                    logger.warning(f"[广告清理] ⚠️ 拒绝点击疑似微信胶囊按钮的区域: ({target_x}, {target_y}),跳过本轮。")
                    continue

                logger.info("[广告清理] 正在执行点击关闭操作...")
                d.click(target_x, target_y)
                # Give the popup time to vanish, or the next ad time to appear.
                await asyncio.sleep(2.0)
            except Exception as e:
                logger.error(f"[广告清理] 第 {round_idx} 轮检测发生异常: {e}")
            finally:
                # Runs on break/continue as well, so it is the single cleanup point.
                self._remove_file(ad_screen)

        logger.info("--- [广告清理] 任务结束 ---")

    # ------------------------------------------------------------------
    # List page
    # ------------------------------------------------------------------
    async def _dismiss_warm_popup(self, d):
        """Close the "warm reminder" popup when both OCR and the VLM confirm it.

        The click is only performed when the OCR helper located the button AND
        the VLM reports ``has_popup``. The analysed screenshot is always
        deleted; the annotated diagnostic image is kept.
        """
        popup_screen_path = take_screenshot(d, f"tld_detail_popup_{int(time.time())}.jpg")
        logger.info(f"[详情页] 截图用于检测温馨提示弹窗: {popup_screen_path}")

        try:
            # Locate the "下次再说" button with OCR (replaces template matching).
            ocr_res = detect_warm_popup_xczs_cv(popup_screen_path)
            ocr_point = None
            if ocr_res:
                w, h = d.window_size()
                # Centre of the detected box, scaled from 0-1000 to pixels.
                ocr_point = (
                    int((ocr_res[0] + ocr_res[2]) / 2 * w / 1000),
                    int((ocr_res[1] + ocr_res[3]) / 2 * h / 1000),
                )
                logger.info(f"[详情页] OCR 检测到“下次再说”按钮位置: {ocr_point}")
            else:
                logger.info("[详情页] OCR 未能检测到“下次再说”按钮")

            vlm_popup = {"has_popup": False}
            try:
                vlm_popup = await self.read_image_kit.check_warm_popup_vlm(popup_screen_path)
            except Exception as e:
                logger.error(f"[详情页] VLM 检测温馨提示弹窗失败: {e}")

            has_vlm_popup = isinstance(vlm_popup, dict) and vlm_popup.get("has_popup")
            if not (ocr_point and has_vlm_popup):
                logger.info(f"[详情页] 温馨提示弹窗未通过双重确认,OCR检测: {bool(ocr_point)} | VLM 检测: {vlm_popup}")
                return

            click_x, click_y = ocr_point
            logger.info(f"[详情页] OCR 与 VLM 均确认存在温馨提示弹窗,即将点击“下次再说”按钮: ({click_x}, {click_y})")

            # Save an annotated copy showing where the click will land.
            debug_popup_path = popup_screen_path.replace(".jpg", f"_xczs_click_{click_x}_{click_y}.jpg")
            try:
                img_popup = read_image(popup_screen_path)
                if img_popup is not None:
                    cv2.circle(img_popup, (click_x, click_y), 20, (0, 0, 255), -1)
                    save_image(debug_popup_path, img_popup)
                    logger.info(f"[详情页] 已生成温馨提示弹窗点击诊断图片: {debug_popup_path}")
            except Exception as e:
                logger.error(f"[详情页] 生成温馨提示弹窗诊断图片失败: {e}")

            try:
                d.click(click_x, click_y)
                await asyncio.sleep(1.5)
            except Exception as e:
                logger.error(f"[详情页] 点击温馨提示“下次再说”失败: {e}")
        finally:
            # The raw screenshot was previously left on disk forever.
            self._remove_file(popup_screen_path)

    async def crawl_list_logic(self, d):
        """Crawl the station list page by page and visit each new station.

        Stops when the configured station limit is reached or when two
        consecutive list screenshots have the same content hash (bottom of the
        list). Pending background price tasks are awaited before returning.

        Args:
            d: Connected uiautomator2 device.
        """
        # Ad cleaning at start-up is intentionally disabled:
        # await self.clear_ads(d, max_rounds=3)

        await self._dismiss_warm_popup(d)

        # Pull down (30% -> 80% of the screen height) to refresh the list and
        # re-calibrate the location.
        w, h = d.window_size()
        logger.info(f"执行显式下拉刷新操作: (x={w//2}, y1={int(h*0.3)} -> y2={int(h*0.8)})")
        d.swipe(w // 2, int(h * 0.3), w // 2, int(h * 0.8), duration=0.5)

        logger.info(f"等待 {WAIT_AFTER_SCROLL} 秒确保位置校准和列表刷新完成...")
        await asyncio.sleep(WAIT_AFTER_SCROLL)

        max_to_crawl = 1 if FIRST_RUN_ONLY_ONE_STATION else MAX_STATIONS_COUNT
        processed_count = 0
        last_md5 = None

        while processed_count < max_to_crawl:
            screenshot_path = take_screenshot(d, f"tld_list_{int(time.time())}.jpg")
            try:
                # Unchanged content after a scroll means the bottom was reached.
                curr_md5 = get_image_content_md5(
                    screenshot_path,
                    top_ratio=SAFE_EXCLUDE_RATIO,
                    bottom_ratio=BOTTOM_SAFE_EXCLUDE_RATIO,
                )
                if last_md5 == curr_md5:
                    logger.info("内容无变化,判定已到底部")
                    break
                last_md5 = curr_md5

                stations = await self.read_image_kit.analyze_station_list(screenshot_path)
                if not stations:
                    # Check whether the mini program was left by accident.
                    if await self.check_wrong_page(d, screenshot_path):
                        logger.error("检测到已退出详情列表页(可能回到了搜索页),尝试重新进入...")
                        await self.open_app()
                        await asyncio.sleep(5)
                        continue

                    logger.info("本页未检测到场站,尝试滑动...")
                    d.swipe_ext("up", scale=SCROLL_DISTANCE_RATIO)
                    await asyncio.sleep(WAIT_AFTER_SCROLL)
                    continue

                for station in stations:
                    if processed_count >= max_to_crawl:
                        break

                    name = station.get("name")
                    point = station.get("point")
                    if not name or not point:
                        continue

                    # Cross-run de-duplication through Redis.
                    cleaned_name = clean_station_name(name)
                    redis_key = f"crawled:tld:{cleaned_name}"
                    if await self.redis_kit.get_data(redis_key):
                        logger.info(f"跳过已处理场站 (Redis): {name}")
                        continue

                    current_idx = processed_count + 1
                    remaining = max_to_crawl - current_idx
                    logger.info(f"--- [进度: {current_idx}/{max_to_crawl}, 剩余: {remaining}] 处理场站: {name} (坐标: {point}, 距离: {station.get('distance')}) ---")

                    # Open the detail page. The list point is clicked as-is
                    # (no 0-1000 scaling, unlike the popup/entrance points).
                    d.click(point[0], point[1])
                    logger.info(f"已点击场站 '{name}',等待 {WAIT_DETAIL_PAGE_LOAD}s 加载详情页...")
                    await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)

                    # Verify via screenshot that a detail page was really opened.
                    detail_check_path = take_screenshot(d, f"tld_detail_check_{int(time.time())}.jpg")
                    logger.info(f"详情页快照已保存: {detail_check_path}")
                    try:
                        is_wrong_page = await self.check_wrong_page(d, detail_check_path)
                    finally:
                        self._remove_file(detail_check_path)

                    if is_wrong_page:
                        logger.warning("检测到进入了错误页面,尝试返回列表...")
                        # Tap the in-page back arrow (usually top-left), then
                        # also send a system "back".
                        d.click(40, 70)
                        await asyncio.sleep(1.0)
                        d.press("back")
                        await asyncio.sleep(WAIT_BACK_TO_LIST)
                        continue

                    await self.crawl_detail_logic(d, station)

                    # Mark the station as processed.
                    await self.redis_kit.set_data(redis_key, "1", expire=REDIS_STATION_EXPIRE)

                    # NOTE(review): two "back" presses are sent even when the
                    # price page was never entered -- confirm this cannot leave
                    # the list page in that case.
                    d.press("back")
                    await asyncio.sleep(WAIT_BACK_TO_LIST)
                    d.press("back")
                    await asyncio.sleep(WAIT_BACK_TO_LIST)
                    processed_count += 1

                    if FIRST_RUN_ONLY_ONE_STATION:
                        logger.info("已完成首个场站的全流程采集,根据配置退出爬取任务。")
                        await self._drain_pending_price_tasks()
                        return

                # Scroll to the next page of the list.
                d.swipe_ext("up", scale=SCROLL_DISTANCE_RATIO)
                await asyncio.sleep(WAIT_AFTER_SCROLL)
            finally:
                # Single cleanup point: covers break, continue, return and errors
                # (the "no stations" continue paths used to leak the file).
                self._remove_file(screenshot_path)

        await self._drain_pending_price_tasks()

    async def check_wrong_page(self, d, image_path):
        """Return True when the screenshot is NOT a station detail page.

        Args:
            d: Connected uiautomator2 device (unused here, kept for callers).
            image_path: Screenshot passed to the VLM page classifier.

        Returns:
            ``True`` if the VLM answer has a falsy ``is_detail_page``; ``False``
            otherwise, including when the answer is not a dict (the same
            "assume detail page" default the key lookup already used).
        """
        data = await self.read_image_kit.check_wrong_page_vlm(image_path)
        if not isinstance(data, dict):
            return False
        is_detail = data.get("is_detail_page", True)
        if not is_detail:
            logger.warning(f"⚠️ 确认进入错误页面: {data.get('page_type')} ({data.get('reason')})")
        return not is_detail

    # ------------------------------------------------------------------
    # Detail page
    # ------------------------------------------------------------------
    async def _locate_price_entrance(self, d, w, h, max_scroll_attempts, scroll_step_ratio):
        """Scroll in small steps until OCR finds the price entrance.

        Returns:
            ``{"screen": <screenshot path>, "point": <point>}`` when found (the
            caller owns and must delete that screenshot), otherwise ``None``.
        """
        for i in range(max_scroll_attempts):
            curr_screen = take_screenshot(d, f"tld_scroll_ocr_{i}_{int(time.time())}.jpg")

            try:
                entry_data = await self.read_image_kit.find_price_entrance_ocr(curr_screen)
            except Exception as e:
                # A recogniser failure counts as "not found on this screen".
                logger.error(f"[详情页] 识别或点击价格入口失败: {e}")
                entry_data = None

            if isinstance(entry_data, dict) and entry_data.get("found") and entry_data.get("point"):
                logger.info(f"[详情页] 第 {i+1} 次尝试:成功探测到价格入口!")
                return {"screen": curr_screen, "point": entry_data["point"]}

            # Not found: scroll up a little, except after the last attempt.
            if i < max_scroll_attempts - 1:
                logger.info(f"[详情页] 第 {i+1} 次尝试未找到,小幅向上滚动 (步长: {scroll_step_ratio*100}%)...")
                d.swipe(w // 2, int(h * 0.7), w // 2, int(h * (0.7 - scroll_step_ratio)), duration=0.5)
                await asyncio.sleep(1.5)

            self._remove_file(curr_screen)
        return None

    async def _scroll_price_list_to_top(self, d, max_scrolls=10):
        """Swipe down repeatedly until the price list stops moving."""
        logger.info("正在向上滚动价格列表到顶部 (快速多次滚动以尽快看到 00:00)...")
        for i in range(max_scrolls):
            before_scroll_path = take_screenshot(d, f"scroll_up_{i}.jpg")
            before_scroll_md5 = get_image_content_md5(before_scroll_path)

            d.swipe_ext("down", scale=0.85)
            await asyncio.sleep(0.5)

            after_scroll_path = take_screenshot(d, f"scroll_up_after_{i}.jpg")
            after_scroll_md5 = get_image_content_md5(after_scroll_path)

            self._remove_file(before_scroll_path)
            self._remove_file(after_scroll_path)

            if before_scroll_md5 == after_scroll_md5:
                logger.info(f"价格列表已到达顶部 (滚动次数: {i})")
                break

    async def _capture_price_pages(self, d, max_pages=8):
        """Screenshot the price list page by page, top to bottom.

        Returns:
            Paths of the captured screenshots; ownership passes to the caller.
        """
        logger.info("正在从顶部开始向下逐页截图...")
        price_screenshots = []
        for p_idx in range(1, max_pages + 1):
            p_shot = take_screenshot(d, f"tld_detail_price_{int(time.time())}_{p_idx}.jpg")

            before_dn_md5 = get_image_content_md5(p_shot)
            d.swipe_ext("up", scale=0.8)
            await asyncio.sleep(1.2)

            # Compare with the post-scroll screen to detect the bottom.
            check_dn_path = take_screenshot(d, f"check_dn_{p_idx}.jpg")
            after_dn_md5 = get_image_content_md5(check_dn_path)
            self._remove_file(check_dn_path)

            price_screenshots.append(p_shot)

            if before_dn_md5 == after_dn_md5:
                logger.info(f"价格列表已到达底部 (共抓取页数: {p_idx})")
                break
        return price_screenshots

    async def crawl_detail_logic(self, d, station_info):
        """Extract basic, status and price information from a detail page.

        Basic info is recognised from the first screen. The price entrance is
        then searched by scrolling in small steps; when found it is clicked and
        the price list is screenshotted and submitted to a background task.
        When no price task was submitted, the basic data is saved directly.

        Args:
            d: Connected uiautomator2 device, already on the detail page.
            station_info: Station dict from the list analysis (``name``,
                ``address``, ``distance`` and optionally pile counts).
        """
        first_screen_path = take_screenshot(d, f"tld_detail_basic_{int(time.time())}.jpg")
        station_name = station_info.get("name")
        address = station_info.get("address")
        distance = station_info.get("distance")
        total_piles = None
        free_piles = None
        piles_detail = None
        parking_info = None
        submitted_price_task = False

        logger.info(f"[详情页] 进入 crawl_detail_logic,场站: {station_name} | 地址: {address}")
        logger.info(f"[详情页] 已截取首屏截图,准备识别基础信息: {first_screen_path}")

        try:
            basic_info = await self.read_image_kit.analyze_detail_basic_info(first_screen_path)
            if isinstance(basic_info, dict):
                # Values read from the detail page override the list values.
                if basic_info.get("name"):
                    station_name = basic_info.get("name")
                if basic_info.get("address"):
                    address = basic_info.get("address")

                total_piles = basic_info.get("total_piles")
                free_piles = basic_info.get("free_piles")
                piles_detail = basic_info.get("piles_detail")
                parking_info = basic_info.get("parking_info")

            # Fall back to the pile counts seen on the list page.
            if total_piles is None and isinstance(station_info, dict):
                tp = station_info.get("total_piles")
                if tp is not None:
                    total_piles = tp
            if free_piles is None and isinstance(station_info, dict):
                fp = station_info.get("free_piles")
                if fp is not None:
                    free_piles = fp

            logger.info(f"[详情页] 基础信息识别结果: {station_name} | {address} | 桩数: {total_piles}/{free_piles} | 停车费: {parking_info}")
        except Exception as ex:
            logger.error(f"[详情页] 同步分析详情页基础信息失败: {ex}")
        finally:
            self._remove_file(first_screen_path)

        logger.info("[详情页] 根据用户要求:进入页面后多等会,然后通过 OCR 实时探测价格入口 (全部时段/全天价格统一)")
        w, h = d.window_size()

        # Wait longer so the page has finished loading.
        logger.info(f"[详情页] 等待 {WAIT_DETAIL_PAGE_LOAD}s 确保页面稳定...")
        await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)

        # Several small scrolls with an OCR probe after each one, so the price
        # entrance ("全部时段" / "全天价格统一") is not scrolled past.
        max_scroll_attempts = 6   # maximum number of probe/scroll rounds
        scroll_step_ratio = 0.3   # each scroll covers 30% of the screen height

        logger.info(f"[详情页] 开始“小步快跑”滚动查找价格入口 (全部时段/全天价格统一) (最多 {max_scroll_attempts} 次)...")
        found_entry = await self._locate_price_entrance(
            d, w, h, max_scroll_attempts, scroll_step_ratio
        )

        entrance_clicked = False
        try:
            if found_entry:
                price_tab_screen = found_entry["screen"]
                p = found_entry["point"]

                entry_x = int(p[0] * w / 1000)
                entry_y = int(p[1] * h / 1000)

                # Only a warning: the coordinates are not actually adjusted.
                if entry_y > h * 0.9:
                    logger.warning(f"[详情页] 入口坐标偏低 ({entry_y}),可能在底部遮罩,尝试微调。")

                # Draw the diagnostic image showing the final click position.
                debug_click_path = price_tab_screen.replace(".jpg", "_final_click.jpg")
                img = read_image(price_tab_screen)
                if img is not None:
                    cv2.circle(img, (entry_x, entry_y), 25, (0, 255, 0), -1)
                    save_image(debug_click_path, img)
                    logger.info(f"[详情页] 已生成最终点击诊断图: {debug_click_path}")

                logger.info(f"[详情页] 正在点击电价入口: ({entry_x}, {entry_y})")
                d.click(entry_x, entry_y)
                entrance_clicked = True
                await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)
            else:
                # Entrance text not found: log it, keep a screenshot for
                # diagnosis and continue with the basic data only.
                fail_screen = take_screenshot(d, f"tld_ocr_fail_{int(time.time())}.jpg")
                logger.error(f"❌ [OCR失败] 经过 {max_scroll_attempts} 次滚动仍未在页面中找到价格入口文字 (全部时段/全天价格统一)!")
                logger.error(f"❌ [OCR失败] 最终截图已保存至: {fail_screen}")
                logger.error("❌ [OCR失败] 将不进入电价页,继续以当前信息写入基础数据。")
        except Exception as e:
            logger.error(f"[详情页] 识别或点击价格入口失败: {e}")
        finally:
            # The raw screenshot is no longer needed once the diagnostic copy
            # exists; it used to be left on disk.
            if found_entry:
                self._remove_file(found_entry["screen"])

        if entrance_clicked:
            # This screenshot is kept on disk as a diagnostic.
            entered_price_path = take_screenshot(d, f"tld_detail_price_after_enter_{int(time.time())}.jpg")
            logger.info(f"[电价页] 入口点击后的电价页截图已保存: {entered_price_path}")

            await asyncio.sleep(1.0)

            await self._scroll_price_list_to_top(d)
            price_screenshots = await self._capture_price_pages(d)

            if price_screenshots:
                task = asyncio.create_task(
                    self._analyze_and_save_prices_async(
                        station_name=station_name,
                        address=address,
                        distance=distance,
                        price_screenshots=price_screenshots,
                        total_piles=total_piles,
                        free_piles=free_piles,
                        piles_detail=piles_detail,
                        parking_info=parking_info,
                    )
                )
                # Keep a reference so the task is awaited before shutdown.
                self.pending_price_tasks.append(task)
                submitted_price_task = True
                logger.info(f"[详情页] 已后台提交 {len(price_screenshots)} 张电价截图进行识别与保存,继续后续流程不阻塞。")

        if not submitted_price_task:
            try:
                await self.service.save_station_profile_and_status(
                    station_name=station_name,
                    address=address,
                    total_piles=total_piles,
                    free_piles=free_piles,
                    piles_detail=piles_detail,
                    parking_info=parking_info,
                    distance=distance,
                )
                logger.info(f"[详情页] 已基于整合信息写入基础数据: {station_name}")
            except Exception as e:
                logger.error(f"[详情页] 写入基础数据失败: {e}")

    async def crawl_list(self):
        """Implement BaseCrawler's abstract method: connect and crawl the list."""
        d = u2.connect()
        await self.crawl_list_logic(d)

    async def crawl_detail(self, station_info):
        """Implement BaseCrawler's abstract method (intentionally a no-op).

        Detail crawling is done by ``crawl_detail_logic``, which
        ``crawl_list_logic`` calls directly.
        """
        pass

    async def _analyze_and_save_prices_async(self, station_name, address, distance, price_screenshots, total_piles=None, free_piles=None, piles_detail=None, parking_info=None):
        """Recognise prices from screenshots in the background and persist them.

        At most ``self.vlm_concurrency`` screenshots are analysed at once.
        Duplicate periods are dropped and the rest sorted by start time before
        saving. When no price is recognised, the basic station data is still
        saved so pile and parking information is not lost. All screenshots are
        deleted at the end; errors are logged, never raised.
        """
        sem = asyncio.Semaphore(self.vlm_concurrency)

        async def analyze_one(path):
            """Analyse one screenshot; return ``(path, prices)``, ``[]`` on error."""
            try:
                async with sem:
                    prices = await self.read_image_kit.analyze_detail_price_info(path)
                    return path, prices
            except Exception as e:
                logger.error(f"[详情页] 异步识别价格失败 ({os.path.basename(path)}): {e}")
                return path, []

        try:
            results = await asyncio.gather(
                *(analyze_one(shot) for shot in price_screenshots),
                return_exceptions=False,
            )

            all_prices = []
            for _path, prices in results:
                if prices:
                    all_prices.extend(prices)

            if not all_prices:
                logger.warning(f"[详情页] {station_name} 后台识别未提取到任何价格信息")
                # The caller skipped the basic save because this task was
                # submitted, so do it here; otherwise the station data is lost.
                await self.service.save_station_profile_and_status(
                    station_name=station_name,
                    address=address,
                    total_piles=total_piles,
                    free_piles=free_piles,
                    piles_detail=piles_detail,
                    parking_info=parking_info,
                    distance=distance,
                )
                logger.info(f"[详情页] 已基于整合信息写入基础数据: {station_name}")
                return

            unique_prices = self._dedupe_and_sort_prices(all_prices)

            await self.service.save_station_data(
                station_name=station_name,
                address=address,
                prices=unique_prices,
                total_piles=total_piles,
                free_piles=free_piles,
                piles_detail=piles_detail,
                parking_info=parking_info,
                distance=distance,
            )
            logger.info(f"[详情页] {station_name} 后台价格信息处理完成,共 {len(unique_prices)} 条时段,并已写入数据库。")
        except Exception as e:
            logger.error(f"[详情页] 后台处理价格截图失败: {e}")
        finally:
            # Single cleanup point for every price screenshot.
            for p_shot in price_screenshots:
                self._remove_file(p_shot)
|
||
|
||
async def main(service=None):
    """Build a TeLaiDianCrawler with the optional service and run it."""
    await TeLaiDianCrawler(service=service).start()


# Script entry point: run the crawler with its default service.
if __name__ == "__main__":
    asyncio.run(main())
|