This commit is contained in:
HuangHai
2026-01-14 07:55:31 +08:00
parent b0d7afe420
commit 95e9445dfa
6 changed files with 77 additions and 14 deletions

View File

@@ -15,6 +15,6 @@ WAIT_BACK_TO_LIST = 1.0
WAIT_AFTER_SCROLL = 2.5
# Coordinate calculation and safety margins
# NOTE(review): SAFE_EXCLUDE_RATIO, BOTTOM_SAFE_EXCLUDE_RATIO and MIN_CARD_HEIGHT
# are each assigned twice below, so the later assignment is the one that takes
# effect; the first three look like the pre-change side of a diff — confirm.
SAFE_EXCLUDE_RATIO = 0.35 # Top exclusion ratio, tuned to avoid the search bar, promo banner, quick-entry shortcuts and ad slots
BOTTOM_SAFE_EXCLUDE_RATIO = 0.12 # Slightly larger bottom exclusion ratio, to avoid the bottom navigation bar
MIN_CARD_HEIGHT = 150 # Minimum station-card height, to avoid recognizing incomplete cards
SAFE_EXCLUDE_RATIO = 0.55 # Greatly increased exclusion ratio so recognition starts below the filter bar
BOTTOM_SAFE_EXCLUDE_RATIO = 0.12
MIN_CARD_HEIGHT = 250 # Raised minimum height so card info is complete; TeLaiDian cards are long, about 300px

View File

@@ -77,12 +77,17 @@ class TeLaiDianCrawler(BaseCrawler):
if not name or not point:
continue
logger.info(f"处理场站: {name}")
logger.info(f"处理场站: {name} (坐标: {point})")
# 点击进入详情
d.click(point[0], point[1])
logger.info(f"已点击场站 '{name}',等待 {WAIT_DETAIL_PAGE_LOAD}s 加载详情页...")
await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)
# 截图验证是否进入详情页 (可选:可以加入更复杂的页面特征判断)
detail_check_path = take_screenshot(d, f"tld_detail_check_{int(time.time())}.jpg")
logger.info(f"详情页快照已保存: {detail_check_path}")
# 爬取详情
await self.crawl_detail_logic(d, station)
@@ -101,20 +106,55 @@ class TeLaiDianCrawler(BaseCrawler):
"""
在详情页提取价格和状态信息
"""
screenshot_path = take_screenshot(d, f"tld_detail_{int(time.time())}.jpg")
# 1. 识别第一屏的基础信息 (名称、精确地址)
first_screen_path = take_screenshot(d, f"tld_detail_basic_{int(time.time())}.jpg")
basic_info = await self.read_image_kit.analyze_detail_basic_info(first_screen_path)
# 1. 提取价格
prices = await self.read_image_kit.analyze_detail_price(screenshot_path)
station_name = basic_info.get("name") or station_info.get("name")
address = basic_info.get("address") or station_info.get("address")
logger.info(f"详情页基础信息识别完成: {station_name} | {address}")
# 2. 向上滑动以露出价格按钮
logger.info("执行滑动操作以显示价格按钮...")
# 从屏幕中间向上滑动scale=0.6 表示滑动距离约为屏幕高度的 60%
d.swipe_ext("up", scale=0.6)
await asyncio.sleep(1.5)
# 3. 点击“价格信息”按钮 (jgxx.jpg)
template_path = os.path.join(project_root, "Apps", "TeLaiDian", "Template", "jgxx.jpg")
logger.info(f"尝试点击价格详情按钮: {template_path}")
# 2. 保存数据
try:
# 使用 uiautomator2 的图像识别点击
match = d.image.match(template_path)
if match:
logger.info(f"找到价格按钮,坐标: {match['point']}")
d.image.click(template_path)
await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)
else:
logger.warning("未找到价格按钮模板,尝试备选方案:直接点击屏幕下方区域")
# 备选方案:如果模板匹配失败,尝试点击屏幕中下方
w, h = d.window_size()
d.click(w // 2, int(h * 0.8))
await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)
except Exception as e:
logger.error(f"点击价格按钮失败: {e}")
# 4. 截图并分析价格表
price_screen_path = take_screenshot(d, f"tld_detail_price_{int(time.time())}.jpg")
prices = await self.read_image_kit.analyze_detail_price(price_screen_path)
# 5. 保存数据
if prices:
station_name = clean_station_name(station_info.get("name"))
address = station_info.get("address")
logger.info(f"场站 {station_name} 提取到 {len(prices)} 条价格信息,准备保存...")
await self.service.save_station_data(station_name, address, prices)
station_name_clean = clean_station_name(station_name)
logger.info(f"场站 {station_name_clean} 提取到 {len(prices)} 条价格信息,准备保存...")
await self.service.save_station_data(station_name_clean, address, prices)
else:
logger.warning(f"未能从 {price_screen_path} 提取到价格信息")
if os.path.exists(screenshot_path): os.remove(screenshot_path)
# 清理临时截图
for p in [first_screen_path, price_screen_path]:
if os.path.exists(p): os.remove(p)
async def crawl_list(self):
"""

View File

@@ -77,6 +77,29 @@ class ReadImageKit:
logger.error(f"分析电价详情失败: {e}")
return []
async def analyze_detail_basic_info(self, image_path):
    """Extract the station name and address from a detail-page screenshot.

    Sends the image at ``image_path`` to ``self.vlm`` together with a prompt
    asking for a JSON object with ``name`` and ``address`` keys, then parses
    the JSON that ``self.vlm.extract_json`` pulls out of the reply.

    Args:
        image_path: Path of the first-screen detail-page screenshot.

    Returns:
        The parsed JSON object as a dict. An empty dict is returned when the
        analysis or parsing fails, or when the reply is valid JSON but not an
        object, so callers can always use ``.get(...)`` on the result.
    """
    prompt = (
        "\n"
        "分析这张充电站详情页首屏截图,提取:\n"
        "1. 场站名称 (通常在页面中部,大字体)\n"
        "2. 详细地址 (通常在名称下方或页面下半部分,伴有地址图标)\n"
        "输出格式为 JSON\n"
        "{\n"
        "\"name\": \"xxx充电站\",\n"
        "\"address\": \"xxx省xxx市xxx区xxx路xxx号\"\n"
        "}\n"
    )
    try:
        res_text = await self.vlm.analyze_image(image_path, prompt)
        json_str = self.vlm.extract_json(res_text)
        parsed = json.loads(json_str)
    except Exception as e:
        # Best-effort: a failed recognition must not abort the crawl, the
        # caller falls back to the list-page data when keys are missing.
        logger.error(f"分析详情页基础信息失败: {e}")
        return {}

    # json.loads accepts any JSON value (list, string, null, ...); only an
    # object satisfies the dict contract the caller relies on.
    if not isinstance(parsed, dict):
        logger.error(
            f"分析详情页基础信息失败: 期望 JSON 对象,实际为 {type(parsed).__name__}"
        )
        return {}
    return parsed
async def analyze_station_list(self, image_path):
"""
分析场站列表页图片,提取场站位置和基本信息