'commit'
This commit is contained in:
@@ -15,6 +15,6 @@ WAIT_BACK_TO_LIST = 1.0
|
||||
WAIT_AFTER_SCROLL = 2.5
|
||||
|
||||
# 坐标计算与安全防护
|
||||
SAFE_EXCLUDE_RATIO = 0.35 # 调整顶部排除比例,避开搜索、活动 Banner、快捷入口和广告位
|
||||
BOTTOM_SAFE_EXCLUDE_RATIO = 0.12 # 略微增加底部排除比例,避开底部导航栏
|
||||
MIN_CARD_HEIGHT = 150 # 场站卡片最小高度,防止识别不完整的卡片
|
||||
SAFE_EXCLUDE_RATIO = 0.55 # 大幅增加排除比例,确保从过滤器下方开始识别
|
||||
BOTTOM_SAFE_EXCLUDE_RATIO = 0.12
|
||||
MIN_CARD_HEIGHT = 250 # 增加最小高度要求,确保卡片信息完整(特来电卡片较长,约300px)
|
||||
|
||||
@@ -77,12 +77,17 @@ class TeLaiDianCrawler(BaseCrawler):
|
||||
if not name or not point:
|
||||
continue
|
||||
|
||||
logger.info(f"处理场站: {name}")
|
||||
logger.info(f"处理场站: {name} (坐标: {point})")
|
||||
|
||||
# 点击进入详情
|
||||
d.click(point[0], point[1])
|
||||
logger.info(f"已点击场站 '{name}',等待 {WAIT_DETAIL_PAGE_LOAD}s 加载详情页...")
|
||||
await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)
|
||||
|
||||
# 截图验证是否进入详情页 (可选:可以加入更复杂的页面特征判断)
|
||||
detail_check_path = take_screenshot(d, f"tld_detail_check_{int(time.time())}.jpg")
|
||||
logger.info(f"详情页快照已保存: {detail_check_path}")
|
||||
|
||||
# 爬取详情
|
||||
await self.crawl_detail_logic(d, station)
|
||||
|
||||
@@ -101,20 +106,55 @@ class TeLaiDianCrawler(BaseCrawler):
|
||||
"""
|
||||
在详情页提取价格和状态信息
|
||||
"""
|
||||
screenshot_path = take_screenshot(d, f"tld_detail_{int(time.time())}.jpg")
|
||||
# 1. 识别第一屏的基础信息 (名称、精确地址)
|
||||
first_screen_path = take_screenshot(d, f"tld_detail_basic_{int(time.time())}.jpg")
|
||||
basic_info = await self.read_image_kit.analyze_detail_basic_info(first_screen_path)
|
||||
|
||||
# 1. 提取价格
|
||||
prices = await self.read_image_kit.analyze_detail_price(screenshot_path)
|
||||
station_name = basic_info.get("name") or station_info.get("name")
|
||||
address = basic_info.get("address") or station_info.get("address")
|
||||
logger.info(f"详情页基础信息识别完成: {station_name} | {address}")
|
||||
|
||||
# 2. 向上滑动以露出价格按钮
|
||||
logger.info("执行滑动操作以显示价格按钮...")
|
||||
# 从屏幕中间向上滑动,scale=0.6 表示滑动距离约为屏幕高度的 60%
|
||||
d.swipe_ext("up", scale=0.6)
|
||||
await asyncio.sleep(1.5)
|
||||
|
||||
# 3. 点击“价格信息”按钮 (jgxx.jpg)
|
||||
template_path = os.path.join(project_root, "Apps", "TeLaiDian", "Template", "jgxx.jpg")
|
||||
logger.info(f"尝试点击价格详情按钮: {template_path}")
|
||||
|
||||
# 2. 保存数据
|
||||
try:
|
||||
# 使用 uiautomator2 的图像识别点击
|
||||
match = d.image.match(template_path)
|
||||
if match:
|
||||
logger.info(f"找到价格按钮,坐标: {match['point']}")
|
||||
d.image.click(template_path)
|
||||
await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)
|
||||
else:
|
||||
logger.warning("未找到价格按钮模板,尝试备选方案:直接点击屏幕下方区域")
|
||||
# 备选方案:如果模板匹配失败,尝试点击屏幕中下方
|
||||
w, h = d.window_size()
|
||||
d.click(w // 2, int(h * 0.8))
|
||||
await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD)
|
||||
except Exception as e:
|
||||
logger.error(f"点击价格按钮失败: {e}")
|
||||
|
||||
# 4. 截图并分析价格表
|
||||
price_screen_path = take_screenshot(d, f"tld_detail_price_{int(time.time())}.jpg")
|
||||
prices = await self.read_image_kit.analyze_detail_price(price_screen_path)
|
||||
|
||||
# 5. 保存数据
|
||||
if prices:
|
||||
station_name = clean_station_name(station_info.get("name"))
|
||||
address = station_info.get("address")
|
||||
|
||||
logger.info(f"场站 {station_name} 提取到 {len(prices)} 条价格信息,准备保存...")
|
||||
await self.service.save_station_data(station_name, address, prices)
|
||||
station_name_clean = clean_station_name(station_name)
|
||||
logger.info(f"场站 {station_name_clean} 提取到 {len(prices)} 条价格信息,准备保存...")
|
||||
await self.service.save_station_data(station_name_clean, address, prices)
|
||||
else:
|
||||
logger.warning(f"未能从 {price_screen_path} 提取到价格信息")
|
||||
|
||||
if os.path.exists(screenshot_path): os.remove(screenshot_path)
|
||||
# 清理临时截图
|
||||
for p in [first_screen_path, price_screen_path]:
|
||||
if os.path.exists(p): os.remove(p)
|
||||
|
||||
async def crawl_list(self):
|
||||
"""
|
||||
|
||||
@@ -77,6 +77,29 @@ class ReadImageKit:
|
||||
logger.error(f"分析电价详情失败: {e}")
|
||||
return []
|
||||
|
||||
async def analyze_detail_basic_info(self, image_path):
    """
    Extract the station name and detailed address from a detail-page screenshot.

    The image at ``image_path`` is sent to ``self.vlm`` together with a prompt
    asking for a JSON object with ``name`` and ``address`` keys; the JSON text
    is then pulled out of the reply with ``self.vlm.extract_json`` and parsed.

    Args:
        image_path: Path of the screenshot passed to ``self.vlm.analyze_image``.

    Returns:
        dict: The parsed JSON object. An empty dict is returned when the
        analysis or parsing raises, or when the reply parses to anything
        other than a JSON object, so callers can always use ``.get()``.
    """
    prompt = """
        分析这张充电站详情页首屏截图,提取:
        1. 场站名称 (通常在页面中部,大字体)
        2. 详细地址 (通常在名称下方或页面下半部分,伴有地址图标)

        输出格式为 JSON:
        {
            "name": "xxx充电站",
            "address": "xxx省xxx市xxx区xxx路xxx号"
        }
        """
    try:
        res_text = await self.vlm.analyze_image(image_path, prompt)
        json_str = self.vlm.extract_json(res_text)
        parsed = json.loads(json_str)
    except Exception as e:
        # Best-effort boundary: any model/parse failure degrades to "no info".
        logger.error(f"分析详情页基础信息失败: {e}")
        return {}

    # json.loads may legitimately yield a list, string, number or None; the
    # caller reads the result with .get(), so only a JSON object is usable.
    if not isinstance(parsed, dict):
        logger.error(
            f"分析详情页基础信息失败: 期望 JSON 对象,实际为 {type(parsed).__name__}"
        )
        return {}
    return parsed
|
||||
|
||||
async def analyze_station_list(self, image_path):
|
||||
"""
|
||||
分析场站列表页图片,提取场站位置和基本信息
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user