1291 lines
56 KiB
Python
1291 lines
56 KiB
Python
import numpy as np
|
||
from PIL import Image
|
||
import os
|
||
import asyncio
|
||
import hashlib
|
||
import json
|
||
import aiohttp
|
||
import logging
|
||
import base64
|
||
from openai import OpenAI, BadRequestError
|
||
from Config.Config import (
|
||
ALY_LLM_API_KEY, VL_MODEL_NAME, VL_MODEL_NAME_AD,
|
||
SAFE_EXCLUDE_RATIO, FALLBACK_WIDTH, FALLBACK_HEIGHT,
|
||
BOTTOM_SAFE_EXCLUDE_RATIO
|
||
)
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
class XinDianTuReadImageKit:
|
||
_client = OpenAI(
|
||
api_key=ALY_LLM_API_KEY,
|
||
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||
)
|
||
|
||
# 通用回退设备信息,仅在无法动态获取设备信息时使用
|
||
_FALLBACK_DEVICE_INFO = {
|
||
"displayWidth": FALLBACK_WIDTH,
|
||
"displayHeight": FALLBACK_HEIGHT,
|
||
"productName": "generic"
|
||
}
|
||
|
||
_prompt = (
|
||
"仅输出JSON数组(不含任何说明文字),按从左到右、从上到下的顺序识别图片中的充电站区域。识别规则如下:\n"
|
||
"1. 必须是卡片形式的充电站信息区域。\n"
|
||
"2. 每一个卡片必须同时具备以下所有要素,否则严禁识别:\n"
|
||
" - 场站名称 (station_name);\n"
|
||
" - 距离信息 (distance, 例如 '5.3km'),位于卡片右上角;\n"
|
||
" - 金额/电费 (price,例如 '0.8490');\n"
|
||
" - 充电枪信息 (piles,包含'超'、'快'或'慢'的类型、总枪数和空闲枪数,例如 '快 闲4/4')。\n"
|
||
"3. 如果缺少上述任何一项要素(例如只有名称和距离,但没有电费或枪数信息),说明它不是真正的场站卡片(可能是广告或功能入口),请直接跳过。\n"
|
||
"\n"
|
||
"JSON对象字段要求:\n"
|
||
"1. b_use: 状态标识(1或0)。如果场站名称为灰色或带有“暂停使用”等标签,则为0,否则为1。\n"
|
||
"2. station_name: 场站名称;\n"
|
||
"3. price: 一度电的价格(数字);\n"
|
||
"4. pro_price: Pro会员价格(数字),无则为null;\n"
|
||
"5. piles: 充电枪列表 [{type: '快', free: 4, total: 4}];\n"
|
||
"6. parking: 停车费用描述(通常在'P'图标后,例如 '收费停车:以场站实际收费规则为准' 或 '限时免费停车...')。\n"
|
||
"7. distance: 距离信息字符串(例如 '5.3km')。\n"
|
||
"8. bounds: {x1,y1,x2,y2} 区域像素坐标(0-1000);\n"
|
||
"9. bounds_norm: {left,top,right,bottom} 归一化坐标(0-1);\n"
|
||
"10. station_name_bounds: 场站名称文字区域坐标 {x1,y1,x2,y2}(0-1000);\n"
|
||
"11. station_name_bounds_norm: 场站名称文字归一化坐标(0-1)。\n"
|
||
"\n"
|
||
"重要约束(违反者不予识别):\n"
|
||
"A. 严禁识别广告位和筛选标签。如“夜间免停”、“洗手间”、“不限车长”、“不限车高”、“组团”、“综合排序”等均不是场站。\n"
|
||
"B. 真正的场站卡片必须是一个横跨屏幕的大卡片,包含:场站名称(大号加粗)、金额(¥开头)、距离(km结尾)、充电枪状态(闲x/x)。\n"
|
||
"C. 严禁将屏幕中间的筛选标签误认为场站卡片。\n"
|
||
"\n"
|
||
"严格返回纯JSON格式。"
|
||
)
|
||
|
||
@staticmethod
|
||
def _extract_json(text: str) -> str:
|
||
if not text:
|
||
return "[]"
|
||
|
||
cleaned = text.strip()
|
||
if "```" in cleaned:
|
||
lines = []
|
||
for line in cleaned.splitlines():
|
||
if line.strip().startswith("```"):
|
||
continue
|
||
lines.append(line)
|
||
cleaned = "\n".join(lines).strip()
|
||
|
||
decoder = json.JSONDecoder()
|
||
|
||
pos = 0
|
||
while pos < len(cleaned):
|
||
idx_dict = cleaned.find("{", pos)
|
||
idx_list = cleaned.find("[", pos)
|
||
|
||
candidates = [i for i in (idx_dict, idx_list) if i != -1]
|
||
if not candidates:
|
||
break
|
||
|
||
start = min(candidates)
|
||
snippet = cleaned[start:]
|
||
try:
|
||
_, end = decoder.raw_decode(snippet)
|
||
return snippet[:end]
|
||
except json.JSONDecodeError:
|
||
pos = start + 1
|
||
continue
|
||
|
||
return "[]"
|
||
|
||
@staticmethod
def _add_center(obj, device_info):
    """Attach a click point to ``obj`` using the ``"center"`` anchor.

    Thin alias that forwards to ``_add_click_point`` and returns whatever
    that call returns.
    """
    kit = XinDianTuReadImageKit
    return kit._add_click_point(
        obj,
        device_info,
        anchor="center",
    )
|
||
|
||
@staticmethod
def _add_click_point(obj, device_info, anchor: str = "center"):
    """Derive a tap coordinate from model-reported bounds and store it on ``obj``.

    The station-name text box is preferred as the tap target; the whole-card
    box is the fallback.  Box values may be 0-1 fractions, 0-1000 normalised
    units or absolute pixels; the scale is inferred from the largest value.

    Args:
        obj: Dict produced by the vision model.  May carry ``bounds``,
            ``bounds_norm``, ``station_name_bounds`` and/or
            ``station_name_bounds_norm``; these keys are removed before
            returning.
        device_info: Dict with ``displayWidth``/``displayHeight`` (or
            ``width``/``height``).  ``None`` or missing values fall back to
            1080x2400.
        anchor: ``"top_left"`` taps near the card's top-left corner; any
            other value taps the horizontal centre at 40% of the card
            height.  Only used when falling back to the card box.

    Returns:
        ``obj`` (mutated in place; ``uia_center_x``/``uia_center_y`` are set
        when a point could be computed), or ``None`` when ``obj`` is not a
        dict or the point lies inside the top/bottom exclusion bands.
    """
    import math  # function-scope import: the module header does not import math

    if not isinstance(obj, dict):
        # The model sometimes emits null/strings as array elements; treat
        # them like an excluded entry instead of raising AttributeError.
        return None

    device_info = device_info or {}
    display_width = float(device_info.get("displayWidth") or device_info.get("width") or 1080)
    display_height = float(device_info.get("displayHeight") or device_info.get("height") or 2400)

    def to_pixels(box):
        """Convert one bounds value to (x1, y1, x2, y2) in pixels, or None."""
        if not box:
            return None

        if isinstance(box, (list, tuple)) and len(box) == 4:
            raw = list(box)
        elif isinstance(box, dict):
            # Accept both naming schemes the prompts ask for.
            raw = [
                box.get(primary) if box.get(primary) is not None else box.get(alias)
                for primary, alias in (("left", "x1"), ("top", "y1"), ("right", "x2"), ("bottom", "y2"))
            ]
            if any(v is None for v in raw):
                return None
        else:
            return None

        try:
            v1, v2, v3, v4 = (float(v) for v in raw)
        except (ValueError, TypeError):
            return None
        if not all(math.isfinite(v) for v in (v1, v2, v3, v4)):
            # json.loads accepts NaN/Infinity; int() on them would raise later.
            return None

        peak = max(v1, v2, v3, v4)
        if peak <= 1.05:
            # 0-1 fractions (small tolerance for rounding overshoot).
            x1, y1, x2, y2 = v1 * display_width, v2 * display_height, v3 * display_width, v4 * display_height
        elif peak <= 1005:
            # 0-1000 normalised space.
            x1 = (v1 / 1000.0) * display_width
            y1 = (v2 / 1000.0) * display_height
            x2 = (v3 / 1000.0) * display_width
            y2 = (v4 / 1000.0) * display_height
        else:
            # Anything larger is taken as absolute pixels.
            x1, y1, x2, y2 = v1, v2, v3, v4

        return min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2)

    uia_x = None
    uia_y = None

    # Preferred target: the station-name text itself.
    coords = to_pixels(obj.get("station_name_bounds_norm")) or to_pixels(obj.get("station_name_bounds"))
    if coords:
        x1, y1, x2, y2 = coords
        uia_x = int(x1 + (x2 - x1) / 2)
        uia_y = int(y1 + (y2 - y1) / 2)
        logger.info(f"坐标计算: 使用文字区域 -> ({uia_x}, {uia_y}) | 区域: {coords} | 屏幕: {display_width}x{display_height}")

    # Fallback target: the whole card.
    if uia_x is None:
        coords = to_pixels(obj.get("bounds_norm")) or to_pixels(obj.get("bounds"))
        if coords:
            x1, y1, x2, y2 = coords
            w, h = x2 - x1, y2 - y1
            if anchor == "top_left":
                uia_x = int(x1 + max(5.0, w * 0.15))
                uia_y = int(y1 + max(5.0, h * 0.20))
            else:
                # Slightly above the vertical centre to stay clear of
                # buttons that may sit at the bottom of the card.
                uia_x = int(x1 + w / 2)
                uia_y = int(y1 + h * 0.4)
            logger.info(f"坐标计算: 使用卡片区域 -> ({uia_x}, {uia_y}) | 区域: {coords} | 屏幕: {display_width}x{display_height}")

    # Safety bands: reject taps in the top/bottom strips of the screen.
    if uia_y is not None:
        if uia_y < (display_height * SAFE_EXCLUDE_RATIO):
            logger.warning(f"安全排除: 坐标 ({uia_x}, {uia_y}) 位于屏幕顶部 {int(SAFE_EXCLUDE_RATIO*100)}% 区域,疑似广告或菜单,已忽略。")
            return None

        if uia_y > (display_height * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)):
            logger.warning(f"安全排除: 坐标 ({uia_x}, {uia_y}) 位于屏幕底部 {int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}% 区域,疑似底部功能区,已忽略。")
            return None

    if uia_x is not None and uia_y is not None:
        obj["uia_center_x"] = uia_x
        obj["uia_center_y"] = uia_y

    # The raw boxes are only inputs to this computation; drop them.
    for key in ("bounds", "bounds_norm", "station_name_bounds", "station_name_bounds_norm"):
        obj.pop(key, None)
    return obj
|
||
|
||
@staticmethod
async def find_all_time_button_coordinate(image_url: str, device_info: dict = None) -> dict:
    """Ask the vision model for the "全部时段" button and return its tap point.

    Args:
        image_url: URL of the screenshot sent to the model.
        device_info: Screen description forwarded to the model and used to
            scale coordinates; the class fallback is used when ``None``.

    Returns:
        ``{"uia_center_x": ..., "uia_center_y": ...}`` when the reply
        contained bounds, otherwise ``{}`` (not found, point rejected by the
        safe-zone filter, or unparsable reply).  Errors raised by the model
        request itself are not caught here.
    """
    kit = XinDianTuReadImageKit
    if device_info is None:
        logger.warning("未提供动态设备信息,使用通用回退配置。")
        device_info = kit._FALLBACK_DEVICE_INFO

    prompt = (
        "仅输出JSON对象(不含任何说明文字),请找到图片中带有“全部时段”字样的按钮区域(通常在价格表下方,是一个带有右箭头的文字按钮)。\n"
        "返回格式示例:\n"
        "{\n"
        ' "bounds": {"x1": 100, "y1": 200, "x2": 300, "y2": 400}, \n'
        ' "bounds_norm": {"left": 0.1, "top": 0.2, "right": 0.3, "bottom": 0.4}\n'
        "}\n"
        "注意:bounds应使用0-1000的归一化坐标空间。\n"
        "如果未找到,返回空JSON {}。"
    )

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                {"type": "text", "text": prompt},
            ],
        },
    ]

    # The OpenAI-compatible client is synchronous; run it in the default
    # executor.  get_running_loop() is the supported accessor in coroutines.
    loop = asyncio.get_running_loop()
    resp = await loop.run_in_executor(
        None,
        lambda: kit._client.chat.completions.create(model=VL_MODEL_NAME, messages=messages),
    )

    content = resp.choices[0].message.content or ""
    raw = kit._extract_json(content)

    try:
        data = json.loads(raw)
        if not isinstance(data, dict) or not (data.get("bounds") or data.get("bounds_norm")):
            return {}

        located = kit._add_center(data, device_info)
        if located is None:
            # _add_click_point returns None for points in the excluded
            # top/bottom bands; that is a rejection, not a parsing error.
            logger.info("All-time button point rejected by the safe-zone filter.")
            return {}

        # Only the tap point is exposed to the caller.
        return {
            "uia_center_x": located.get("uia_center_x"),
            "uia_center_y": located.get("uia_center_y"),
        }
    except Exception as e:
        logger.error(f"Error parsing JSON: {e}")
        logger.error(f"Raw content: {raw}")
        return {}
|
||
|
||
@staticmethod
async def find_station_coordinate_first_page(image_url: str, station_name: str, device_info: dict = None) -> dict:
    """Locate one station's name text on the list screenshot and return a tap point.

    Args:
        image_url: URL of the screenshot sent to the model.
        station_name: Name the model is asked to match exactly.
        device_info: Screen description forwarded to the model and used to
            scale coordinates; the class fallback is used when ``None``.

    Returns:
        ``{"uia_center_x": ..., "uia_center_y": ...}`` when the reply
        contained bounds, otherwise ``{}``.  Errors raised by the model
        request itself are not caught here.
    """
    kit = XinDianTuReadImageKit
    if device_info is None:
        logger.warning("未提供动态设备信息,使用通用回退配置。")
        device_info = kit._FALLBACK_DEVICE_INFO

    prompt = (
        "仅输出JSON对象(不含任何说明文字)。"
        "请在图片中找到场站名称完全匹配“" + str(station_name) + "”的那一行/卡片,"
        "返回该场站名称文字本身的区域坐标(优先返回归一化坐标)。"
        "返回格式示例:"
        "{\"station_name_bounds\": {\"x1\": 100, \"y1\": 200, \"x2\": 300, \"y2\": 240}, "
        "\"station_name_bounds_norm\": {\"left\": 0.1, \"top\": 0.2, \"right\": 0.3, \"bottom\": 0.24}}"
        "注意:bounds使用0-1000归一化坐标空间;如果找不到,返回空JSON {}。"
    )

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                {"type": "text", "text": prompt},
            ],
        },
    ]

    # Synchronous client -> default executor; get_running_loop() is the
    # supported accessor inside a coroutine.
    loop = asyncio.get_running_loop()
    resp = await loop.run_in_executor(
        None,
        lambda: kit._client.chat.completions.create(model=VL_MODEL_NAME, messages=messages),
    )

    content = resp.choices[0].message.content or ""
    raw = kit._extract_json(content)

    bound_keys = ("station_name_bounds", "station_name_bounds_norm", "bounds", "bounds_norm")
    try:
        data = json.loads(raw)
        if not isinstance(data, dict) or not any(data.get(key) for key in bound_keys):
            return {}

        located = kit._add_click_point(data, device_info, anchor="station_text")
        if located is None:
            # Point fell into an excluded screen band; nothing to tap.
            return {}

        return {
            "uia_center_x": located.get("uia_center_x"),
            "uia_center_y": located.get("uia_center_y"),
        }
    except Exception as e:
        # Still best-effort, but no longer silent.
        logger.warning(f"Failed to resolve station coordinate: {e}")
        return {}
|
||
|
||
@staticmethod
async def parse_first_level_image_url(image_url: str, device_info: dict = None) -> list:
    """Recognise the station cards on a list screenshot.

    The model reply is expected to be a JSON array (a single object is
    wrapped into a one-element list).  Entries that are not objects, are
    marked unusable (``b_use`` != 1), land in an excluded screen band, or
    have no ``station_name`` are dropped.  Entries left without a tap point
    trigger a per-station follow-up lookup.

    Args:
        image_url: URL of the screenshot sent to the model.
        device_info: Screen description forwarded to the model and used to
            scale coordinates; the class fallback is used when ``None``.

    Returns:
        List of station dicts, each carrying ``uia_center_x`` and
        ``uia_center_y``; ``[]`` when the reply cannot be processed.  Errors
        raised by the first model request are not caught here.
    """
    kit = XinDianTuReadImageKit
    if device_info is None:
        logger.warning("未提供动态设备信息,使用通用回退配置。")
        device_info = kit._FALLBACK_DEVICE_INFO

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                {"type": "text", "text": kit._prompt},
            ],
        },
    ]

    # Synchronous client -> default executor; get_running_loop() is the
    # supported accessor inside a coroutine.
    loop = asyncio.get_running_loop()
    resp = await loop.run_in_executor(
        None,
        lambda: kit._client.chat.completions.create(model=VL_MODEL_NAME, messages=messages),
    )

    content = resp.choices[0].message.content or ""
    raw = kit._extract_json(content)

    try:
        parsed = json.loads(raw)
        if isinstance(parsed, dict):
            parsed = [parsed]
        elif not isinstance(parsed, list):
            parsed = []

        stations = []
        for entry in parsed:
            # A stray null/string element must not discard the whole page.
            if not isinstance(entry, dict) or entry.get("b_use", 1) != 1:
                continue
            located = kit._add_click_point(entry, device_info, anchor="station_text")
            if located is None:
                # Rejected by the top/bottom safe-zone filter.
                continue
            located.pop("b_use", None)  # internal flag, not part of the result
            if not located.get("station_name"):
                continue
            stations.append(located)

        # Follow-up lookups for stations the first pass could not place.
        pending = [
            (idx, item["station_name"])
            for idx, item in enumerate(stations)
            if item.get("uia_center_x") is None or item.get("uia_center_y") is None
        ]
        if pending:
            results = await asyncio.gather(
                *(
                    kit.find_station_coordinate_first_page(image_url, name, device_info)
                    for _, name in pending
                ),
                return_exceptions=True,
            )
            for (idx, _), result in zip(pending, results):
                if (
                    isinstance(result, dict)
                    and result.get("uia_center_x") is not None
                    and result.get("uia_center_y") is not None
                ):
                    stations[idx]["uia_center_x"] = result.get("uia_center_x")
                    stations[idx]["uia_center_y"] = result.get("uia_center_y")

        for item in stations:
            # NOTE(review): X is unconditionally forced to 100 here, which
            # discards every computed X (including the follow-up lookups
            # above).  Kept as-is because callers may rely on it — confirm
            # whether this is deliberate or a leftover debugging override.
            item["uia_center_x"] = 100
            item.setdefault("uia_center_y", None)

        return stations
    except Exception as e:
        logger.error(f"Error parsing JSON: {e}")
        logger.error(f"Raw content: {raw}")
        return []
|
||
|
||
@staticmethod
async def parse_hybrid_image(image_path, uploader, cdn_domain):
    """Unified entry point for hybrid recognition.

    Forwards all arguments to ``get_stations_hybrid`` and returns its result.
    """
    stations = await XinDianTuReadImageKit.get_stations_hybrid(
        image_path,
        uploader,
        cdn_domain,
    )
    return stations
|
||
|
||
@staticmethod
async def parse_vl_image(vl_image_url, json_metadata, device_info=None):
    """Recognise stations on an annotated image and pair them with card metadata.

    The image is expected to show the card regions outlined in green; the
    model is asked to return one array element per box, top to bottom.  The
    reply is aligned by index with ``json_metadata["cards"]`` sorted by the
    second element of each card's ``rect``, and the tap point is taken from
    the metadata rather than from the model.

    Args:
        vl_image_url: URL of the annotated image.
        json_metadata: Dict with a ``cards`` list; each card needs ``rect``
            and should provide ``click_point``.  An optional ``height`` is
            used as screen height when ``device_info`` is not given.
        device_info: Optional dict with ``displayHeight`` or ``height``.

    Returns:
        List of station dicts with ``uia_center_x``, ``uia_center_y`` and
        ``rect`` added; entries without a name or inside the excluded
        top/bottom bands are skipped.
    """
    if not json_metadata or "cards" not in json_metadata:
        return []

    # Sort a copy: the caller's metadata must not be reordered as a side effect.
    cards_meta = sorted(json_metadata["cards"], key=lambda card: card["rect"][1])

    prompt = (
        "图片中用绿色矩形框标记了若干个充电站卡片区域。\n"
        "请按从上到下的顺序,依次识别每个绿色框内的场站信息,并返回一个JSON数组。\n"
        "数组中元素的顺序必须与图片中绿色框从上到下的顺序严格一致。\n"
        "如果某个框内不是有效的场站卡片(例如是广告),请返回null或空对象,不要跳过顺序。\n"
        "\n"
        "每个JSON对象包含以下字段:\n"
        "1. station_name: 场站名称;\n"
        "2. price: 价格(数字);\n"
        "3. pro_price: Pro会员价(数字,无则null);\n"
        "4. piles: 充电枪描述字符串(例如 '快 闲4/4');\n"
        "5. tags: 标签列表(如 ['限时免费']);\n"
        "6. parking: 停车费用描述(通常在'P'图标后,例如 '收费停车:以场站实际收费规则为准' 或 '限时免费停车...')。\n"
        "7. distance: 距离信息字符串(例如 '5.3km')。\n"
        "\n"
        "严格返回纯JSON格式。"
    )

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": vl_image_url}},
                {"type": "text", "text": prompt},
            ],
        },
    ]

    # Synchronous client -> default executor; get_running_loop() is the
    # supported accessor inside a coroutine.
    loop = asyncio.get_running_loop()
    resp = await loop.run_in_executor(
        None,
        lambda: XinDianTuReadImageKit._client.chat.completions.create(model=VL_MODEL_NAME, messages=messages),
    )

    content = resp.choices[0].message.content or ""
    raw = XinDianTuReadImageKit._extract_json(content)

    final_stations = []
    try:
        llm_data = json.loads(raw)
        if isinstance(llm_data, list):
            if len(llm_data) != len(cards_meta):
                # Index alignment is only trustworthy when the counts agree.
                logger.warning(
                    f"VL result count ({len(llm_data)}) differs from card metadata count ({len(cards_meta)}); "
                    "aligning by index up to the shorter length."
                )

            # Screen height does not change per item; resolve it once.
            if device_info:
                img_h = float(device_info.get("displayHeight") or device_info.get("height") or 2400)
            else:
                img_h = json_metadata.get("height", 2400)

            for item, meta in zip(llm_data, cards_meta):
                if not item or not isinstance(item, dict):
                    continue
                if not item.get("station_name"):
                    continue

                # The tap point comes from the metadata, not from the model.
                click_pt = meta.get("click_point", [0, 0])
                try:
                    cx, cy = click_pt
                except (TypeError, ValueError):
                    logger.warning(f"Invalid click_point in card metadata: {click_pt!r}")
                    continue

                if cy < (img_h * SAFE_EXCLUDE_RATIO):
                    logger.warning(f"VL安全排除: 坐标 ({cx}, {cy}) 位于屏幕顶部 {int(SAFE_EXCLUDE_RATIO*100)}% 区域,已忽略。")
                    continue

                if cy > (img_h * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)):
                    logger.warning(f"VL安全排除: 坐标 ({cx}, {cy}) 位于屏幕底部 {int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}% 区域,疑似底部功能区,已忽略。")
                    continue

                item["uia_center_x"] = cx
                item["uia_center_y"] = cy
                item["rect"] = meta.get("rect")
                final_stations.append(item)
        else:
            logger.warning(f"LLM return format error: expected list, got {type(llm_data)}")
    except Exception as e:
        logger.error(f"Error parsing VL response: {e}")

    return final_stations
|
||
|
||
@staticmethod
async def get_stations_hybrid(image_path, uploader, cdn_domain):
    """Hybrid recognition: slice the screenshot into cards, then OCR each slice.

    Steps: (1) ``get_card_segments`` cuts card patches out of the image,
    (2) every patch is uploaded and recognised concurrently via
    ``recognize_card_text``, (3) named results get the patch centre as
    ``x``/``y``, (4) names on the exclusion lists are filtered out.

    Args:
        image_path: Local path of the screenshot.
        uploader: Object exposing ``upload_file(local_path, remote_path)``.
        cdn_domain: Prefix used to build the public URL of an uploaded patch.

    Returns:
        List of recognised station dicts; ``[]`` when no patch was found.
    """
    kit = XinDianTuReadImageKit

    # 1. Image-based slicing.
    segments = kit.get_card_segments(image_path)
    if not segments:
        return []

    # 2. Upload every patch and queue its OCR request.
    ocr_jobs = []
    for seg in segments:
        patch_path = seg["patch_path"]
        remote_path = f"tmp/patches/{os.path.basename(patch_path)}"
        # NOTE(review): called synchronously, so it blocks the event loop
        # while uploading — confirm upload_file is not a coroutine.
        uploader.upload_file(patch_path, remote_path)
        patch_url = f"{cdn_domain}/{remote_path}"

        seg["patch_url"] = patch_url  # kept on the segment for later use
        ocr_jobs.append(kit.recognize_card_text(patch_url))

    results = await asyncio.gather(*ocr_jobs)

    # 3. Attach the patch centre to every result that has a usable name.
    final_stations = []
    for seg, res in zip(segments, results):
        name = res.get("station_name")
        if name and name != "未知":
            res["x"] = seg["center_x"]
            res["y"] = seg["center_y"]
            final_stations.append(res)

    if not final_stations:
        return final_stations

    # 4. Post-filter by exclusion lists.  The visible import block of this
    # module does not import these two names, so fall back to "no filter"
    # instead of raising NameError after all the OCR work has been done.
    try:
        excluded_source = STATION_EXCLUDED_TITLES
        blacklist_keywords = tuple(STATION_BLACKLIST_KEYWORDS)
    except NameError:
        logger.warning(
            "STATION_EXCLUDED_TITLES / STATION_BLACKLIST_KEYWORDS are not defined; "
            "skipping station name post-filter."
        )
        return final_stations

    excluded_titles = {str(title).replace(" ", "").strip() for title in excluded_source}

    def is_allowed(item):
        normalized = str(item.get("station_name")).replace(" ", "").strip()
        if normalized in excluded_titles:
            return False
        return not any(keyword in normalized for keyword in blacklist_keywords)

    return [item for item in final_stations if is_allowed(item)]
|
||
|
||
@staticmethod
def get_card_segments(image_path, output_dir="./Debug/Patches"):
    """Cut station cards out of a screenshot using horizontal divider bands.

    Each pixel row between 15% and 85% of the image height is classified by
    its mean brightness and standard deviation as white (2), divider (1) or
    content (0); rows outside that range are treated as dividers.  Runs of
    non-divider rows that contain at least one content row become candidate
    blocks, which are size-filtered, cropped horizontally and saved as JPEG
    patches.

    Args:
        image_path: Path of the screenshot to analyse.
        output_dir: Directory the patches are written to (created if
            missing).

    Returns:
        List of dicts ``{"patch_path": str, "center_x": int,
        "center_y": int, "y_range": (y1, y2)}``; ``[]`` when the analysis
        fails for any reason (the error is logged).
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    def is_content_column(col):
        # Non-uniform column, or clearly darker than the background.
        return np.std(col) > 5 or np.mean(col) < 245

    try:
        # convert() returns an independent image, so the file handle can be
        # released right away.
        with Image.open(image_path) as source:
            img = source.convert('RGB')
        width, height = img.size
        img_data = np.array(img)

        logger.info(f"页面分析 - 分辨率: {width}x{height}")

        # 1. Row classification: 0 = content, 1 = divider, 2 = white/empty.
        row_types = []
        debug_rows = []
        for y in range(height):
            # Header/footer area: mark as divider so no segment forms there.
            if y < height * 0.15 or y > height * 0.85:
                row_types.append(1)
                continue

            row = img_data[y]
            row_mean = np.mean(row)
            row_std = np.std(row)

            if row_mean > 252:
                r_type = 2  # card background
            elif 230 < row_mean <= 252 and row_std < 15:
                r_type = 1  # uniform, slightly darker than white: divider
            else:
                r_type = 0  # text / images

            row_types.append(r_type)

            # Sampled for the debug log only.
            if y % 50 == 0:
                debug_rows.append(f"Row {y}: Mean={row_mean:.1f}, Std={row_std:.1f} -> Type={r_type}")

        if debug_rows:
            logger.info("行特征采样 (调试用):\n" + "\n".join(debug_rows))

        # 2. Collect runs of non-divider rows that contain content.
        raw_blocks = []
        in_segment = False
        seg_start = -1
        has_content = False
        for y, r_type in enumerate(row_types):
            if r_type != 1:
                if not in_segment:
                    in_segment = True
                    seg_start = y
                    has_content = False
                if r_type == 0:
                    has_content = True
            elif in_segment:
                if has_content:
                    raw_blocks.append((seg_start, y))
                in_segment = False

        # A run that reaches the last row is closed here.
        if in_segment and has_content:
            raw_blocks.append((seg_start, len(row_types)))

        # 3. Filter, crop and save.
        base_name = os.path.splitext(os.path.basename(image_path))[0]
        valid_segments = []
        logger.info(f"Initial raw blocks count: {len(raw_blocks)}")

        for i, (y1, y2) in enumerate(raw_blocks):
            h = y2 - y1
            # Too small (noise / a lone text line) or too large (whole screen).
            if h < 50 or h > 1000:
                continue

            # Skip gaps that are almost entirely white.
            region_mean = np.mean(img_data[y1:y2])
            if region_mean > 254.5:
                logger.info(f" [-] 忽略区域 {i}: Y({y1}-{y2}), H={h}, 整体太白 (Mean={region_mean:.1f} > 254.5)")
                continue

            if h < 60:
                logger.info(f" [-] 忽略区域 {i}: Y({y1}-{y2}), H={h}, 高度不足 (<60)")
                continue

            # Margin brightness is reported in the log only, not used to filter.
            margin_w = max(5, int(width * 0.05))
            l_mean = np.mean(img_data[y1:y2, 0:margin_w])
            r_mean = np.mean(img_data[y1:y2, width - margin_w:width])

            # Horizontal crop: first content column from each side, scanning
            # every second column within the outer 40% of the width.
            x1, x2 = 0, width
            for x in range(0, int(width * 0.4), 2):
                if is_content_column(img_data[y1:y2, x]):
                    x1 = x
                    break

            for x in range(width - 1, int(width * 0.6), -2):
                if is_content_column(img_data[y1:y2, x]):
                    x2 = x + 1
                    break

            # Small padding, clamped to the image.
            x1 = max(0, x1 - 10)
            x2 = min(width, x2 + 10)

            if (x2 - x1) < width * 0.5:
                logger.info(f" [-] 忽略区域 {i}: 裁剪后宽度过小 ({x2-x1})")
                continue

            patch = img.crop((x1, y1, x2, y2))
            patch_name = f"{base_name}_p{i}_{y1}.jpg"
            patch_path = os.path.join(output_dir, patch_name)
            patch.save(patch_path)

            valid_segments.append({
                "patch_path": os.path.abspath(patch_path),
                "center_x": (x1 + x2) // 2,
                "center_y": (y1 + y2) // 2,
                "y_range": (y1, y2),
            })
            logger.info(f" [+] 发现卡片 {i}: Y({y1}-{y2}), H={h}, Crop X({x1}-{x2}), 边缘(L={l_mean:.1f}, R={r_mean:.1f}), 已保存")

        logger.info(f"分析完成:识别到 {len(valid_segments)} 个区域")
        return valid_segments

    except Exception as e:
        logger.error(f"图形学切片失败: {e}", exc_info=True)
        return []
|
||
|
||
@staticmethod
async def recognize_card_text(patch_url):
    """Run the vision model on one card patch and return the parsed station info.

    Args:
        patch_url: Public URL of the patch image.

    Returns:
        The JSON object returned by the model (requested keys:
        ``station_name``, ``price``, ``piles``), or
        ``{"station_name": "未知"}`` when no JSON object can be recovered.
        Errors raised by the model request itself are not caught here.
    """
    kit = XinDianTuReadImageKit
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "请识别图片中的充电站信息,并以 JSON 格式输出:{\"station_name\": \"...\", \"price\": \"...\", \"piles\": \"空闲数/总数\"}。只输出 JSON,不要有其他文字。"
                },
                {"type": "image_url", "image_url": {"url": patch_url}},
            ],
        },
    ]

    # Synchronous client -> default executor; get_running_loop() is the
    # supported accessor inside a coroutine.
    loop = asyncio.get_running_loop()
    resp = await loop.run_in_executor(
        None,
        lambda: kit._client.chat.completions.create(
            model=VL_MODEL_NAME,
            messages=messages,
            max_tokens=200,
        ),
    )

    # content may be None (e.g. an empty completion); never call .strip() on it.
    content = (resp.choices[0].message.content or "").strip()

    # Reuse the shared extractor instead of a greedy "{.*}" regex, which
    # breaks as soon as the reply contains a brace after the JSON object.
    try:
        parsed = json.loads(kit._extract_json(content))
    except ValueError:
        parsed = None

    if isinstance(parsed, list):
        # Tolerate a one-element array wrapping the object.
        parsed = next((entry for entry in parsed if isinstance(entry, dict)), None)
    if isinstance(parsed, dict):
        return parsed

    logger.warning(f"OCR 结果解析 JSON 失败: {content}")
    return {"station_name": "未知"}
|
||
|
||
@staticmethod
async def detect_ad(image_url: str, device_info: dict = None) -> dict:
    """Detect a popup ad on the screenshot and locate its close button.

    Args:
        image_url: URL of the screenshot sent to the model.
        device_info: Screen description forwarded to the model and used to
            scale coordinates; the class fallback is used when ``None``.

    Returns:
        ``{"has_ad": bool, "uia_center_x": int | None,
        "uia_center_y": int | None}``.  ``has_ad`` is ``True`` only when the
        model reported an ad together with close-button bounds.  Errors
        raised by the model request itself are not caught here.
    """
    kit = XinDianTuReadImageKit
    if device_info is None:
        logger.warning("未提供动态设备信息,使用通用回退配置。")
        device_info = kit._FALLBACK_DEVICE_INFO

    prompt = (
        "分析图片中是否存在覆盖在主界面上的广告弹窗(Popup Ad)。"
        "如果存在,请找到关闭该弹窗的按钮(通常是一个带有 'X' 的图标,可能在弹窗的右上角、右下角或正下方)。"
        "仅输出JSON对象(不含任何说明文字),包含以下字段:"
        "1. has_ad: 布尔值,是否存在广告弹窗;"
        "2. close_button_bounds: 关闭按钮的像素坐标或归一化坐标(0-1000) {x1,y1,x2,y2}。如果不存在广告则为 null。"
        "注意:只需识别最明显的那个关闭按钮。严格返回纯JSON。"
    )

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_url}},
                {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                {"type": "text", "text": prompt},
            ],
        },
    ]

    # Synchronous client -> default executor; get_running_loop() is the
    # supported accessor inside a coroutine.
    loop = asyncio.get_running_loop()
    resp = await loop.run_in_executor(
        None,
        lambda: kit._client.chat.completions.create(
            model=VL_MODEL_NAME_AD,  # dedicated model name for ad detection
            messages=messages,
        ),
    )

    content = resp.choices[0].message.content or ""
    raw = kit._extract_json(content)

    try:
        data = json.loads(raw)
        if not isinstance(data, dict):
            # e.g. the "[]" placeholder returned when no JSON was found.
            return {"has_ad": False, "uia_center_x": None, "uia_center_y": None}

        close_bounds = data.get("close_button_bounds")
        if data.get("has_ad", False) and close_bounds:
            temp_obj = {"bounds": close_bounds}
            # NOTE(review): _add_click_point also applies the top/bottom
            # safe-zone filter.  A close button inside those bands yields
            # has_ad=True with both coordinates None — confirm callers
            # handle that combination.
            kit._add_click_point(temp_obj, device_info)
            return {
                "has_ad": True,
                "uia_center_x": temp_obj.get("uia_center_x"),
                "uia_center_y": temp_obj.get("uia_center_y"),
            }
        return {"has_ad": False, "uia_center_x": None, "uia_center_y": None}
    except Exception as e:
        logger.error(f"Error parsing ad detection JSON: {e}")
        return {"has_ad": False, "uia_center_x": None, "uia_center_y": None}
|
||
|
||
@staticmethod
async def _download_as_base64(url: str) -> str:
    """Fetch ``url`` and return its body as a Base64 string.

    Used as the fallback when the model rejects an image URL.  Returns
    ``None`` on a non-200 status, an empty body or any download error
    (the error is logged).
    """
    try:
        async with aiohttp.ClientSession() as http:
            async with http.get(url) as reply:
                if reply.status != 200:
                    return None
                payload = await reply.read()
                if payload:
                    return base64.b64encode(payload).decode('utf-8')
                return None
    except Exception as e:
        logger.error(f"Failed to download image for base64 conversion: {e}")
        return None
|
||
|
||
@staticmethod
async def parse_price_schedule(station_name: str, image_url: str, device_info: dict = None) -> list:
    """Parse a price-by-time-period table from a screenshot.

    Args:
        station_name: Station the schedule belongs to; copied onto every row.
        image_url: URL of the screenshot showing the price table.
        device_info: Screen description forwarded to the model; the class
            fallback is used when ``None``.

    Returns:
        List of rows::

            {
                "station_name": str,
                "start": "HH:MM",
                "end": "HH:MM",
                "price_kwh": float | None,
                "electric_fee_kwh": float | None,
                "service_fee_kwh": float | None,
            }

        A missing or non-numeric fee is ``None`` and is then filled from the
        previous row when that row has a value.  ``[]`` is returned on any
        request or parsing failure.
    """
    kit = XinDianTuReadImageKit
    if device_info is None:
        logger.warning("未提供动态设备信息,使用通用回退配置。")
        device_info = kit._FALLBACK_DEVICE_INFO

    # Prompt constrains field names and forbids explanatory text.
    prompt = (
        "仅输出JSON数组(不含任何说明文字)。识别图片中所有时段的价格信息,返回每一行:"
        "1) start: 开始时间(HH:MM),2) end: 结束时间(HH:MM),"
        "3) price_kwh: 价格(元/度,站点价或总价),"
        "4) electric_fee_kwh: 电费(元/度),"
        "5) service_fee_kwh: 服务费(元/度)。"
        "所有数值以数字返回,例如 1.1800。若缺失某项则填 null。严格返回纯JSON数组。"
        "注意:如果某行价格信息为空或表示同上,请尝试复用上一行的价格信息。"
    )

    def _do_request(url_val):
        return kit._client.chat.completions.create(
            model=VL_MODEL_NAME,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": url_val}},
                        {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                        {"type": "text", "text": prompt},
                    ],
                },
            ],
        )

    def _as_float(value):
        """Best-effort numeric conversion; anything non-numeric becomes None."""
        if isinstance(value, bool) or not isinstance(value, (int, float, str)):
            return None
        try:
            return float(value)
        except ValueError:
            # e.g. "", "¥1.18" — one bad cell must not discard the schedule.
            return None

    # Synchronous client -> default executor; get_running_loop() is the
    # supported accessor inside a coroutine.
    loop = asyncio.get_running_loop()

    try:
        resp = await loop.run_in_executor(None, _do_request, image_url)
    except BadRequestError as e:
        # Some image URLs are rejected by the service's data inspection;
        # retry once with the image inlined as a Base64 data URI.
        err_code = getattr(e, 'code', '') or ''
        if not err_code and hasattr(e, 'body') and isinstance(e.body, dict):
            err_code = e.body.get('code', '')

        if not ('InvalidParameter.DataInspection' in str(err_code) or 'media format' in str(e).lower()):
            logger.error(f"API BadRequestError: {e}")
            return []

        logger.warning(f"Image URL rejected ({err_code}). Attempting Base64 fallback: {image_url}")
        base64_str = await kit._download_as_base64(image_url)
        if not base64_str:
            logger.error("Base64 download failed during fallback.")
            return []

        # Guess the MIME subtype from the URL; JPEG is the default.
        lower_url = image_url.lower()
        if ".png" in lower_url:
            ext = "png"
        elif ".webp" in lower_url:
            ext = "webp"
        else:
            ext = "jpeg"

        data_uri = f"data:image/{ext};base64,{base64_str}"
        try:
            resp = await loop.run_in_executor(None, _do_request, data_uri)
        except Exception as retry_error:
            # Same contract as the first attempt: log and return [].
            logger.error(f"API Unexpected Error: {retry_error}")
            return []
    except Exception as e:
        logger.error(f"API Unexpected Error: {e}")
        return []

    content = resp.choices[0].message.content or ""
    raw = kit._extract_json(content)

    try:
        rows = json.loads(raw)
        if not isinstance(rows, list):
            return []

        norm = [
            {
                "station_name": station_name,
                "start": r.get("start"),
                "end": r.get("end"),
                "price_kwh": _as_float(r.get("price_kwh")),
                "electric_fee_kwh": _as_float(r.get("electric_fee_kwh")),
                "service_fee_kwh": _as_float(r.get("service_fee_kwh")),
            }
            for r in rows
            if isinstance(r, dict)
        ]

        # Forward-fill: a row without a value inherits it from the row above.
        for prev, curr in zip(norm, norm[1:]):
            for key in ("price_kwh", "electric_fee_kwh", "service_fee_kwh"):
                if curr.get(key) is None and prev.get(key) is not None:
                    curr[key] = prev[key]

        return norm
    except Exception as e:
        logger.error(f"Error parsing JSON: {e}")
        logger.error(f"Raw content: {raw}")
        return []
|
||
|
||
@staticmethod
|
||
def _to_minutes(t: str) -> int:
|
||
"""
|
||
将 "HH:MM" 转为分钟数(0-1440)
|
||
说明:
|
||
- 特殊处理 "24:00" -> 1440,方便区间闭合处理
|
||
- 非法格式返回 0
|
||
"""
|
||
if not t:
|
||
return 0
|
||
try:
|
||
h, m = t.split(":")
|
||
h = int(h)
|
||
m = int(m)
|
||
if h == 24 and m == 0:
|
||
return 24 * 60
|
||
return h * 60 + m
|
||
except Exception:
|
||
return 0
|
||
|
||
@staticmethod
|
||
def _fmt(t: int) -> str:
|
||
"""
|
||
将分钟数格式化为 "HH:MM"
|
||
"""
|
||
h = t // 60
|
||
m = t % 60
|
||
return f"{h:02d}:{m:02d}"
|
||
|
||
@staticmethod
def expand_schedule_to_hourly(rows: list) -> list:
    """Split each schedule row at hour boundaries.

    Example: 05:00-08:00 becomes 05:00-06:00, 06:00-07:00, 07:00-08:00.
    Each produced slot is a half-open interval [start, end) that copies the
    row's price, electricity fee and service fee unchanged.

    Args:
        rows: Schedule rows with ``start``/``end`` as ``"HH:MM"`` strings.

    Returns:
        List of hourly slot dicts with keys ``start``, ``end``,
        ``price_kwh``, ``electric_fee_kwh`` and ``service_fee_kwh``.

    NOTE(review): rows whose end is not after their start are skipped, so an
    overnight range such as 23:00-07:00 produces no slots — confirm that is
    intended.
    """
    kit = XinDianTuReadImageKit
    slots = []
    for row in rows:
        begin = kit._to_minutes(row.get("start"))
        finish = kit._to_minutes(row.get("end"))
        if not begin < finish:
            continue

        # Cut points: the row's own ends plus every full hour strictly
        # between them.
        first_hour = (begin // 60 + 1) * 60
        edges = [begin, *range(first_hour, finish, 60), finish]
        for slot_start, slot_end in zip(edges, edges[1:]):
            slots.append({
                "start": kit._fmt(slot_start),
                "end": kit._fmt(slot_end),
                "price_kwh": row.get("price_kwh"),
                "electric_fee_kwh": row.get("electric_fee_kwh"),
                "service_fee_kwh": row.get("service_fee_kwh"),
            })
    return slots
|
||
|
||
@staticmethod
async def _fetch_md5(url: str) -> str:
    """
    Download an image and return the MD5 of its bytes (used for de-duplication).

    Args:
        url: Image URL to download.

    Returns:
        The 32-character hexadecimal MD5 digest of the response body, or an
        empty string when the response status is not 200 or the download
        fails. Failures are logged rather than raised, because hashing is a
        best-effort step for the caller.
    """
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                if resp.status != 200:
                    logger.warning("MD5 fetch got HTTP %s for %s", resp.status, url)
                    return ""
                content = await resp.read()
                return hashlib.md5(content).hexdigest()
    except Exception as e:
        # Keep the best-effort contract (return ""), but leave a trace so a
        # dropped image can be diagnosed.
        logger.warning("MD5 fetch failed for %s: %s", url, e)
        return ""
|
||
|
||
@staticmethod
async def parse_address(station_name: str, image_url: str, device_info: dict = None) -> dict:
    """
    Extract the station's full name and address from a detail-page screenshot,
    while concurrently locating the "all time periods" button.

    Args:
        station_name: Station name as seen in the list (possibly truncated);
            it is embedded in the prompt as a hint.
        image_url: URL of the screenshot to analyse.
        device_info: Optional device description; when omitted, the class
            fallback device info is used and a warning is logged.

    Returns:
        The JSON object returned by the model (the prompt asks for
        "full_station_name" and "address"), updated with whatever
        find_all_time_button_coordinate returns (per the original doc,
        uia_center_x / uia_center_y when the button is found). If the model
        output is not a JSON object, only the button data is returned.

    Raises:
        Any exception raised by the model request propagates to the caller;
        the pending button task is cancelled first so it is not orphaned.
    """
    if device_info is None:
        logger.warning("未提供动态设备信息,使用通用回退配置。")
        device_info = XinDianTuReadImageKit._FALLBACK_DEVICE_INFO

    # Start the button lookup right away so it runs alongside the model call.
    button_task = asyncio.create_task(
        XinDianTuReadImageKit.find_all_time_button_coordinate(image_url, device_info)
    )

    prompt = (
        "仅输出JSON对象(不含任何说明文字)。"
        "任务1:识别图片中充电站的完整名称(full_station_name)。"
        f"提示:列表中看到的名称可能是截断的(例如“{station_name}”),请在图片上方找到最匹配的完整名称。"
        "任务2:识别充电站的详细地址(address)。"
        "寻找规则:地址通常紧跟在场站名称下方,或者在‘距离’图标(定位小图表)附近,或者在带有‘导航’按钮的同一行。"
        "返回包含 full_station_name 和 address 字段的JSON对象,例如 {\"full_station_name\": \"完整名称\", \"address\": \"详细地址\"}。"
        "如果找不到,对应字段返回空字符串。"
        "严格返回纯JSON格式。"
    )

    def _request_completion():
        # Blocking SDK call; executed in the default thread pool below.
        return XinDianTuReadImageKit._client.chat.completions.create(
            model=VL_MODEL_NAME,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": image_url},
                        },
                        {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                        {"type": "text", "text": prompt},
                    ],
                },
            ],
        )

    try:
        # get_running_loop() is the supported way to reach the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()
        resp = await loop.run_in_executor(None, _request_completion)
        content = resp.choices[0].message.content or ""
        raw = XinDianTuReadImageKit._extract_json(content)
    except BaseException:
        # Do not leave the background task running (and un-awaited) when the
        # main request fails or this coroutine is cancelled.
        button_task.cancel()
        raise

    result = {}
    try:
        data = json.loads(raw)
        if isinstance(data, dict):
            result = data
    except (TypeError, ValueError) as e:
        logger.error(f"Error parsing address JSON: {e}")

    # Wait for the button lookup and merge its result into the response.
    try:
        button_result = await button_task
        if button_result:
            result.update(button_result)
    except Exception as e:
        logger.error(f"Error in button coordinate task: {e}")

    return result
|
||
|
||
@staticmethod
async def parse_price_schedule_multi(station_name: str, image_urls: list, device_info: dict = None) -> list:
    """
    Parse several price-table screenshots, de-duplicated by content MD5,
    and merge their period rows.

    Args:
        station_name: Station name forwarded to parse_price_schedule.
        image_urls: URLs of the price-table images.
        device_info: Optional device description forwarded to
            parse_price_schedule.

    Returns:
        The concatenated period rows of every unique image, in input order
        (not split by hour). Images whose content could not be hashed are
        skipped (and logged). Returns [] for empty input.
    """
    if not image_urls:
        return []

    urls = list(image_urls)
    # Hash all images concurrently; gather() keeps results in input order,
    # so the first URL for a given digest still wins.
    digests = await asyncio.gather(
        *(XinDianTuReadImageKit._fetch_md5(url) for url in urls)
    )

    md5_to_url = {}
    for url, digest in zip(urls, digests):
        if not digest:
            logger.warning("Skipping price image that could not be hashed: %s", url)
            continue
        md5_to_url.setdefault(digest, url)

    # Model calls stay sequential to keep the request rate unchanged.
    combined = []
    for url in md5_to_url.values():
        rows = await XinDianTuReadImageKit.parse_price_schedule(station_name, url, device_info=device_info)
        if rows:
            combined.extend(rows)
    return combined
|
||
|
||
@staticmethod
|
||
def hourly_full_day(rows: list) -> list:
|
||
"""
|
||
将时段列表规整为全天24个整点小时段
|
||
|
||
参数:
|
||
rows: 原始时段列表(可来自多图合并)
|
||
|
||
返回:
|
||
固定24条记录(00:00-01:00 到 23:00-24:00),
|
||
若某小时未被任何时段覆盖,则费用为 None
|
||
说明:
|
||
- 选择覆盖该小时段的时段(若多个,则选择重叠时间最长的一个)
|
||
- 保证返回结构完整,便于后续消费端显示或补全
|
||
"""
|
||
# 预处理:转换为分钟区间
|
||
intervals = []
|
||
for r in rows:
|
||
s = XinDianTuReadImageKit._to_minutes(r.get("start"))
|
||
e = XinDianTuReadImageKit._to_minutes(r.get("end"))
|
||
if e <= s:
|
||
continue
|
||
s = max(0, s)
|
||
e = min(1440, e)
|
||
intervals.append({
|
||
"s": s, "e": e,
|
||
"price_kwh": r.get("price_kwh"),
|
||
"electric_fee_kwh": r.get("electric_fee_kwh"),
|
||
"service_fee_kwh": r.get("service_fee_kwh"),
|
||
})
|
||
intervals.sort(key=lambda x: (x["s"], x["e"]))
|
||
|
||
result = []
|
||
for h in range(24):
|
||
hs = h * 60
|
||
he = (h + 1) * 60
|
||
best = None
|
||
best_overlap = 0
|
||
for it in intervals:
|
||
overlap = max(0, min(he, it["e"]) - max(hs, it["s"]))
|
||
if overlap > best_overlap:
|
||
best_overlap = overlap
|
||
best = it
|
||
result.append({
|
||
"start": XinDianTuReadImageKit._fmt(hs),
|
||
"end": XinDianTuReadImageKit._fmt(he),
|
||
"price_kwh": best["price_kwh"] if best else None,
|
||
"electric_fee_kwh": best["electric_fee_kwh"] if best else None,
|
||
"service_fee_kwh": best["service_fee_kwh"] if best else None,
|
||
})
|
||
return result
|
||
|
||
|
||
async def test1():
    """Manual check: parse the first-level (station list) page of XinDianTu."""
    url = "https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/Temp/8fd79c68-fec6-4ca7-8d8e-fbff3c6862c8.jpg"
    print(f"Testing First Level with URL: {url}")
    result = await XinDianTuReadImageKit.parse_first_level_image_url(url)
    rendered = json.dumps(result, ensure_ascii=False, indent=2)
    print("First Level Result:")
    print(rendered)
|
||
|
||
|
||
async def test2(station_name: str):
    """Manual check: extract the address from a second-level (detail) page."""
    url = "https://dsideal.obs.myhuaweicloud.com/HuangHai/Temp/SecondPage.jpg"
    print(f"Testing address extraction from: {url}")
    result = await XinDianTuReadImageKit.parse_address(station_name, url)
    # Attach the name that was searched for, so the printout is self-describing.
    result["station_name"] = station_name
    rendered = json.dumps(result, ensure_ascii=False, indent=2)
    print("Address result:")
    print(rendered)
|
||
|
||
|
||
async def test3(station_name: str):
    """Manual check: parse price-table images and print the 24-hour schedule."""
    samples = [
        "https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/Temp/XinDianTu/1.jpg",
        "https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/Temp/XinDianTu/2.jpg",
    ]
    rows = await XinDianTuReadImageKit.parse_price_schedule_multi(station_name, samples)
    hourly = XinDianTuReadImageKit.hourly_full_day(rows)
    rendered = json.dumps(hourly, ensure_ascii=False, indent=2)
    print(rendered)
|
||
|
||
|
||
if __name__ == "__main__":
    # Manual entry point: enable exactly one of the scenarios below.
    station_name = "吉林省看守所充电站"
    # asyncio.run(test1())
    # asyncio.run(test2(station_name))
    asyncio.run(test3(station_name))
|