# aiData/Apps/XinDianTu/XinDianTuReadImageKit.py

import numpy as np
from PIL import Image
import os
import asyncio
import hashlib
import json
import aiohttp
import logging
import base64
from openai import OpenAI, BadRequestError
from Config.Config import (
    ALY_LLM_API_KEY, VL_MODEL_NAME, VL_MODEL_NAME_AD,
    SAFE_EXCLUDE_RATIO, FALLBACK_WIDTH, FALLBACK_HEIGHT,
    BOTTOM_SAFE_EXCLUDE_RATIO
)

logger = logging.getLogger(__name__)

class XinDianTuReadImageKit:
    # Shared OpenAI-compatible client, created once when the class body is
    # executed and pointed at the DashScope compatible-mode endpoint.
    _client = OpenAI(
        api_key=ALY_LLM_API_KEY,
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
    )

    # Generic fallback device info, used only when the device information
    # cannot be obtained dynamically (i.e. the caller passes device_info=None).
    _FALLBACK_DEVICE_INFO = {
        "displayWidth": FALLBACK_WIDTH,
        "displayHeight": FALLBACK_HEIGHT,
        "productName": "generic"
    }

    # Prompt sent with the first-level station-list screenshot (see
    # parse_first_level_image_url). It asks the model for a bare JSON array of
    # station cards (usability flag, name, prices, piles, parking, distance and
    # bounding boxes) and instructs it to skip ads and filter tags.
    _prompt = (
        "仅输出JSON数组（不含任何说明文字），按从左到右、从上到下的顺序识别图片中的充电站区域。识别规则如下：\n"
        "1. 必须是卡片形式的充电站信息区域。\n"
        "2. 每一个卡片必须同时具备以下所有要素，否则严禁识别：\n"
        "   - 场站名称 (station_name)；\n"
        "   - 距离信息 (distance, 例如 '5.3km')，位于卡片右上角；\n"
        "   - 金额/电费 (price，例如 '0.8490')；\n"
        "   - 充电枪信息 (piles，包含'超'、'快'或'慢'的类型、总枪数和空闲枪数，例如 '快 闲4/4')。\n"
        "3. 如果缺少上述任何一项要素（例如只有名称和距离，但没有电费或枪数信息），说明它不是真正的场站卡片（可能是广告或功能入口），请直接跳过。\n"
        "\n"
        "JSON对象字段要求：\n"
        "1. b_use: 状态标识（1或0）。如果场站名称为灰色或带有“暂停使用”等标签，则为0，否则为1。\n"
        "2. station_name: 场站名称；\n"
        "3. price: 一度电的价格（数字）；\n"
        "4. pro_price: Pro会员价格（数字），无则为null；\n"
        "5. piles: 充电枪列表 [{type: '快', free: 4, total: 4}]；\n"
        "6. parking: 停车费用描述（通常在'P'图标后，例如 '收费停车：以场站实际收费规则为准' 或 '限时免费停车...'）。\n"
        "7. distance: 距离信息字符串（例如 '5.3km'）。\n"
        "8. bounds: {x1,y1,x2,y2} 区域像素坐标（0-1000）；\n"
        "9. bounds_norm: {left,top,right,bottom} 归一化坐标(0-1)；\n"
        "10. station_name_bounds: 场站名称文字区域坐标 {x1,y1,x2,y2}（0-1000）；\n"
        "11. station_name_bounds_norm: 场站名称文字归一化坐标(0-1)。\n"
        "\n"
        "重要约束（违反者不予识别）：\n"
        "A. 严禁识别广告位和筛选标签。如“夜间免停”、“洗手间”、“不限车长”、“不限车高”、“组团”、“综合排序”等均不是场站。\n"
        "B. 真正的场站卡片必须是一个横跨屏幕的大卡片，包含：场站名称（大号加粗）、金额（¥开头）、距离（km结尾）、充电枪状态（闲x/x）。\n"
        "C. 严禁将屏幕中间的筛选标签误认为场站卡片。\n"
        "\n"
        "严格返回纯JSON格式。"
    )

    @staticmethod
    def _extract_json(text: str) -> str:
        if not text:
            return "[]"

        cleaned = text.strip()
        if "```" in cleaned:
            lines = []
            for line in cleaned.splitlines():
                if line.strip().startswith("```"):
                    continue
                lines.append(line)
            cleaned = "\n".join(lines).strip()

        decoder = json.JSONDecoder()

        pos = 0
        while pos < len(cleaned):
            idx_dict = cleaned.find("{", pos)
            idx_list = cleaned.find("[", pos)

            candidates = [i for i in (idx_dict, idx_list) if i != -1]
            if not candidates:
                break

            start = min(candidates)
            snippet = cleaned[start:]
            try:
                _, end = decoder.raw_decode(snippet)
                return snippet[:end]
            except json.JSONDecodeError:
                pos = start + 1
                continue

        return "[]"

    @staticmethod
    def _add_center(obj, device_info):
        """Attach a centre-anchored click point to *obj*.

        Delegates to ``_add_click_point`` with ``anchor="center"`` and returns
        whatever that call returns.
        """
        kit = XinDianTuReadImageKit
        return kit._add_click_point(obj, device_info, anchor="center")

    @staticmethod
    def _add_click_point(obj, device_info, anchor: str = "center"):
        """Convert the bounding boxes carried by *obj* into a click coordinate.

        The station-name text box (``station_name_bounds_norm`` first, then
        ``station_name_bounds``) is preferred; the card box (``bounds_norm``,
        then ``bounds``) is the fallback. A box may be a 4-item list or a dict
        keyed ``left/top/right/bottom`` or ``x1/y1/x2/y2``. Its values are
        interpreted by magnitude: max <= 1.05 is a 0-1 ratio, max <= 1005 is a
        0-1000 scale, anything larger is taken as absolute pixels.

        Args:
            obj: dict produced by the model; mutated in place.
            device_info: dict read for ``displayWidth``/``width`` and
                ``displayHeight``/``height`` (defaults 1080 x 2400).
            anchor: ``"top_left"`` offsets from the card's top-left corner;
                any other value uses the horizontal centre at 40% of the card
                height. Only applies to the card-box fallback.

        Returns:
            *obj* with ``uia_center_x``/``uia_center_y`` set when a point could
            be computed and the four bounds keys removed, or ``None`` when the
            point falls inside the excluded top or bottom band of the screen
            (in which case *obj* is left untouched).
        """
        # Screen size, with tolerant key lookup and hard defaults.
        screen_w = float(device_info.get("displayWidth") or device_info.get("width") or 1080)
        screen_h = float(device_info.get("displayHeight") or device_info.get("height") or 2400)

        def to_pixels(raw):
            """Normalise one box to an ordered (x_min, y_min, x_max, y_max) pixel tuple, or None."""
            if not raw:
                return None

            if isinstance(raw, list) and len(raw) == 4:
                values = list(raw)
            elif isinstance(raw, dict):
                # Accept either naming scheme; the left/top/right/bottom keys win.
                values = []
                for primary, alternate in (("left", "x1"), ("top", "y1"), ("right", "x2"), ("bottom", "y2")):
                    picked = raw.get(primary)
                    if picked is None:
                        picked = raw.get(alternate)
                    values.append(picked)
                if any(v is None for v in values):
                    return None
            else:
                return None

            try:
                a, b, c, d = (float(v) for v in values)
            except (ValueError, TypeError):
                return None

            peak = max(a, b, c, d)
            if peak <= 1.05:
                # 0-1 normalised coordinates
                px1, py1, px2, py2 = a * screen_w, b * screen_h, c * screen_w, d * screen_h
            elif peak <= 1005:
                # 0-1000 normalised coordinates (commonly produced by Qwen-VL)
                px1 = (a / 1000.0) * screen_w
                py1 = (b / 1000.0) * screen_h
                px2 = (c / 1000.0) * screen_w
                py2 = (d / 1000.0) * screen_h
            else:
                # absolute pixel coordinates
                px1, py1, px2, py2 = a, b, c, d

            return min(px1, px2), min(py1, py2), max(px1, px2), max(py1, py2)

        click_x = None
        click_y = None

        # Preferred target: the station-name text region (safest place to tap).
        name_box = to_pixels(obj.get("station_name_bounds_norm")) or to_pixels(obj.get("station_name_bounds"))
        if name_box:
            left, top, right, bottom = name_box
            click_x = int(left + (right - left) / 2)
            click_y = int(top + (bottom - top) / 2)
            logger.info(f"坐标计算: 使用文字区域 -> ({click_x}, {click_y}) | 区域: {name_box} | 屏幕: {screen_w}x{screen_h}")
        else:
            # Fallback target: the whole card region.
            card_box = to_pixels(obj.get("bounds_norm")) or to_pixels(obj.get("bounds"))
            if card_box:
                left, top, right, bottom = card_box
                box_w = right - left
                box_h = bottom - top
                if anchor == "top_left":
                    click_x = int(left + max(5.0, box_w * 0.15))
                    click_y = int(top + max(5.0, box_h * 0.20))
                else:
                    # Centre horizontally, slightly above the middle vertically
                    # to stay clear of buttons near the bottom of the card.
                    click_x = int(left + box_w / 2)
                    click_y = int(top + box_h * 0.4)
                logger.info(f"坐标计算: 使用卡片区域 -> ({click_x}, {click_y}) | 区域: {card_box} | 屏幕: {screen_w}x{screen_h}")

        # Safety filter: reject points in the top band (usually ads/menus) and
        # in the bottom band (bottom function area).
        if click_y is not None:
            if click_y < (screen_h * SAFE_EXCLUDE_RATIO):
                logger.warning(f"安全排除: 坐标 ({click_x}, {click_y}) 位于屏幕顶部 {int(SAFE_EXCLUDE_RATIO*100)}% 区域，疑似广告或菜单，已忽略。")
                return None

            if click_y > (screen_h * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)):
                logger.warning(f"安全排除: 坐标 ({click_x}, {click_y}) 位于屏幕底部 {int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}% 区域，疑似底部功能区，已忽略。")
                return None

        if click_x is not None and click_y is not None:
            obj["uia_center_x"] = click_x
            obj["uia_center_y"] = click_y

        # Drop the temporary geometry fields from the result.
        for key in ("bounds", "bounds_norm", "station_name_bounds", "station_name_bounds_norm"):
            obj.pop(key, None)
        return obj

    @staticmethod
    async def find_all_time_button_coordinate(image_url: str, device_info: dict = None) -> dict:
        """Locate the "全部时段" (all time periods) button on a screenshot.

        Sends the image, the device info and a locating prompt to the vision
        model, then converts the returned bounds to a click point via
        ``_add_center``.

        Args:
            image_url: URL of the screenshot passed to the model.
            device_info: device description; the class-level fallback is used
                when omitted.

        Returns:
            ``{"uia_center_x": ..., "uia_center_y": ...}`` when the model
            returned bounds (values may be ``None`` if they could not be
            converted), otherwise ``{}``. ``{}`` is also returned when the
            point is rejected by the top/bottom safety filter or when
            post-processing fails.
        """
        if device_info is None:
            logger.warning("未提供动态设备信息，使用通用回退配置。")
            device_info = XinDianTuReadImageKit._FALLBACK_DEVICE_INFO

        prompt = (
            "仅输出JSON对象（不含任何说明文字），请找到图片中带有“全部时段”字样的按钮区域（通常在价格表下方，是一个带有右箭头的文字按钮）。\n"
            "返回格式示例：\n"
            "{\n"
            '  "bounds": {"x1": 100, "y1": 200, "x2": 300, "y2": 400}, \n'
            '  "bounds_norm": {"left": 0.1, "top": 0.2, "right": 0.3, "bottom": 0.4}\n'
            "}\n"
            "注意：bounds应使用0-1000的归一化坐标空间。\n"
            "如果未找到，返回空JSON {}。"
        )

        # The SDK call is blocking, so run it on the default executor.
        loop = asyncio.get_running_loop()
        resp = await loop.run_in_executor(
            None,
            lambda: XinDianTuReadImageKit._client.chat.completions.create(
                model=VL_MODEL_NAME,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": image_url
                                },
                            },
                            {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                            {"type": "text", "text": prompt},
                        ],
                    },
                ],
            )
        )

        content = resp.choices[0].message.content or ""
        raw = XinDianTuReadImageKit._extract_json(content)

        try:
            data = json.loads(raw)
            if not isinstance(data, dict) or not (data.get("bounds") or data.get("bounds_norm")):
                return {}

            located = XinDianTuReadImageKit._add_center(data, device_info)
            if located is None:
                # The safety filter rejected the point; this is an expected
                # outcome, not a parsing error.
                return {}

            # Only the click point is returned to the caller.
            return {
                "uia_center_x": located.get("uia_center_x"),
                "uia_center_y": located.get("uia_center_y")
            }
        except Exception as e:
            logger.error(f"Error parsing JSON: {e}")
            logger.error(f"Raw content: {raw}")
            return {}

    @staticmethod
    async def find_station_coordinate_first_page(image_url: str, station_name: str, device_info: dict = None) -> dict:
        """Locate the text of one named station on a first-page screenshot.

        Asks the vision model for the bounds of the station-name text that
        exactly matches *station_name* and converts them to a click point via
        ``_add_click_point``.

        Args:
            image_url: URL of the screenshot passed to the model.
            station_name: station name to search for; embedded in the prompt.
            device_info: device description; the class-level fallback is used
                when omitted.

        Returns:
            ``{"uia_center_x": ..., "uia_center_y": ...}`` when the model
            returned bounds (values may be ``None`` if they could not be
            converted), otherwise ``{}``. ``{}`` is also returned when the
            point is rejected by the top/bottom safety filter or when
            post-processing fails.
        """
        if device_info is None:
            logger.warning("未提供动态设备信息，使用通用回退配置。")
            device_info = XinDianTuReadImageKit._FALLBACK_DEVICE_INFO

        prompt = (
            "仅输出JSON对象（不含任何说明文字）。"
            "请在图片中找到场站名称完全匹配“" + str(station_name) + "”的那一行/卡片，"
            "返回该场站名称文字本身的区域坐标（优先返回归一化坐标）。"
            "返回格式示例："
            "{\"station_name_bounds\": {\"x1\": 100, \"y1\": 200, \"x2\": 300, \"y2\": 240}, "
            "\"station_name_bounds_norm\": {\"left\": 0.1, \"top\": 0.2, \"right\": 0.3, \"bottom\": 0.24}}"
            "注意：bounds使用0-1000归一化坐标空间；如果找不到，返回空JSON {}。"
        )

        # The SDK call is blocking, so run it on the default executor.
        loop = asyncio.get_running_loop()
        resp = await loop.run_in_executor(
            None,
            lambda: XinDianTuReadImageKit._client.chat.completions.create(
                model=VL_MODEL_NAME,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "image_url", "image_url": {"url": image_url}},
                            {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                            {"type": "text", "text": prompt},
                        ],
                    },
                ],
            )
        )

        content = resp.choices[0].message.content or ""
        raw = XinDianTuReadImageKit._extract_json(content)

        try:
            data = json.loads(raw)
            if not isinstance(data, dict):
                return {}

            has_bounds = (
                data.get("station_name_bounds")
                or data.get("station_name_bounds_norm")
                or data.get("bounds")
                or data.get("bounds_norm")
            )
            if not has_bounds:
                return {}

            located = XinDianTuReadImageKit._add_click_point(data, device_info, anchor="station_text")
            if located is None:
                # Rejected by the safety filter; nothing usable to click.
                return {}

            return {
                "uia_center_x": located.get("uia_center_x"),
                "uia_center_y": located.get("uia_center_y")
            }
        except Exception as e:
            # Still best-effort, but leave a trace instead of failing silently.
            logger.error(f"Error parsing JSON: {e}")
            logger.error(f"Raw content: {raw}")
            return {}

    @staticmethod
    async def parse_first_level_image_url(image_url: str, device_info: dict = None) -> list:
        """Recognise the station cards on a first-level list screenshot.

        Sends the image with the class-level ``_prompt`` to the vision model,
        keeps the entries whose ``b_use`` is 1 (or absent), computes a click
        point for each via ``_add_click_point`` and, for entries that still
        lack a coordinate, issues one follow-up lookup per station with
        ``find_station_coordinate_first_page``.

        Args:
            image_url: URL of the screenshot passed to the model.
            device_info: device description; the class-level fallback is used
                when omitted.

        Returns:
            A list of station dicts. Every dict has ``uia_center_x`` (forced
            to 100, see the note below) and ``uia_center_y`` (possibly
            ``None``); the ``b_use`` and bounds fields are removed. Returns
            ``[]`` when post-processing fails.
        """
        kit = XinDianTuReadImageKit

        if device_info is None:
            logger.warning("未提供动态设备信息，使用通用回退配置。")
            device_info = kit._FALLBACK_DEVICE_INFO

        # The SDK call is blocking, so run it on the default executor.
        loop = asyncio.get_running_loop()
        resp = await loop.run_in_executor(
            None,
            lambda: kit._client.chat.completions.create(
                model=VL_MODEL_NAME,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": image_url
                                },
                            },
                            {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                            {"type": "text", "text": kit._prompt},
                        ],
                    },
                ],
            )
        )

        content = resp.choices[0].message.content or ""
        raw = kit._extract_json(content)

        try:
            parsed = json.loads(raw)
            # A single object is treated like a one-element array.
            candidates = parsed if isinstance(parsed, list) else [parsed]

            stations = []
            for entry in candidates:
                # Skip malformed elements (e.g. null) instead of letting one
                # of them abort the whole result.
                if not isinstance(entry, dict):
                    continue
                if entry.get("b_use", 1) != 1:
                    continue
                located = kit._add_click_point(entry, device_info, anchor="station_text")
                if located is None:
                    # Rejected by the top/bottom safety filter.
                    continue
                located.pop("b_use", None)  # temporary field, not part of the result
                if not located.get("station_name"):
                    continue
                stations.append(located)

            # Second chance: ask the model to locate stations without a point.
            pending = [
                (idx, item["station_name"])
                for idx, item in enumerate(stations)
                if item.get("uia_center_x") is None or item.get("uia_center_y") is None
            ]
            if pending:
                lookups = await asyncio.gather(
                    *(
                        kit.find_station_coordinate_first_page(image_url, name, device_info)
                        for _, name in pending
                    ),
                    return_exceptions=True,
                )
                for (idx, _), found in zip(pending, lookups):
                    if (
                        isinstance(found, dict)
                        and found.get("uia_center_x") is not None
                        and found.get("uia_center_y") is not None
                    ):
                        stations[idx]["uia_center_x"] = found.get("uia_center_x")
                        stations[idx]["uia_center_y"] = found.get("uia_center_y")

            for item in stations:
                # NOTE(review): the x coordinate is unconditionally overwritten
                # with 100, discarding the value computed above. Kept as-is
                # because callers may depend on it; confirm this is intended.
                item["uia_center_x"] = 100
                item.setdefault("uia_center_y", None)

            return stations
        except Exception as e:
            logger.error(f"Error parsing JSON: {e}")
            logger.error(f"Raw content: {raw}")
            return []

    @staticmethod
    async def parse_hybrid_image(image_path, uploader, cdn_domain):
        """
        Unified entry point for the hybrid recognition mode.

        Forwards all three arguments to ``get_stations_hybrid`` and returns
        its result.
        """
        stations = await XinDianTuReadImageKit.get_stations_hybrid(image_path, uploader, cdn_domain)
        return stations

    @staticmethod
    async def parse_vl_image(vl_image_url, json_metadata, device_info=None):
        """
        Recognise stations from an annotated image (green boxes) plus JSON metadata.

        The model is asked to describe each green box from top to bottom; the
        i-th answer is paired with the i-th card of ``json_metadata["cards"]``
        ordered by ``rect[1]``, and the card's ``click_point`` is injected as
        the click coordinate.

        Args:
            vl_image_url: URL of the annotated image passed to the model.
            json_metadata: dict with a ``"cards"`` list; each card is read for
                ``rect`` and ``click_point``. ``"height"`` is used as the
                screen height when *device_info* is not given. The dict is
                not modified.
            device_info: optional device description read for
                ``displayHeight``/``height``.

        Returns:
            A list of station dicts with ``uia_center_x``, ``uia_center_y``
            and ``rect`` added. Entries without a station name, or whose click
            point lies in the excluded top/bottom band, are dropped. Returns
            ``[]`` when the metadata has no cards.
        """
        if not json_metadata or "cards" not in json_metadata:
            return []

        # Order cards top-down by the second rect component. A sorted copy is
        # used so the caller's metadata list is left untouched.
        cards_meta = sorted(json_metadata["cards"], key=lambda card: card["rect"][1])

        prompt = (
            "图片中用绿色矩形框标记了若干个充电站卡片区域。\n"
            "请按从上到下的顺序，依次识别每个绿色框内的场站信息，并返回一个JSON数组。\n"
            "数组中元素的顺序必须与图片中绿色框从上到下的顺序严格一致。\n"
            "如果某个框内不是有效的场站卡片（例如是广告），请返回null或空对象，不要跳过顺序。\n"
            "\n"
            "每个JSON对象包含以下字段：\n"
            "1. station_name: 场站名称；\n"
            "2. price: 价格（数字）；\n"
            "3. pro_price: Pro会员价（数字，无则null）；\n"
            "4. piles: 充电枪描述字符串（例如 '快 闲4/4'）；\n"
            "5. tags: 标签列表（如 ['限时免费']）；\n"
            "6. parking: 停车费用描述（通常在'P'图标后，例如 '收费停车：以场站实际收费规则为准' 或 '限时免费停车...'）。\n"
            "7. distance: 距离信息字符串（例如 '5.3km'）。\n"
            "\n"
            "严格返回纯JSON格式。"
        )

        # The SDK call is blocking, so run it on the default executor.
        loop = asyncio.get_running_loop()
        resp = await loop.run_in_executor(
            None,
            lambda: XinDianTuReadImageKit._client.chat.completions.create(
                model=VL_MODEL_NAME,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "image_url", "image_url": {"url": vl_image_url}},
                            {"type": "text", "text": prompt},
                        ],
                    },
                ],
            )
        )

        content = resp.choices[0].message.content or ""
        raw = XinDianTuReadImageKit._extract_json(content)

        final_stations = []
        try:
            llm_data = json.loads(raw)
            if isinstance(llm_data, list):
                # Screen height is the same for every card; compute it once.
                if device_info:
                    img_h = float(device_info.get("displayHeight") or device_info.get("height") or 2400)
                else:
                    img_h = json_metadata.get("height", 2400)
                top_limit = img_h * SAFE_EXCLUDE_RATIO
                bottom_limit = img_h * (1 - BOTTOM_SAFE_EXCLUDE_RATIO)

                # Pair answers with cards positionally; zip stops at the
                # shorter of the two sequences.
                for item, meta in zip(llm_data, cards_meta):
                    if not item or not isinstance(item, dict):
                        continue
                    if not item.get("station_name"):
                        continue

                    # Click point comes from the metadata, not from the model.
                    cx, cy = meta.get("click_point", [0, 0])

                    # Top safety exclusion
                    if cy < top_limit:
                        logger.warning(f"VL安全排除: 坐标 ({cx}, {cy}) 位于屏幕顶部 {int(SAFE_EXCLUDE_RATIO*100)}% 区域，已忽略。")
                        continue

                    # Bottom safety exclusion
                    if cy > bottom_limit:
                        logger.warning(f"VL安全排除: 坐标 ({cx}, {cy}) 位于屏幕底部 {int(BOTTOM_SAFE_EXCLUDE_RATIO*100)}% 区域，疑似底部功能区，已忽略。")
                        continue

                    item["uia_center_x"] = cx
                    item["uia_center_y"] = cy
                    item["rect"] = meta.get("rect")

                    final_stations.append(item)
            else:
                logger.warning(f"LLM return format error: expected list, got {type(llm_data)}")

        except Exception as e:
            logger.error(f"Error parsing VL response: {e}")

        return final_stations

    @staticmethod
    async def get_stations_hybrid(image_path, uploader, cdn_domain):
        """
        Hybrid recognition: image-processing segmentation plus per-patch model OCR.

        Steps: cut the screenshot into card patches with ``get_card_segments``,
        upload each patch through ``uploader.upload_file``, recognise every
        patch concurrently with ``recognize_card_text``, then attach the
        patch centre to each recognised station and apply the name filters.

        Args:
            image_path: path of the screenshot to segment.
            uploader: object exposing ``upload_file(local_path, remote_path)``.
            cdn_domain: URL prefix under which uploaded patches are reachable.

        Returns:
            A list of station dicts with ``x``/``y`` set to the patch centre.
            Returns ``[]`` when no segment is found.
        """
        # 1. Image-processing segmentation
        segments = XinDianTuReadImageKit.get_card_segments(image_path)
        if not segments:
            return []

        # 2. Upload every patch and schedule its OCR
        tasks = []
        for seg in segments:
            patch_path = seg["patch_path"]
            remote_path = f"tmp/patches/{os.path.basename(patch_path)}"
            uploader.upload_file(patch_path, remote_path)
            patch_url = f"{cdn_domain}/{remote_path}"

            # Remember the uploaded URL on the segment for later use.
            seg["patch_url"] = patch_url
            tasks.append(XinDianTuReadImageKit.recognize_card_text(patch_url))

        # Wait for all OCR calls; results keep the order of `segments`.
        results = await asyncio.gather(*tasks)

        # 3. Assemble results
        final_stations = []
        for seg, res in zip(segments, results):
            if not isinstance(res, dict):
                continue
            name = res.get("station_name")
            if name and name != "未知":
                res["x"] = seg["center_x"]
                res["y"] = seg["center_y"]
                final_stations.append(res)

        # 4. Post-filter by excluded titles / blacklist keywords.
        if final_stations:
            # These two names are not imported in the visible part of this
            # module; resolve them defensively so a missing definition
            # disables the filter instead of raising NameError.
            try:
                excluded_titles = STATION_EXCLUDED_TITLES
            except NameError:
                excluded_titles = ()
            try:
                blacklist_keywords = STATION_BLACKLIST_KEYWORDS
            except NameError:
                blacklist_keywords = ()

            processed_excluded_titles = {str(x).replace(" ", "").strip() for x in excluded_titles}
            filtered = []
            for item in final_stations:
                normalized_name = str(item.get("station_name")).replace(" ", "").strip()
                if normalized_name in processed_excluded_titles:
                    continue
                if any(kw in normalized_name for kw in blacklist_keywords):
                    continue
                filtered.append(item)
            final_stations = filtered

        return final_stations

    @staticmethod
    def get_card_segments(image_path, output_dir="./Debug/Patches"):
        """
        Cut station cards out of a screenshot using horizontal divider bands.

        Each pixel row between 15% and 85% of the image height is classified
        from its mean and standard deviation as blank, divider or content.
        Runs of non-divider rows that contain at least one content row become
        candidate blocks; blocks are then filtered by height and whiteness,
        cropped horizontally to their content and saved as JPEG patches.

        Args:
            image_path: path of the screenshot to analyse.
            output_dir: directory the patches are written to (created if
                missing).

        Returns:
            list of dict ``{"patch_path": str, "center_x": int,
            "center_y": int, "y_range": tuple}`` in top-to-bottom order, or
            ``[]`` when analysis fails (the error is logged).
        """
        os.makedirs(output_dir, exist_ok=True)

        # Row classes used by the classifier below.
        row_content, row_divider, row_blank = 0, 1, 2

        try:
            img = Image.open(image_path).convert('RGB')
            width, height = img.size
            img_data = np.array(img)

            logger.info(f"页面分析 - 分辨率: {width}x{height}")

            # 1. Row classification.
            #    blank   : mean > 252 (card background, tolerating slight noise)
            #    divider : 230 < mean <= 252 and std < 15 (uniform light band)
            #    content : everything else (text/images darken or vary the row)
            row_types = []
            debug_rows = []
            for y in range(height):
                # Rows in the top/bottom 15% (header/footer) are marked as
                # divider so they can never become part of a segment.
                if y < height * 0.15 or y > height * 0.85:
                    row_types.append(row_divider)
                    continue

                row = img_data[y]
                row_mean = np.mean(row)
                row_std = np.std(row)

                if row_mean > 252:
                    r_type = row_blank
                elif row_mean > 230 and row_std < 15:
                    r_type = row_divider
                else:
                    r_type = row_content
                row_types.append(r_type)

                # Sample every 50th row for the debug log.
                if y % 50 == 0:
                    debug_rows.append(f"Row {y}: Mean={row_mean:.1f}, Std={row_std:.1f} -> Type={r_type}")

            if debug_rows:
                logger.info("行特征采样 (调试用):\n" + "\n".join(debug_rows))

            # 2. Collect runs of non-divider rows that hold at least one
            #    content row; each run is a candidate (y_start, y_end) block.
            raw_blocks = []
            seg_start = None
            has_content = False
            for y, r_type in enumerate(row_types):
                if r_type == row_divider:
                    if seg_start is not None and has_content:
                        raw_blocks.append((seg_start, y))
                    seg_start = None
                    continue
                if seg_start is None:
                    seg_start = y
                    has_content = False
                if r_type == row_content:
                    has_content = True

            # A run may extend to the last row.
            if seg_start is not None and has_content:
                raw_blocks.append((seg_start, len(row_types)))

            def column_has_content(top, bottom, x):
                """True when column x of rows [top, bottom) is not plain light background."""
                col = img_data[top:bottom, x]
                return np.std(col) > 5 or np.mean(col) < 245

            # 3. Filter and post-process the candidate blocks.
            base_name = os.path.splitext(os.path.basename(image_path))[0]

            valid_segments = []
            logger.info(f"Initial raw blocks count: {len(raw_blocks)}")

            for i, (y1, y2) in enumerate(raw_blocks):
                h = y2 - y1
                # Too small (noise or a lone text line) or too large
                # (probably a full-screen mis-detection).
                if h < 50 or h > 1000:
                    continue

                # Reject regions that are almost entirely white.
                region_mean = np.mean(img_data[y1:y2])
                if region_mean > 254.5:
                    logger.info(f"  [-] 忽略区域 {i}: Y({y1}-{y2}), H={h}, 整体太白 (Mean={region_mean:.1f} > 254.5)")
                    continue

                if h < 60:
                    logger.info(f"  [-] 忽略区域 {i}: Y({y1}-{y2}), H={h}, 高度不足 (<60)")
                    continue

                # Mean brightness of the left/right margins; logged only,
                # never used to reject a block.
                margin_w = max(5, int(width * 0.05))
                l_mean = np.mean(img_data[y1:y2, 0:margin_w])
                r_mean = np.mean(img_data[y1:y2, width - margin_w:width])

                # Automatic horizontal crop: scan inwards from both edges
                # (every second column) until a column with content is found.
                x1, x2 = 0, width
                for x in range(0, int(width * 0.4), 2):
                    if column_has_content(y1, y2, x):
                        x1 = x
                        break
                for x in range(width - 1, int(width * 0.6), -2):
                    if column_has_content(y1, y2, x):
                        x2 = x + 1
                        break

                # Add a little padding, clamped to the image.
                x1 = max(0, x1 - 10)
                x2 = min(width, x2 + 10)

                # A very narrow crop is unlikely to be a real card.
                if (x2 - x1) < width * 0.5:
                    logger.info(f"  [-] 忽略区域 {i}: 裁剪后宽度过小 ({x2-x1})")
                    continue

                # Save the patch.
                patch = img.crop((x1, y1, x2, y2))
                patch_name = f"{base_name}_p{i}_{y1}.jpg"
                patch_path = os.path.join(output_dir, patch_name)
                patch.save(patch_path)

                valid_segments.append({
                    "patch_path": os.path.abspath(patch_path),
                    "center_x": (x1 + x2) // 2,
                    "center_y": (y1 + y2) // 2,
                    "y_range": (y1, y2)
                })
                logger.info(f"  [+] 发现卡片 {i}: Y({y1}-{y2}), H={h}, Crop X({x1}-{x2}), 边缘(L={l_mean:.1f}, R={r_mean:.1f}), 已保存")

            logger.info(f"分析完成：识别到 {len(valid_segments)} 个区域")
            return valid_segments

        except Exception as e:
            logger.error(f"图形学切片失败: {e}", exc_info=True)
            return []

    @staticmethod
    async def recognize_card_text(patch_url):
        """
        Run OCR on one card patch and extract the station details.

        Args:
            patch_url: URL of the patch image passed to the vision model.

        Returns:
            The JSON object returned by the model (expected keys:
            ``station_name``, ``price``, ``piles``), or
            ``{"station_name": "未知"}`` when no JSON object can be extracted
            from the reply.
        """
        # The SDK call is blocking, so run it on the default executor.
        loop = asyncio.get_running_loop()
        resp = await loop.run_in_executor(
            None,
            lambda: XinDianTuReadImageKit._client.chat.completions.create(
                model=VL_MODEL_NAME,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": "请识别图片中的充电站信息，并以 JSON 格式输出：{\"station_name\": \"...\", \"price\": \"...\", \"piles\": \"空闲数/总数\"}。只输出 JSON，不要有其他文字。"
                            },
                            {"type": "image_url", "image_url": {"url": patch_url}}
                        ]
                    }
                ],
                max_tokens=200
            )
        )
        # The message content may be None; treat that as an empty reply.
        content = (resp.choices[0].message.content or "").strip()

        # Reuse the shared extractor (handles code fences and surrounding text).
        parsed = None
        try:
            parsed = json.loads(XinDianTuReadImageKit._extract_json(content))
        except Exception:
            parsed = None

        # Tolerate a reply that wraps the object in an array.
        if isinstance(parsed, list):
            parsed = next((entry for entry in parsed if isinstance(entry, dict)), None)

        if isinstance(parsed, dict):
            return parsed

        logger.warning(f"OCR 结果解析 JSON 失败: {content}")
        return {"station_name": "未知"}

    @staticmethod
    async def detect_ad(image_url: str, device_info: dict = None) -> dict:
        """
        Detect a popup ad on the screenshot and locate its close button.

        Args:
            image_url: URL of the screenshot passed to the model.
            device_info: device description; the class-level fallback is used
                when omitted.

        Returns:
            {
                "has_ad": bool,
                "uia_center_x": int|None,
                "uia_center_y": int|None
            }
            ``has_ad`` is True only when the model reports an ad together with
            close-button bounds. The coordinates are ``None`` when the bounds
            cannot be converted or when ``_add_click_point`` rejects the point
            (it excludes the top and bottom bands of the screen).
        """
        no_ad = {"has_ad": False, "uia_center_x": None, "uia_center_y": None}

        if device_info is None:
            logger.warning("未提供动态设备信息，使用通用回退配置。")
            device_info = XinDianTuReadImageKit._FALLBACK_DEVICE_INFO

        prompt = (
            "分析图片中是否存在覆盖在主界面上的广告弹窗（Popup Ad）。"
            "如果存在，请找到关闭该弹窗的按钮（通常是一个带有 'X' 的图标，可能在弹窗的右上角、右下角或正下方）。"
            "仅输出JSON对象（不含任何说明文字），包含以下字段："
            "1. has_ad: 布尔值，是否存在广告弹窗；"
            "2. close_button_bounds: 关闭按钮的像素坐标或归一化坐标(0-1000) {x1,y1,x2,y2}。如果不存在广告则为 null。"
            "注意：只需识别最明显的那个关闭按钮。严格返回纯JSON。"
        )

        # The SDK call is blocking, so run it on the default executor.
        loop = asyncio.get_running_loop()
        resp = await loop.run_in_executor(
            None,
            lambda: XinDianTuReadImageKit._client.chat.completions.create(
                model=VL_MODEL_NAME_AD,  # dedicated model configured for ad detection
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {"url": image_url},
                            },
                            {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                            {"type": "text", "text": prompt},
                        ],
                    },
                ],
            )
        )

        content = resp.choices[0].message.content or ""
        raw = XinDianTuReadImageKit._extract_json(content)

        try:
            data = json.loads(raw)
            # The extractor may hand back an array (e.g. "[]"); only an
            # object can carry the expected fields.
            if not isinstance(data, dict):
                return no_ad

            close_bounds = data.get("close_button_bounds")
            if not (data.get("has_ad", False) and close_bounds):
                return no_ad

            # Reuse the shared helper to convert the bounds to a click point.
            # Its return value is ignored on purpose: when the point is
            # filtered out, temp_obj simply has no coordinates.
            temp_obj = {"bounds": close_bounds}
            XinDianTuReadImageKit._add_click_point(temp_obj, device_info)
            return {
                "has_ad": True,
                "uia_center_x": temp_obj.get("uia_center_x"),
                "uia_center_y": temp_obj.get("uia_center_y")
            }
        except Exception as e:
            logger.error(f"Error parsing ad detection JSON: {e}")
            return no_ad

    @staticmethod
    async def _download_as_base64(url: str) -> str:
        """
        Fetch an image over HTTP and return its bytes as a Base64 string.

        Used by the vision-model fallback path, which embeds the image in a
        data URI instead of passing the remote URL.

        Args:
            url: Address of the image to download.

        Returns:
            The Base64-encoded body as text. Despite the ``str`` annotation,
            ``None`` is returned when the response status is not 200, when the
            body is empty, or when any exception occurs (the exception is
            logged).
        """
        try:
            async with aiohttp.ClientSession() as http:
                async with http.get(url) as response:
                    succeeded = response.status == 200
                    # Only read the body for a successful response.
                    payload = await response.read() if succeeded else None
                    if payload:
                        return base64.b64encode(payload).decode('utf-8')
                    return None
        except Exception as e:
            logger.error(f"Failed to download image for base64 conversion: {e}")
            return None

    @staticmethod
    async def parse_price_schedule(station_name: str, image_url: str, device_info: dict = None) -> list:
        """
        Parse a price-schedule screenshot into one record per time period.

        Args:
            station_name: Station name copied verbatim into every returned row.
            image_url: Address of the screenshot containing the price table.
            device_info: Device description sent to the model next to the
                prompt; when omitted the class-level fallback is used.

        Returns:
            A list of dicts, one per recognised row::

                {
                    "station_name": str,              # the ``station_name`` argument
                    "start": "HH:MM",                 # as returned by the model
                    "end": "HH:MM",                   # as returned by the model
                    "price_kwh": float | None,
                    "electric_fee_kwh": float | None,
                    "service_fee_kwh": float | None,
                }

            An empty list is returned when the request fails, the Base64
            fallback fails, or the model output is not a JSON array.

        Notes:
            - The request uses the model configured as ``VL_MODEL_NAME``.
            - If the API rejects the URL (``InvalidParameter.DataInspection``
              or a "media format" message), the image is downloaded and
              resent as a Base64 data URI.
            - A value that cannot be read as a number becomes ``None`` for that
              field only; it no longer discards the whole table.
            - After conversion, a missing fee is filled from the previous row.
        """
        if device_info is None:
            logger.warning("未提供动态设备信息，使用通用回退配置。")
            device_info = XinDianTuReadImageKit._FALLBACK_DEVICE_INFO

        # Prompt constraining the model to a bare JSON array with fixed fields.
        prompt = (
            "仅输出JSON数组（不含任何说明文字）。识别图片中所有时段的价格信息，返回每一行："
            "1) start: 开始时间（HH:MM），2) end: 结束时间（HH:MM），"
            "3) price_kwh: 价格（元/度，站点价或总价），"
            "4) electric_fee_kwh: 电费（元/度），"
            "5) service_fee_kwh: 服务费（元/度）。"
            "所有数值以数字返回，例如 1.1800。若缺失某项则填 null。严格返回纯JSON数组。"
            "注意：如果某行价格信息为空或表示同上，请尝试复用上一行的价格信息。"
        )

        loop = asyncio.get_running_loop()

        def _do_request(url_val):
            # Blocking SDK call; always run through the executor.
            return XinDianTuReadImageKit._client.chat.completions.create(
                model=VL_MODEL_NAME,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {"url": url_val},
                            },
                            {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                            {"type": "text", "text": prompt},
                        ],
                    },
                ],
            )

        def _to_float(value):
            # Convert a model-supplied value to float; None when it is not numeric.
            if value is None or isinstance(value, bool):
                return None
            if isinstance(value, (int, float)):
                return float(value)
            if isinstance(value, str):
                text = value.strip().lstrip("¥￥").strip()
                if not text:
                    return None
                try:
                    return float(text)
                except ValueError:
                    return None
            return None

        try:
            # Run the synchronous client in the thread pool to keep the loop free.
            resp = await loop.run_in_executor(None, _do_request, image_url)
        except BadRequestError as e:
            # The error code may live on the exception or inside its body.
            err_code = getattr(e, 'code', '') or ''
            if not err_code and hasattr(e, 'body') and isinstance(e.body, dict):
                err_code = e.body.get('code', '')

            rejected_media = (
                'InvalidParameter.DataInspection' in str(err_code)
                or 'media format' in str(e).lower()
            )
            if not rejected_media:
                logger.error(f"API BadRequestError: {e}")
                return []

            logger.warning(f"Image URL rejected ({err_code}). Attempting Base64 fallback: {image_url}")
            base64_str = await XinDianTuReadImageKit._download_as_base64(image_url)
            if not base64_str:
                logger.error("Base64 download failed during fallback.")
                return []

            # Guess the MIME subtype from the URL; JPEG is the default.
            lower_url = image_url.lower()
            if ".png" in lower_url:
                ext = "png"
            elif ".webp" in lower_url:
                ext = "webp"
            else:
                ext = "jpeg"

            data_uri = f"data:image/{ext};base64,{base64_str}"
            try:
                resp = await loop.run_in_executor(None, _do_request, data_uri)
            except Exception as fallback_err:
                # The retry must not escape: primary failures also return [].
                logger.error(f"API fallback request failed: {fallback_err}")
                return []
        except Exception as e:
            logger.error(f"API Unexpected Error: {e}")
            return []

        try:
            content = resp.choices[0].message.content or ""
        except (AttributeError, IndexError, TypeError) as e:
            logger.error(f"API returned an unexpected response shape: {e}")
            return []
        raw = XinDianTuReadImageKit._extract_json(content)

        try:
            rows = json.loads(raw)
        except (TypeError, ValueError) as e:
            logger.error(f"Error parsing JSON: {e}")
            logger.error(f"Raw content: {raw}")
            return []
        if not isinstance(rows, list):
            return []

        norm = [
            {
                "station_name": station_name,
                "start": r.get("start"),
                "end": r.get("end"),
                "price_kwh": _to_float(r.get("price_kwh")),
                "electric_fee_kwh": _to_float(r.get("electric_fee_kwh")),
                "service_fee_kwh": _to_float(r.get("service_fee_kwh")),
            }
            for r in rows
            if isinstance(r, dict)
        ]

        # Forward-fill missing fees; rows are shared objects, so a value filled
        # into one row is visible when that row becomes `prev` for the next.
        for prev, curr in zip(norm, norm[1:]):
            for k in ("price_kwh", "electric_fee_kwh", "service_fee_kwh"):
                if curr[k] is None and prev[k] is not None:
                    curr[k] = prev[k]

        return norm

    @staticmethod
    def _to_minutes(t: str) -> int:
        """
        将 "HH:MM" 转为分钟数（0-1440）
        说明：
            - 特殊处理 "24:00" -> 1440，方便区间闭合处理
            - 非法格式返回 0
        """
        if not t:
            return 0
        try:
            h, m = t.split(":")
            h = int(h)
            m = int(m)
            if h == 24 and m == 0:
                return 24 * 60
            return h * 60 + m
        except Exception:
            return 0

    @staticmethod
    def _fmt(t: int) -> str:
        """
        将分钟数格式化为 "HH:MM"
        """
        h = t // 60
        m = t % 60
        return f"{h:02d}:{m:02d}"

    @staticmethod
    def expand_schedule_to_hourly(rows: list) -> list:
        """
        将时段列表按小时边界拆分

        参数：
            rows: parse_price_schedule 返回的时段列表

        返回：
            每小时一条数据的列表，区间为闭开 [start, end) 的连续小时段
        说明：
            - 例如 05:00-08:00 -> 05:00-06:00, 06:00-07:00, 07:00-08:00
            - 保留每小时的价格、电费、服务费不变
        """
        hourly = []
        for r in rows:
            # 起止时间转分钟
            s = XinDianTuReadImageKit._to_minutes(r.get("start"))
            e = XinDianTuReadImageKit._to_minutes(r.get("end"))
            if e <= s:
                continue
            cur = s
            while cur < e:
                # 下一小时边界；不超过区间终点
                nxt = min(e, ((cur // 60) + 1) * 60)
                hourly.append({
                    "start": XinDianTuReadImageKit._fmt(cur),
                    "end": XinDianTuReadImageKit._fmt(nxt),
                    "price_kwh": r.get("price_kwh"),
                    "electric_fee_kwh": r.get("electric_fee_kwh"),
                    "service_fee_kwh": r.get("service_fee_kwh"),
                })
                cur = nxt
        return hourly

    @staticmethod
    async def _fetch_md5(url: str) -> str:
        """
        Download an image and return the MD5 digest of its bytes.

        The digest is used to detect images with identical content.

        Args:
            url: Address of the image to download.

        Returns:
            The 32-character hexadecimal MD5 of the response body, or an empty
            string when the status is not 200 or any exception occurs.
        """
        try:
            async with aiohttp.ClientSession() as http:
                async with http.get(url) as response:
                    if response.status == 200:
                        return hashlib.md5(await response.read()).hexdigest()
                    return ""
        except Exception:
            return ""

    @staticmethod
    async def parse_address(station_name: str, image_url: str, device_info: dict = None) -> dict:
        """
        Extract the station's full name and address from a detail screenshot.

        The lookup of the "全部时段" button runs concurrently with the address
        request, and its result is merged into the returned dict.

        Args:
            station_name: Possibly truncated station name, embedded in the
                prompt as a hint for finding the full name.
            image_url: Address of the screenshot.
            device_info: Device description sent to the model and to the
                button lookup; when omitted the class-level fallback is used.

        Returns:
            The JSON object produced by the model (the prompt asks for
            ``full_station_name`` and ``address``), updated with whatever
            ``find_all_time_button_coordinate`` returns. An empty dict is the
            base when the model output is not a JSON object.

        Raises:
            Any exception raised by the model request or by reading its
            response propagates to the caller. The button task is cancelled
            first so that it is not left running without an owner.
        """
        if device_info is None:
            logger.warning("未提供动态设备信息，使用通用回退配置。")
            device_info = XinDianTuReadImageKit._FALLBACK_DEVICE_INFO

        # Start the button lookup now so it overlaps with the address request.
        button_task = asyncio.create_task(XinDianTuReadImageKit.find_all_time_button_coordinate(image_url, device_info))

        prompt = (
            "仅输出JSON对象（不含任何说明文字）。"
            "任务1：识别图片中充电站的完整名称（full_station_name）。"
            f"提示：列表中看到的名称可能是截断的（例如“{station_name}”），请在图片上方找到最匹配的完整名称。"
            "任务2：识别充电站的详细地址（address）。"
            "寻找规则：地址通常紧跟在场站名称下方，或者在‘距离’图标（定位小图表）附近，或者在带有‘导航’按钮的同一行。"
            "返回包含 full_station_name 和 address 字段的JSON对象，例如 {\"full_station_name\": \"完整名称\", \"address\": \"详细地址\"}。"
            "如果找不到，对应字段返回空字符串。"
            "严格返回纯JSON格式。"
        )

        def _request():
            # Blocking SDK call; executed in the default thread pool.
            return XinDianTuReadImageKit._client.chat.completions.create(
                model=VL_MODEL_NAME,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {"url": image_url},
                            },
                            {"type": "text", "text": json.dumps(device_info, ensure_ascii=False)},
                            {"type": "text", "text": prompt},
                        ],
                    },
                ],
            )

        try:
            resp = await asyncio.get_running_loop().run_in_executor(None, _request)
            content = resp.choices[0].message.content or ""
            raw = XinDianTuReadImageKit._extract_json(content)
        except BaseException:
            # Includes cancellation of this coroutine: never leave the
            # background task orphaned. The original error is re-raised.
            button_task.cancel()
            raise

        result = {}
        try:
            data = json.loads(raw)
            if isinstance(data, dict):
                result = data
        except Exception as e:
            logger.error(f"Error parsing address JSON: {e}")

        # Wait for the button lookup and merge its coordinates, if any.
        try:
            button_result = await button_task
            if button_result:
                result.update(button_result)
        except Exception as e:
            logger.error(f"Error in button coordinate task: {e}")

        return result

    @staticmethod
    async def parse_price_schedule_multi(station_name: str, image_urls: list, device_info: dict = None) -> list:
        """
        Parse several price-table screenshots and concatenate their rows.

        Args:
            station_name: Station name forwarded to ``parse_price_schedule``.
            image_urls: URLs of the price-table screenshots.
            device_info: Optional device description forwarded to
                ``parse_price_schedule``.

        Returns:
            The rows of every unique image, in the order the images first
            appear in ``image_urls``. Rows are not split by hour. An empty
            list is returned when ``image_urls`` is empty.

        Notes:
            - Images are de-duplicated by the MD5 of their content; the first
              URL seen for each digest is kept.
            - The MD5 downloads are independent and run concurrently.
            - A URL whose digest cannot be computed is skipped, with a warning.
            - Parsing stays sequential, one model request at a time.
        """
        if not image_urls:
            return []

        urls = list(image_urls)
        # gather() returns results in argument order, so pairing stays stable.
        digests = await asyncio.gather(
            *(XinDianTuReadImageKit._fetch_md5(u) for u in urls)
        )

        # digest -> first URL with that content
        md5_to_url = {}
        for u, digest in zip(urls, digests):
            if not digest:
                logger.warning(f"Skipping price image, MD5 download failed: {u}")
                continue
            md5_to_url.setdefault(digest, u)

        combined = []
        for u in md5_to_url.values():
            rows = await XinDianTuReadImageKit.parse_price_schedule(station_name, u, device_info=device_info)
            if rows:
                combined.extend(rows)
        return combined

    @staticmethod
    def hourly_full_day(rows: list) -> list:
        """
        将时段列表规整为全天24个整点小时段

        参数：
            rows: 原始时段列表（可来自多图合并）

        返回：
            固定24条记录（00:00-01:00 到 23:00-24:00），
            若某小时未被任何时段覆盖，则费用为 None
        说明：
            - 选择覆盖该小时段的时段（若多个，则选择重叠时间最长的一个）
            - 保证返回结构完整，便于后续消费端显示或补全
        """
        # 预处理：转换为分钟区间
        intervals = []
        for r in rows:
            s = XinDianTuReadImageKit._to_minutes(r.get("start"))
            e = XinDianTuReadImageKit._to_minutes(r.get("end"))
            if e <= s:
                continue
            s = max(0, s)
            e = min(1440, e)
            intervals.append({
                "s": s, "e": e,
                "price_kwh": r.get("price_kwh"),
                "electric_fee_kwh": r.get("electric_fee_kwh"),
                "service_fee_kwh": r.get("service_fee_kwh"),
            })
        intervals.sort(key=lambda x: (x["s"], x["e"]))

        result = []
        for h in range(24):
            hs = h * 60
            he = (h + 1) * 60
            best = None
            best_overlap = 0
            for it in intervals:
                overlap = max(0, min(he, it["e"]) - max(hs, it["s"]))
                if overlap > best_overlap:
                    best_overlap = overlap
                    best = it
            result.append({
                "start": XinDianTuReadImageKit._fmt(hs),
                "end": XinDianTuReadImageKit._fmt(he),
                "price_kwh": best["price_kwh"] if best else None,
                "electric_fee_kwh": best["electric_fee_kwh"] if best else None,
                "service_fee_kwh": best["service_fee_kwh"] if best else None,
            })
        return result


async def test1():
    """Manual check: parse a first-level (station list) screenshot and print the result."""
    image_url = "https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/Temp/8fd79c68-fec6-4ca7-8d8e-fbff3c6862c8.jpg"
    print(f"Testing First Level with URL: {image_url}")
    parsed = await XinDianTuReadImageKit.parse_first_level_image_url(image_url)
    print("First Level Result:")
    pretty = json.dumps(parsed, ensure_ascii=False, indent=2)
    print(pretty)


async def test2(station_name: str):
    """Manual check: extract the address from a second-level (detail) screenshot and print it."""
    image_url = "https://dsideal.obs.myhuaweicloud.com/HuangHai/Temp/SecondPage.jpg"
    print(f"Testing address extraction from: {image_url}")
    parsed = await XinDianTuReadImageKit.parse_address(station_name, image_url)
    # Attach the name that was used for the lookup before printing.
    parsed["station_name"] = station_name
    print("Address result:")
    pretty = json.dumps(parsed, ensure_ascii=False, indent=2)
    print(pretty)


async def test3(station_name: str):
    """Manual check: parse the sample price-table images and print the 24-hour view."""
    base = "https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/Temp/XinDianTu/"
    sample_urls = [base + "1.jpg", base + "2.jpg"]
    merged_rows = await XinDianTuReadImageKit.parse_price_schedule_multi(station_name, sample_urls)
    per_hour = XinDianTuReadImageKit.hourly_full_day(merged_rows)
    print(json.dumps(per_hour, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    # Manual entry point: only the price-schedule check (test3) is active.
    # Uncomment one of the other calls to run a different stage instead.
    #asyncio.run(test1())
    station_name = '吉林省看守所充电站'
    #asyncio.run(test2(station_name))
    asyncio.run(test3(station_name))