# aiData/Apps/YiLaiTe/ReadImageKit.py

# coding=utf-8
import logging
import os
import sys

# Ensure sys path includes root for imports if not already
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
if project_root not in sys.path:
    sys.path.append(project_root)

from Util.VLMKit import VLMKit

import json
import re
from Apps.YiLaiTe.Kit import draw_rectangles
from Apps.YiLaiTe.Config.Setting import DRAW_DEBUG_BOXES

logger = logging.getLogger(__name__)

class ReadImageKit:
    """Screenshot analysis helpers built on the project's ``VLMKit`` client.

    Every public coroutine sends a screenshot together with a prompt to the
    model, pulls the JSON payload out of the reply and returns the decoded
    data. Query/parse failures are logged and converted into an "empty"
    result (``None`` or ``[]``) instead of being raised to the caller.
    """

    def __init__(self):
        # Client used by the instance-level analyzers below.
        self.vlm = VLMKit()

    @staticmethod
    async def _query_json(vlm, image_path, prompt):
        """Ask *vlm* about *image_path* with *prompt* and return the decoded JSON.

        Raises whatever ``analyze_image``, ``extract_json`` or ``json.loads``
        raise; callers decide how to degrade.
        """
        res_text = await vlm.analyze_image(image_path, prompt)
        json_str = vlm.extract_json(res_text)
        return json.loads(json_str)

    @classmethod
    async def detect_ad_popup(cls, image_path, device_info=None):
        """Detect an ad popup in the image and locate its close button.

        Args:
            image_path: Screenshot to analyse; forwarded to ``analyze_image``.
            device_info: Accepted for interface compatibility; not used here.

        Returns:
            ``{"x": ..., "y": ..., "ad_type": ...}`` when the model reports an
            ad together with a usable ``close_point``; otherwise ``None``
            (no ad, missing/malformed coordinates, or any query/parse error).
        """
        # A fresh client per call: this is a classmethod, so there is no
        # instance-level ``self.vlm`` to reuse.
        vlm = VLMKit()
        prompt = """
        请仔细检查这张图片中是否存在**弹窗广告**或**悬浮广告**。
        广告可能有以下几种形式：
        1. 屏幕中央的大型弹窗广告：通常遮挡了页面内容，内容多为优惠券、活动推广等。
        2. 悬浮广告：通常在侧边或角落。
        3. 底部横幅广告。

        请返回关闭按钮的中心坐标。
        请以纯 JSON 格式输出：
        {
          "has_ad": true/false,
          "ad_type": "center" | "bottom" | "side" | "other",
          "close_point": [x, y]  // 绝对像素坐标
        }
        如果没有广告，请返回 {"has_ad": false}。
        """
        try:
            res = await cls._query_json(vlm, image_path, prompt)
        except Exception as e:
            logger.error(f"广告检测失败: {e}")
            return None

        if not isinstance(res, dict):
            logger.error(f"广告检测失败: 返回结果不是 JSON 对象: {type(res).__name__}")
            return None

        p = res.get("close_point")
        if not (res.get("has_ad") and p):
            return None

        # Validate the shape explicitly: indexing a string or a list holding
        # non-numeric values would otherwise hand garbage coordinates to the
        # caller instead of signalling "no usable close button".
        usable = (
            isinstance(p, (list, tuple))
            and len(p) >= 2
            and all(
                isinstance(v, (int, float)) and not isinstance(v, bool)
                for v in p[:2]
            )
        )
        if not usable:
            logger.error(f"广告检测失败: 无效的 close_point: {p!r}")
            return None

        return {"x": p[0], "y": p[1], "ad_type": res.get("ad_type")}

    async def analyze_station_list(self, image_path):
        """Extract station cards (name, tap point, bounding box) from a list page.

        Args:
            image_path: Screenshot of the station list; forwarded to the model.

        Returns:
            The JSON array decoded from the model reply, unchanged. ``[]`` when
            the reply is not a JSON array or the query/parse step fails.
        """
        prompt = """
        分析这张充电站列表截图，提取所有充电站卡片信息。
        输出格式为 JSON 数组，每个对象包含：
        - "name": 场站名称
        - "point": 场站卡片的中心点击坐标 [x, y]
        - "bbox": 场站卡片的边界框 [x1, y1, x2, y2]

        注意：
        1. 仅提取明显的场站列表卡片。
        2. 坐标请以像素为单位。
        """

        try:
            stations = await self._query_json(self.vlm, image_path, prompt)
        except Exception as e:
            logger.error(f"VLM 分析列表页失败: {e}")
            return []

        if not isinstance(stations, list):
            return []

        if DRAW_DEBUG_BOXES:
            # Debug overlay is best-effort: a drawing problem must never
            # discard the station list that was parsed successfully.
            try:
                cards = [s for s in stations if isinstance(s, dict)]
                bboxes = [s['bbox'] for s in cards if 'bbox' in s]
                points = [s['point'] for s in cards if 'point' in s]
                # NOTE(review): 2-element points are passed together with
                # 4-element bboxes; confirm draw_rectangles accepts both.
                draw_rectangles(image_path, bboxes + points)
            except Exception as e:
                logger.warning(f"调试绘图失败（已忽略）: {e}")

        return stations

    async def analyze_detail_price(self, image_path):
        """Extract time-of-use prices from a detail page or price sub-page.

        Args:
            image_path: Screenshot of the price page; forwarded to the model.

        Returns:
            The decoded JSON from the model reply, returned as-is. The prompt
            asks for an array of price rows, but the type is not enforced
            here. ``[]`` when the query/parse step fails.
        """
        prompt = """
        分析这张充电站价格详情截图，提取完整的分时价格信息。
        输出格式为 JSON 数组，每个对象包含：
        - "time_range": 时间段 (例如 "00:00-08:00")
        - "total_price": 总价 (电费+服务费)
        - "elec_price": 电费 (如果能看到)
        - "service_price": 服务费 (如果能看到)

        如果没有看到分时段价格，请尝试寻找“价格详情”或“分时电价”按钮的坐标。
        """

        try:
            return await self._query_json(self.vlm, image_path, prompt)
        except Exception as e:
            logger.error(f"VLM 分析价格页失败: {e}")
            return []