# aiData/Apps/YeLiTe/ReadImageKit.py

# coding=utf-8
import logging
import os
import sys

# Make the project root importable: it is the directory three levels above
# this file.  The path is appended (not prepended) and only when it is not
# already listed, so existing sys.path entries keep their priority.
_package_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(os.path.dirname(_package_dir))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

from Util.VLMKit import VLMKit

import json
import re
from Apps.YeLiTe.Kit import draw_rectangles, detect_cards_cv
from Apps.YeLiTe.Config.Setting import DRAW_DEBUG_BOXES, SAFE_EXCLUDE_RATIO, BOTTOM_SAFE_EXCLUDE_RATIO

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

class ReadImageKit:
    """Turn charging-station app screenshots into structured data via a VLM.

    Each public coroutine sends a screenshot and a (Chinese) prompt to
    ``VLMKit.analyze_image``, passes the reply through ``VLMKit.extract_json``
    and decodes it with ``json.loads``.  Errors raised during that round trip
    or while post-processing the reply are logged and reported to the caller
    as an "empty" result (``[]``, ``{}`` or ``None``) instead of propagating.
    """

    # Characters accepted between the two ends of a ``time_range`` value such
    # as "00:00-08:00", "00:00~08:00" or "00:00至08:00".
    _TIME_RANGE_SEPARATORS = re.compile(r"[-~～—–至]")

    def __init__(self):
        # One VLM client reused by all instance-level analyses.
        self.vlm = VLMKit()

    @classmethod
    def _normalize_price_entry(cls, entry):
        """Return a copy of one price record with ``start``/``end``/``price`` filled in.

        The VLM does not always follow the requested schema, so two fallbacks
        are applied when the canonical keys are missing:

        * ``time_range`` ("HH:MM-HH:MM") is split into ``start`` and ``end``;
        * ``price`` is taken from ``total_price``, or computed as
          ``elec_price + service_price``.

        Args:
            entry: A ``dict`` decoded from the VLM reply.  It is not mutated.

        Returns:
            A shallow copy of ``entry``; keys that could not be derived are
            simply left absent.
        """
        normalized = dict(entry)

        time_range = entry.get('time_range')
        needs_bounds = 'start' not in entry or 'end' not in entry
        # A non-string time_range (e.g. null) cannot be split; leave it alone
        # rather than failing the whole response.
        if needs_bounds and isinstance(time_range, str):
            parts = cls._TIME_RANGE_SEPARATORS.split(time_range.replace(' ', ''))
            if len(parts) >= 2:
                normalized['start'] = parts[0]
                normalized['end'] = parts[1]

        if 'price' not in entry:
            if 'total_price' in entry:
                normalized['price'] = entry['total_price']
            elif 'elec_price' in entry and 'service_price' in entry:
                try:
                    normalized['price'] = float(entry['elec_price']) + float(entry['service_price'])
                except (TypeError, ValueError):
                    # Best effort: components that are not numeric leave the
                    # record without a ``price`` key.
                    pass

        return normalized

    @staticmethod
    def _parse_ad_result(res):
        """Convert a decoded ad-detection reply into a close-button location.

        Args:
            res: The decoded JSON value returned by the VLM.

        Returns:
            ``{"x": ..., "y": ..., "ad_type": ...}`` when ``res`` is a dict
            reporting an ad together with a ``close_point`` of at least two
            elements; otherwise ``None``.
        """
        if not isinstance(res, dict) or not res.get("has_ad"):
            return None
        point = res.get("close_point")
        if not isinstance(point, (list, tuple)) or len(point) < 2:
            return None
        return {"x": point[0], "y": point[1], "ad_type": res.get("ad_type")}

    @staticmethod
    def _card_to_station(card, bbox):
        """Combine one VLM card description with its CV bounding box.

        A card is accepted when the VLM marked it ``is_valid: true``, or when
        it has a name and was not explicitly marked ``is_valid: false`` (the
        VLM sometimes omits the flag).

        Args:
            card: The decoded array element for this box (expected: a dict,
                or ``None`` for a box that is not a station card).
            bbox: The matching box, indexed as ``[x1, y1, x2, y2]``.

        Returns:
            The station dict (text fields plus ``point`` = box centre and
            ``bbox``), or ``None`` if the card is rejected.
        """
        if not isinstance(card, dict) or not card:
            return None
        flag = card.get("is_valid")
        accepted = flag is True or (card.get("name") and flag is not False)
        if not accepted:
            return None
        return {
            "name": card.get("name"),
            "address": card.get("address"),
            "distance": card.get("distance"),
            "total_piles": card.get("total_piles"),
            "free_piles": card.get("free_piles"),
            "point": [(bbox[0] + bbox[2]) // 2, (bbox[1] + bbox[3]) // 2],
            "bbox": bbox,
        }

    async def analyze_detail_price(self, image_path):
        """Extract the time-of-use price table from a station detail screenshot.

        Args:
            image_path: Path of the screenshot handed to the VLM.

        Returns:
            A list of price dicts normalised by ``_normalize_price_entry``
            (the prompt asks for ``start``, ``end`` and ``price``).  Elements
            of the reply that are not dicts are dropped.  Returns ``[]`` when
            the reply is not a JSON array or when any step fails.
        """
        prompt = """
        分析这张充电站详情页截图，提取**电价时段表**。
        请仔细寻找包含“时段”、“电价”、“服务费”或“总价”的表格或列表。

        请提取每个时段的：
        1. 开始时间 (HH:MM)
        2. 结束时间 (HH:MM)
        3. 总电价 (元/度，包含电费和服务费)

        如果图片中只显示了当前时段的价格，请尽可能提取当前时段的信息。
        如果包含多个时段，请全部提取。

        输出格式为 JSON 数组：
        [
            {
                "start": "00:00",
                "end": "08:00",
                "price": 1.23
            },
            ...
        ]
        如果无法识别任何价格信息，请返回空数组 []。
        """
        # Bound before the try block so the error handler can tell whether a
        # reply was received at all.
        res_text = None
        try:
            res_text = await self.vlm.analyze_image(image_path, prompt)

            # Log only the head of the raw reply to keep the log readable.
            logger.info(f"VLM Price Analysis Result for {os.path.basename(image_path)}: {res_text[:200]}...")

            prices = json.loads(self.vlm.extract_json(res_text))
            if not isinstance(prices, list):
                return []

            # One malformed element (null, string, ...) must not discard the
            # valid rows around it, so non-dict elements are skipped.
            return [
                self._normalize_price_entry(entry)
                for entry in prices
                if isinstance(entry, dict)
            ]
        except Exception as e:
            logger.error(f"分析电价详情失败: {e}")
            if res_text is not None:
                # Keep the full reply for diagnosing parse failures.
                logger.error(f"Failed VLM Response: {res_text}")
            return []

    async def analyze_detail_basic_info(self, image_path):
        """Extract name, address and parking info from a detail-page screenshot.

        Args:
            image_path: Path of the first-screen screenshot of the detail page.

        Returns:
            The decoded JSON object (the prompt asks for ``name``, ``address``
            and ``parking_info``, with ``null`` for unreadable fields), or
            ``{}`` when the reply is not a JSON object or any step fails.
        """
        prompt = """
        分析这张充电站详情页首屏截图，提取以下信息并返回 JSON：
        {
            "name": "场站名称",
            "address": "完整地址",
            "parking_info": "停车收费信息"
        }
        name 为页面标题中的场站名称，address 为定位图标附近的完整地址，parking_info 为页面中与停车收费相关的文字。如果某项无法识别，请将该字段设为 null。
        只返回纯 JSON 对象，不要包含额外说明文字。
        """
        try:
            res_text = await self.vlm.analyze_image(image_path, prompt)
            data = json.loads(self.vlm.extract_json(res_text))
        except Exception as e:
            logger.error(f"分析详情页基础信息失败: {e}")
            return {}
        return data if isinstance(data, dict) else {}

    @classmethod
    async def detect_ad_popup(cls, image_path, device_info=None):
        """Detect an ad pop-up in a screenshot and locate its close button.

        Args:
            image_path: Path of the screenshot handed to the VLM.
            device_info: Accepted for interface compatibility; not used by
                this method.

        Returns:
            ``{"x": ..., "y": ..., "ad_type": ...}`` (coordinates as returned
            by the VLM, which is asked for absolute pixels) when an ad with a
            usable ``close_point`` is reported; otherwise ``None``.
        """
        # A classmethod has no instance client, so a fresh one is created.
        vlm = VLMKit()
        prompt = """
        请仔细检查这张图片中是否存在**弹窗广告**或**悬浮广告**。
        广告可能有以下几种形式：
        1. 屏幕中央的大型弹窗广告：通常遮挡了页面内容，内容多为优惠券、活动推广等。
        2. 悬浮广告：通常在侧边或角落。
        3. 底部横幅广告。

        请返回关闭按钮的中心坐标。
        请以纯 JSON 格式输出：
        {
          "has_ad": true/false,
          "ad_type": "center" | "bottom" | "side" | "other",
          "close_point": [x, y]  // 绝对像素坐标
        }
        如果没有广告，请返回 {"has_ad": false}。
        """
        try:
            res_text = await vlm.analyze_image(image_path, prompt)
            res = json.loads(vlm.extract_json(res_text))
            return cls._parse_ad_result(res)
        except Exception as e:
            logger.error(f"广告检测失败: {e}")
            return None

    async def analyze_station_list(self, image_path):
        """Extract station cards from a list-page screenshot (hybrid CV + VLM).

        Card boundaries are first detected with ``detect_cards_cv``.  When
        boxes are found they are drawn onto the image with ``draw_rectangles``
        and the VLM is asked to read the text inside each box in top-to-bottom
        order; coordinates then come from the CV boxes.  When no box is found
        the VLM is asked for both text and coordinates.

        NOTE(review): ``draw_rectangles`` is called unconditionally and
        presumably modifies the file at ``image_path``; the imported
        ``DRAW_DEBUG_BOXES`` setting is not consulted here -- confirm intent.

        Args:
            image_path: Path of the list-page screenshot.

        Returns:
            A list of station dicts.  In hybrid mode each has ``name``,
            ``address``, ``distance``, ``total_piles``, ``free_piles``,
            ``point`` (box centre) and ``bbox``.  In fallback mode the dicts
            are returned as produced by the VLM; non-dict elements are
            dropped.  Returns ``[]`` when the VLM round trip or decoding
            fails.  Errors from the CV detection / drawing step propagate.
        """
        # 1. Detect card boundaries with classic image processing.
        cv_bboxes = detect_cards_cv(image_path, top_ratio=SAFE_EXCLUDE_RATIO, bottom_ratio=BOTTOM_SAFE_EXCLUDE_RATIO)

        if cv_bboxes:
            # Draw green boxes on the image so the VLM can refer to them.
            draw_rectangles(image_path, cv_bboxes)

            prompt = f"""
            图片中已经用绿色矩形框标记了 {len(cv_bboxes)} 个可能的充电站卡片。
            请按从上到下的顺序，识别每个绿色框内的场站信息。

            输出格式为 JSON 数组，长度必须为 {len(cv_bboxes)}。
            如果某个框内不是有效的场站卡片，请在对应的数组位置返回 null。

            每个对象包含：
            - "name": 场站名称 (仅提取名称文字)
            - "address": 场站地址 (通常在名称下方，请完整提取地址文字)
            - "distance": 距离 (如 "1.2km", "800m" 等，如果存在请提取)
            - "total_piles": 总桩数 (如果存在请提取)
            - "free_piles": 空闲桩数 (如果存在请提取)
            - "is_valid": true/false (是否为真实的场站卡片)

            注意：仅提取名称、地址和距离，不要包含价格。
            """
        else:
            # CV found nothing: fall back to asking the VLM for everything.
            prompt = """
            分析这张充电站列表截图，提取所有充电站卡片信息。
            忽略顶部的筛选栏，仅提取下方重复出现的场站卡片。
            输出格式为 JSON 数组，每个对象包含：
            - "name": 场站名称
            - "address": 场站地址 (请仔细识别并提取场站的详细地址信息)
            - "distance": 距离 (如 "1.2km", "800m" 等，如果存在请提取)
            - "point": 场站卡片的中心点击坐标 [x, y]
            - "bbox": 场站卡片的边界框 [x1, y1, x2, y2]
            """

        try:
            res_text = await self.vlm.analyze_image(image_path, prompt)
            vlm_results = json.loads(self.vlm.extract_json(res_text))
            if not isinstance(vlm_results, list):
                return []

            if not cv_bboxes:
                # Pure-VLM mode: pass records through, minus null/garbage
                # elements that callers could not treat as a station.
                return [item for item in vlm_results if isinstance(item, dict)]

            # Hybrid mode: the i-th array element describes the i-th box.
            # zip() ignores surplus elements on either side.
            final_stations = []
            for position, (card, bbox) in enumerate(zip(vlm_results, cv_bboxes), start=1):
                station = self._card_to_station(card, bbox)
                if station is None:
                    logger.info(f"VLM 判定第 {position} 个框无效或未识别到名称: {card}")
                else:
                    final_stations.append(station)
            return final_stations
        except Exception as e:
            logger.error(f"Hybrid 分析列表页失败: {e}")
            return []