# aiData/Util/VLMKit.py

# coding=utf-8
import asyncio
import base64
import json
import logging
import mimetypes
import os
import re

from openai import AsyncOpenAI

from Config.Config import ALY_LLM_API_KEY, VL_MODEL_NAME

logger = logging.getLogger(__name__)

class VLMKit:
    """Thin async helper around an OpenAI-compatible vision-language endpoint.

    The instance holds one ``AsyncOpenAI`` client and offers two utilities:
    sending a local image plus a text prompt to the model, and pulling the
    JSON portion out of a free-form model reply.
    """

    # Compiled once at class-definition time instead of on every call.
    # Matches a markdown fenced block tagged ``json`` (tag is case-insensitive).
    _FENCED_JSON_RE = re.compile(r'```json\s*(.*?)\s*```', re.DOTALL | re.IGNORECASE)
    # Greedy "first opening bracket to last closing bracket" fallback.
    _BRACKETED_RE = re.compile(r'(\[.*\]|\{.*\})', re.DOTALL)
    # Locates the first character that could start a JSON array or object.
    _JSON_START_RE = re.compile(r'[\[{]')

    def __init__(self, api_key=None, base_url=None, model=None):
        """Create the client.

        Args:
            api_key: API key; falls back to ``ALY_LLM_API_KEY`` when falsy.
            base_url: Endpoint base URL; falls back to the DashScope
                compatible-mode URL when falsy.
            model: Model name; falls back to ``VL_MODEL_NAME`` when falsy.
        """
        self.api_key = api_key or ALY_LLM_API_KEY
        self.base_url = base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
        self.model = model or VL_MODEL_NAME
        self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)

    @staticmethod
    def _guess_image_mime(image_path):
        """Return the image MIME type implied by the file extension.

        Falls back to ``image/jpeg`` (the previously hard-coded value) when
        the extension is unknown or does not map to an ``image/*`` type.
        """
        guessed, _ = mimetypes.guess_type(os.fspath(image_path))
        if guessed and guessed.startswith("image/"):
            return guessed
        return "image/jpeg"

    @staticmethod
    def _encode_image(image_path):
        """Read the file at ``image_path`` and return its base64 text."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    async def analyze_image(self, image_path, prompt, max_tokens=2000, temperature=0.01):
        """Send one local image together with ``prompt`` and return the reply text.

        Args:
            image_path: Path of the image file on local disk.
            prompt: Text instruction sent alongside the image.
            max_tokens: Forwarded to the completion request.
            temperature: Forwarded to the completion request.

        Returns:
            The ``content`` of the first choice's message.

        Raises:
            FileNotFoundError: ``image_path`` is not an existing regular file.
            Exception: Any error raised by the API call is logged and re-raised.
        """
        # isfile (not exists) so that a directory is rejected here with a
        # clear message instead of failing later inside open().
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image not found: {image_path}")

        # Reading and base64-encoding a file are blocking operations; run
        # them in the default executor so the event loop is not stalled.
        loop = asyncio.get_running_loop()
        encoded_image = await loop.run_in_executor(None, self._encode_image, image_path)

        # Label the data URL with the real image type rather than always JPEG.
        mime_type = self._guess_image_mime(image_path)
        data_url = f"data:{mime_type};base64,{encoded_image}"

        try:
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {"type": "image_url", "image_url": {"url": data_url}},
                        ],
                    }
                ],
                max_tokens=max_tokens,
                temperature=temperature,
            )
            return response.choices[0].message.content
        except Exception as e:
            # logger.exception keeps the traceback; bare raise preserves it
            # for the caller as well.
            logger.exception("VLMKit analyze_image error: %s", e)
            raise

    def extract_json(self, text):
        """Extract the JSON portion of ``text``.

        Resolution order:
          1. The body of a markdown fenced block tagged ``json``.
          2. The first complete, valid JSON array/object that begins at the
             first ``[`` or ``{`` in the text (trailing prose is dropped).
          3. The greedy span from the first opening bracket to the last
             matching closing bracket (kept so slightly malformed JSON is
             still handed to the caller unchanged).
          4. ``text`` itself when nothing bracket-like is found.

        Args:
            text: Raw model output; ``None`` or empty yields ``""``.

        Returns:
            A string expected to contain JSON; it is not guaranteed to parse.
        """
        # The model's message content may be None; treat that like empty text
        # instead of crashing inside the regex engine.
        if not text:
            return ""

        fenced = VLMKit._FENCED_JSON_RE.search(text)
        if fenced:
            return fenced.group(1)

        first_bracket = VLMKit._JSON_START_RE.search(text)
        if first_bracket is None:
            return text

        # Prefer an exact parse from the first bracket: this stops at the end
        # of the JSON value, so trailing text containing '}' or ']' is not
        # swallowed the way the greedy regex would.
        start = first_bracket.start()
        try:
            _, end = json.JSONDecoder().raw_decode(text, start)
        except ValueError:
            pass
        else:
            return text[start:end]

        bracketed = VLMKit._BRACKETED_RE.search(text)
        if bracketed:
            return bracketed.group(1)

        return text