# coding=utf-8
import base64
import json
import logging
import mimetypes
import os
import re

from openai import AsyncOpenAI

from Config.Config import ALY_LLM_API_KEY, VL_MODEL_NAME

logger = logging.getLogger(__name__)

# MIME type used in the data URL when the type cannot be guessed from the
# file name (this is the value that was previously always sent).
_FALLBACK_IMAGE_MIME = "image/jpeg"

# Matches a markdown fenced block labelled ``json`` and captures its content.
_JSON_FENCE_RE = re.compile(r'```json\s*(.*?)\s*```', re.DOTALL)
# Matches the widest ``[...]`` or ``{...}`` span starting at the first bracket.
_JSON_VALUE_RE = re.compile(r'(\[.*\]|\{.*\})', re.DOTALL)


class VLMKit:
    """Async helper that sends an image plus a prompt to a chat-completions API.

    The client is an ``AsyncOpenAI`` instance pointed at ``base_url``; the
    API key and model name fall back to the values imported from
    ``Config.Config`` when not given explicitly.
    """

    def __init__(self, api_key=None, base_url=None, model=None):
        """Store the connection settings and create the async client.

        Args:
            api_key: API key; defaults to ``ALY_LLM_API_KEY``.
            base_url: Endpoint base URL; defaults to the DashScope
                compatible-mode URL.
            model: Model name; defaults to ``VL_MODEL_NAME``.
        """
        self.api_key = api_key or ALY_LLM_API_KEY
        self.base_url = base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
        self.model = model or VL_MODEL_NAME
        self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)

    @staticmethod
    def _image_data_url(image_path):
        """Read ``image_path`` and return it as a base64 ``data:`` URL."""
        mime_type, _ = mimetypes.guess_type(str(image_path))
        # Only trust the guess when it is actually an image type; otherwise
        # keep the historical default so existing callers see no change.
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = _FALLBACK_IMAGE_MIME

        with open(image_path, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
        return f"data:{mime_type};base64,{encoded_image}"

    async def analyze_image(self, image_path, prompt, max_tokens=2000, temperature=0.01):
        """Analyze a single local image and return the model's response text.

        Args:
            image_path: Path of the local image file to send.
            prompt: Text instruction sent alongside the image.
            max_tokens: ``max_tokens`` value passed to the completion call.
            temperature: ``temperature`` value passed to the completion call.

        Returns:
            The ``content`` of the first choice's message in the response.

        Raises:
            FileNotFoundError: If ``image_path`` does not exist.
            Exception: Any error raised by the completion call is logged
                and re-raised unchanged.
        """
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image not found: {image_path}")

        image_url = self._image_data_url(image_path)

        try:
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {"url": image_url},
                            },
                        ],
                    }
                ],
                max_tokens=max_tokens,
                temperature=temperature,
            )
            return response.choices[0].message.content
        except Exception as e:
            # Log with traceback, then re-raise with the original traceback intact.
            logger.exception("VLMKit analyze_image error: %s", e)
            raise

    def extract_json(self, text):
        """Extract the JSON portion of ``text``.

        A markdown block fenced with ```` ```json ```` takes priority; failing
        that, the first array-or-object-looking span is used. The result is
        returned as a string and is not parsed or validated here.

        Args:
            text: Text that may contain JSON, e.g. a model response.

        Returns:
            The captured JSON substring, or ``text`` unchanged when neither
            pattern matches.
        """
        # Prefer a markdown-formatted json block.
        fenced = _JSON_FENCE_RE.search(text)
        if fenced:
            return fenced.group(1)

        # Otherwise fall back to a bare array or object.
        bare = _JSON_VALUE_RE.search(text)
        if bare:
            return bare.group(1)

        return text