# Source listing header captured by the file viewer: 67 lines, 2.3 KiB, Python.
# coding=utf-8
|
|
import base64
import json
import logging
import mimetypes
import os
import re

from openai import AsyncOpenAI

from Config.Config import ALY_LLM_API_KEY, VL_MODEL_NAME
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
class VLMKit:
    """Async helper that sends a local image and a prompt to a vision-language model.

    The model is reached through an ``AsyncOpenAI`` client. Explicit
    constructor arguments take precedence; otherwise the project
    configuration values and the DashScope compatible-mode endpoint are used.
    """

    def __init__(self, api_key=None, base_url=None, model=None):
        """Resolve connection settings and create the API client.

        Args:
            api_key: API key; falls back to ``ALY_LLM_API_KEY`` when falsy.
            base_url: Endpoint URL; falls back to the DashScope
                compatible-mode URL when falsy.
            model: Model name; falls back to ``VL_MODEL_NAME`` when falsy.
        """
        self.api_key = api_key or ALY_LLM_API_KEY
        self.base_url = base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
        self.model = model or VL_MODEL_NAME
        self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)

    @staticmethod
    def _guess_image_mime(image_path):
        """Return the image MIME type implied by the extension of ``image_path``.

        Falls back to ``image/jpeg`` when the extension is unknown or maps to
        a non-image type.
        """
        guessed, _ = mimetypes.guess_type(str(image_path))
        if guessed and guessed.startswith("image/"):
            return guessed
        return "image/jpeg"

    async def analyze_image(self, image_path, prompt, max_tokens=2000, temperature=0.01):
        """Analyze a single local image and return the model's response text.

        Args:
            image_path: Path of the image file to send.
            prompt: Text sent alongside the image in the same user message.
            max_tokens: Forwarded to the chat-completions call.
            temperature: Forwarded to the chat-completions call.

        Returns:
            The ``content`` of the first choice in the response.

        Raises:
            FileNotFoundError: If ``image_path`` is not an existing file.
            Exception: Any error raised while calling the API or reading the
                response is logged and re-raised unchanged.
        """
        # isfile (not exists) so that a directory is rejected here with a
        # clear message instead of failing later inside open().
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image not found: {image_path}")

        with open(image_path, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode("utf-8")

        # Declare the real image type in the data URL rather than always
        # claiming JPEG.
        mime_type = self._guess_image_mime(image_path)
        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:{mime_type};base64,{encoded_image}"},
        }

        try:
            response = await self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            image_part,
                        ],
                    }
                ],
                max_tokens=max_tokens,
                temperature=temperature,
            )
            return response.choices[0].message.content
        except Exception as e:
            # logger.exception records the traceback; bare ``raise`` keeps the
            # original traceback intact for the caller.
            logger.exception("VLMKit analyze_image error: %s", e)
            raise

    def extract_json(self, text):
        """Extract the JSON portion of a piece of text.

        Lookup order:
            1. The body of the first markdown json fenced block.
            2. The first bracketed span (``[...]`` or ``{...}``). When that
               span starts with a valid JSON value, exactly that value is
               returned, so trailing prose or a second JSON value is not
               swallowed. Otherwise the widest span (first opener to last
               matching closer) is returned as-is.
            3. The input itself.

        Args:
            text: Text to search, typically a model reply.

        Returns:
            The extracted text (not parsed). Empty or ``None`` input is
            returned unchanged.
        """
        if not text:
            return text

        # Prefer a markdown-style json code block.
        fenced = re.search(r'```json\s*(.*?)\s*```', text, re.DOTALL)
        if fenced:
            return fenced.group(1)

        # Otherwise look for an array or an object.
        bracketed = re.search(r'(\[.*\]|\{.*\})', text, re.DOTALL)
        if bracketed is None:
            return text

        # The greedy pattern may overshoot past the end of the first JSON
        # value; let the JSON decoder find where that value really ends.
        start = bracketed.start(1)
        try:
            _, end = json.JSONDecoder().raw_decode(text, start)
        except ValueError:
            # Not strictly valid JSON: keep the widest span, unchanged.
            return bracketed.group(1)
        return text[start:end]
|