'commit'
This commit is contained in:
14
.idea/Crawler.iml
generated
14
.idea/Crawler.iml
generated
@@ -1,14 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<module type="PYTHON_MODULE" version="4">
|
|
||||||
<component name="NewModuleRootManager">
|
|
||||||
<content url="file://$MODULE_DIR$">
|
|
||||||
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
|
||||||
</content>
|
|
||||||
<orderEntry type="jdk" jdkName="D:\anaconda3\envs\py310" jdkType="Python SDK" />
|
|
||||||
<orderEntry type="sourceFolder" forTests="false" />
|
|
||||||
</component>
|
|
||||||
<component name="PyDocumentationSettings">
|
|
||||||
<option name="format" value="PLAIN" />
|
|
||||||
<option name="myDocStringFormat" value="Plain" />
|
|
||||||
</component>
|
|
||||||
</module>
|
|
||||||
2
.idea/modules.xml
generated
2
.idea/modules.xml
generated
@@ -2,7 +2,7 @@
|
|||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="ProjectModuleManager">
|
<component name="ProjectModuleManager">
|
||||||
<modules>
|
<modules>
|
||||||
<module fileurl="file://$PROJECT_DIR$/.idea/Crawler.iml" filepath="$PROJECT_DIR$/.idea/Crawler.iml" />
|
<module fileurl="file://$PROJECT_DIR$/.idea/aIData.iml" filepath="$PROJECT_DIR$/.idea/aIData.iml" />
|
||||||
</modules>
|
</modules>
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
||||||
@@ -74,11 +74,11 @@ async def get_station_list(d, service, max_scrolls=MAX_SCROLLS):
|
|||||||
logger.info("正在执行图形学切片分析...")
|
logger.info("正在执行图形学切片分析...")
|
||||||
json_data = Kit.crop_cards_from_image(screenshot_path)
|
json_data = Kit.crop_cards_from_image(screenshot_path)
|
||||||
|
|
||||||
# 3. 调用 VL 模型识别并保存数据
|
# 3. 调用混合模式识别 (图形学切片 + 本地 OCR)
|
||||||
stations = await service.process_station_list_vl(screenshot_path, device_info=device_info)
|
stations = await service.process_station_list_hybrid(screenshot_path, device_info=device_info)
|
||||||
logger.info(f"本页识别到 {len(stations)} 个场站")
|
logger.info(f"本页识别到 {len(stations)} 个场站")
|
||||||
|
|
||||||
# 4. 匹配几何卡片与 VL 识别结果
|
# 4. 匹配几何卡片与识别结果 (混合模式下已经包含在 stations 中,但为了兼容旧逻辑进行填充)
|
||||||
if json_data and json_data.get("cards") and stations:
|
if json_data and json_data.get("cards") and stations:
|
||||||
for card in json_data["cards"]:
|
for card in json_data["cards"]:
|
||||||
card_rect = card["rect"] # [x1, y1, x2, y2]
|
card_rect = card["rect"] # [x1, y1, x2, y2]
|
||||||
|
|||||||
@@ -27,18 +27,9 @@ async def open_mini_program():
|
|||||||
await asyncio.sleep(5)
|
await asyncio.sleep(5)
|
||||||
|
|
||||||
# 2. 确保在消息列表页并点击搜索
|
# 2. 确保在消息列表页并点击搜索
|
||||||
logger.info("尝试查找并点击 '搜索按钮'...")
|
logger.info("直接使用坐标点击 '搜索按钮' (84%, 8%)...")
|
||||||
# 优先尝试从 XinDianTu 的模板中复用 SearchButton.jpg (如果存在)
|
w, h = d.window_size()
|
||||||
search_template = os.path.join(os.path.dirname(BASE_DIR), "XinDianTu", "Templates", "SearchButton.jpg")
|
d.click(int(w * 0.84), int(h * 0.08))
|
||||||
if not os.path.exists(search_template):
|
|
||||||
search_template = os.path.join(BASE_DIR, "Templates", "SearchButton.jpg")
|
|
||||||
|
|
||||||
if click_image_template(d, search_template):
|
|
||||||
logger.info("点击了搜索按钮")
|
|
||||||
else:
|
|
||||||
logger.warning("未找到搜索按钮,使用坐标点击 (84%, 8%)")
|
|
||||||
w, h = d.window_size()
|
|
||||||
d.click(int(w * 0.84), int(h * 0.08))
|
|
||||||
|
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(2)
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import json
|
|||||||
import aiohttp
|
import aiohttp
|
||||||
import logging
|
import logging
|
||||||
import base64
|
import base64
|
||||||
|
import cv2
|
||||||
from openai import AsyncOpenAI, BadRequestError
|
from openai import AsyncOpenAI, BadRequestError
|
||||||
from Config.Config import (
|
from Config.Config import (
|
||||||
ALY_LLM_API_KEY, VL_MODEL_NAME, VL_MODEL_NAME_AD
|
ALY_LLM_API_KEY, VL_MODEL_NAME, VL_MODEL_NAME_AD
|
||||||
@@ -15,6 +16,8 @@ from Apps.AiTeJiYiChong.Config.Setting import (
|
|||||||
SAFE_EXCLUDE_RATIO, FALLBACK_WIDTH, FALLBACK_HEIGHT,
|
SAFE_EXCLUDE_RATIO, FALLBACK_WIDTH, FALLBACK_HEIGHT,
|
||||||
BOTTOM_SAFE_EXCLUDE_RATIO
|
BOTTOM_SAFE_EXCLUDE_RATIO
|
||||||
)
|
)
|
||||||
|
from Util.PaddleOCRKit import get_ocr_kit
|
||||||
|
from Apps.AiTeJiYiChong import Kit
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -287,6 +290,65 @@ class ReadImageKit:
|
|||||||
logger.error(f"Error calling VL model for detail: {e}")
|
logger.error(f"Error calling VL model for detail: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@classmethod
async def get_stations_hybrid(cls, image_path: str, device_info=None):
    """
    Hybrid recognition mode: geometric card slicing + local PaddleOCR.

    The screenshot is split into card rectangles by
    ``Kit.crop_cards_from_image``. Each rectangle is cropped, converted to
    a BGR ndarray and handed to ``ocr_kit.recognize``. Results that carry a
    ``station_name`` are enriched with click coordinates and bounds.

    Args:
        image_path: Path of the screenshot to analyse.
        device_info: Accepted so the signature matches
            ``get_stations_from_image``; not used by this method.

    Returns:
        A list of OCR result dicts, one per card whose result contains a
        non-empty ``station_name``. Each dict additionally carries
        ``uia_center_x`` / ``uia_center_y`` (the card's ``click_point``),
        ``rect`` (the card rectangle ``[x1, y1, x2, y2]``) and ``bounds``
        (the same rectangle rescaled to a 0-1000 range). Returns ``[]``
        when the image file is missing or no cards are detected.
    """
    if not os.path.exists(image_path):
        logger.error(f"Image not found: {image_path}")
        return []

    # 1. Detect card regions with the geometric algorithm in Kit.
    # Per the original author's note, the call also writes debug artefacts
    # (.json, _flag.jpg, _vl.jpg); only the returned json_data is used here.
    json_data = Kit.crop_cards_from_image(image_path, save_debug=True)
    if not json_data or not json_data.get("cards"):
        logger.warning("No cards detected by graphical slicing.")
        return []

    # Open inside a context manager so the underlying file handle is
    # released right away; convert() returns an independent in-memory copy
    # that stays valid after the source image is closed.
    with Image.open(image_path) as source_image:
        img = source_image.convert("RGB")

    # The image size is constant for every card, so read it once.
    w, h = img.size
    ocr_kit = get_ocr_kit()

    final_stations = []

    # 2. Run OCR on every card region.
    # Cards are processed serially, which keeps the log output ordered.
    for card in json_data["cards"]:
        rect = card["rect"]  # [x1, y1, x2, y2]

        # Crop the card out of the full screenshot.
        patch = img.crop((rect[0], rect[1], rect[2], rect[3]))

        # PaddleOCR is fed a BGR ndarray, hence the RGB -> BGR conversion.
        patch_cv = cv2.cvtColor(np.array(patch), cv2.COLOR_RGB2BGR)

        logger.info(f"正在识别卡片 {card['id']}: {rect}")
        res = ocr_kit.recognize(patch_cv)

        if not (res and res.get("station_name")):
            logger.warning(f"卡片 {card['id']} 识别失败或无名称")
            continue

        # Attach the click point and the original rectangle.
        res["uia_center_x"] = card["click_point"][0]
        res["uia_center_y"] = card["click_point"][1]
        res["rect"] = rect

        # Rescale the rectangle into 0-1000 space so the result stays
        # compatible with the VL-mode output.
        res["bounds"] = [
            int(rect[0] * 1000 / w),
            int(rect[1] * 1000 / h),
            int(rect[2] * 1000 / w),
            int(rect[3] * 1000 / h),
        ]

        final_stations.append(res)
        logger.info(f"卡片 {card['id']} 识别成功: {res['station_name']}")

    return final_stations
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
async def get_stations_from_image(cls, image_path: str, device_info=None):
|
async def get_stations_from_image(cls, image_path: str, device_info=None):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -125,6 +125,69 @@ class AiTeJiYiChongService:
|
|||||||
logger.info(f"场站详情处理完成: {name}")
|
logger.info(f"场站详情处理完成: {name}")
|
||||||
return detail
|
return detail
|
||||||
|
|
||||||
|
async def process_station_list_hybrid(self, image_path, device_info=None) -> list:
    """
    Process a station-list screenshot in hybrid mode (slicing + local OCR).

    Stations are obtained from ``ReadImageKit.get_stations_hybrid``. For
    every station with a name, one profile row and one status row are
    saved through ``self.station_profile_model`` and
    ``self.station_status_model`` inside a single session, which is
    committed once after all stations have been handled.

    Args:
        image_path: Path of the screenshot, forwarded to the recogniser.
        device_info: Forwarded unchanged to the recogniser.

    Returns:
        The station dicts that were saved, each extended with
        ``station_hash``, ``profile_id``, ``valid_start_time`` (ISO-8601
        string) and ``status_id``. Returns ``[]`` when nothing was
        recognised.
    """

    def _to_int(value) -> int:
        # OCR output is noisy: tolerate None / non-numeric text instead of
        # aborting the whole page before the commit.
        if value is None:
            return 0
        try:
            return int(value)
        except (TypeError, ValueError):
            logger.warning(f"Unparsable integer value from OCR: {value!r}")
            return 0

    def _to_float(value) -> float:
        # Falsy values (None, "", 0) map to 0.0, as in the original code.
        if not value:
            return 0.0
        try:
            return float(value)
        except (TypeError, ValueError):
            logger.warning(f"Unparsable float value from OCR: {value!r}")
            return 0.0

    station_list = await ReadImageKit.get_stations_hybrid(image_path, device_info=device_info)
    if not station_list:
        return []

    processed_stations = []
    async with await self.db.get_session() as session:
        for station in station_list:
            name = station.get("station_name")
            if not name:
                continue

            station_hash = self.get_hash(name)
            now = datetime.now()
            station["station_hash"] = station_hash

            # 1. Save the profile row.
            profile_id = self.generate_id()
            await self.station_profile_model.save(
                session=session,
                id=profile_id,
                station_hash=station_hash,
                operator=self.operator,
                station_name=name,
                valid_start_time=now
            )
            station["profile_id"] = profile_id
            station["valid_start_time"] = now.isoformat()

            # 2. Save the status row (price and charging piles).
            status_id = self.generate_id()

            # Sum the pile counters; entries that are not dicts are skipped.
            piles_data = station.get("piles")
            total, free = 0, 0
            if isinstance(piles_data, list):
                for p in piles_data:
                    if not isinstance(p, dict):
                        continue
                    total += _to_int(p.get("total", 0))
                    free += _to_int(p.get("free", 0))

            await self.station_status_model.save(
                session=session,
                id=status_id,
                station_hash=station_hash,
                total_piles=total,
                free_piles=free,
                piles_detail_json=piles_data,
                current_price=_to_float(station.get("price")),
                parking_info=station.get("parking", ""),
                distance=station.get("distance", ""),
                valid_start_time=now
            )
            station["status_id"] = status_id

            processed_stations.append(station)

        await session.commit()

    return processed_stations
|
||||||
|
|
||||||
async def process_station_list_vl(self, image_path, device_info=None) -> list:
|
async def process_station_list_vl(self, image_path, device_info=None) -> list:
|
||||||
"""
|
"""
|
||||||
基于 VL 模式处理场站列表
|
基于 VL 模式处理场站列表
|
||||||
|
|||||||
@@ -1,41 +0,0 @@
|
|||||||
# coding=utf-8
|
|
||||||
import asyncio
|
|
||||||
import uiautomator2 as u2
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import uuid
|
|
||||||
|
|
||||||
project_root = r"D:\dsWork\aiData"
|
|
||||||
if project_root not in sys.path:
|
|
||||||
sys.path.append(project_root)
|
|
||||||
|
|
||||||
from Apps.AiTeJiYiChong import Kit
|
|
||||||
from Apps.AiTeJiYiChong.Kit import take_screenshot, read_image
|
|
||||||
from Config.Config import TEMP_IMAGE_DIR
|
|
||||||
|
|
||||||
async def test_click_and_detail():
    """
    Manual smoke check: tap the first detected card and capture its detail page.

    Connects to a device via ``u2.connect()``, takes a screenshot, runs
    ``Kit.crop_cards_from_image`` on it and, if at least one card is
    reported, clicks that card's ``click_point``, waits five seconds and
    takes a second screenshot. Progress is printed to stdout.
    """
    device = u2.connect()
    shot_id = str(uuid.uuid4())
    print("Taking initial screenshot...")
    list_shot = take_screenshot(device, shot_id, save_dir=TEMP_IMAGE_DIR)

    print(f"Analyzing cards in {list_shot}...")
    analysis = Kit.crop_cards_from_image(list_shot)

    # Guard clause: nothing to tap when the slicer reports no cards.
    if not (analysis and analysis.get("cards")):
        print("No cards found on current screen.")
        return

    tap_x, tap_y = analysis["cards"][0]["click_point"]
    print(f"Clicking card at ({tap_x}, {tap_y})...")
    device.click(tap_x, tap_y)

    print("Waiting for detail page...")
    await asyncio.sleep(5)

    detail_shot = take_screenshot(device, f"detail_{shot_id}", save_dir=TEMP_IMAGE_DIR)
    print(f"Detail page screenshot: {detail_shot}")
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
asyncio.run(test_click_and_detail())
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +0,0 @@
|
|||||||
微信小程序名称: 艾特吉易充
|
|
||||||
Binary file not shown.
|
Before Width: | Height: | Size: 1.3 KiB |
@@ -1,16 +1,16 @@
|
|||||||
# 黄海在公司内网开发时的配置信息
|
# 黄海在公司内网开发时的配置信息
|
||||||
DORIS_HOST = "10.10.14.204"
|
# DORIS_HOST = "10.10.14.204"
|
||||||
DORIS_PORT = 9030
|
# DORIS_PORT = 9030
|
||||||
DORIS_FENODES = "10.10.14.204:8030"
|
# DORIS_FENODES = "10.10.14.204:8030"
|
||||||
REDIS_HOST = '10.10.14.14'
|
# REDIS_HOST = '10.10.14.14'
|
||||||
REDIS_PASSWORD = None # 如果没有密码则设为 None
|
# REDIS_PASSWORD = None # 如果没有密码则设为 None
|
||||||
|
|
||||||
# 黄海在家开发时的配置信息
|
# 黄海在家开发时的配置信息
|
||||||
#DORIS_HOST = "www.hzkjai.com"
|
DORIS_HOST = "www.hzkjai.com"
|
||||||
#DORIS_PORT = 27025
|
DORIS_PORT = 27025
|
||||||
#DORIS_FENODES = "www.hzkjai.com:27024"
|
DORIS_FENODES = "www.hzkjai.com:27024"
|
||||||
#REDIS_HOST = '127.0.0.1'
|
REDIS_HOST = '127.0.0.1'
|
||||||
#REDIS_PASSWORD = "DsideaL147258369"
|
REDIS_PASSWORD = "DsideaL147258369"
|
||||||
|
|
||||||
# 视觉模型配置
|
# 视觉模型配置
|
||||||
VL_MODEL_NAME = "qwen3-vl-flash"
|
VL_MODEL_NAME = "qwen3-vl-flash"
|
||||||
|
|||||||
Binary file not shown.
@@ -50,8 +50,8 @@ class OcrParser:
|
|||||||
|
|
||||||
# 2. 距离 (Distance)
|
# 2. 距离 (Distance)
|
||||||
for line in clean_lines:
|
for line in clean_lines:
|
||||||
# 匹配 "7.4km", "17.4km"
|
# 匹配 "7.4km", "17.4km", "90m"
|
||||||
m = re.search(r"(\d+(\.\d+)?)\s*km", line, re.IGNORECASE)
|
m = re.search(r"(\d+(\.\d+)?)\s*(km|m)", line, re.IGNORECASE)
|
||||||
if m:
|
if m:
|
||||||
result["distance"] = m.group(0)
|
result["distance"] = m.group(0)
|
||||||
break
|
break
|
||||||
|
|||||||
@@ -26,13 +26,23 @@ class PaddleOCRKit:
|
|||||||
Using PP-OCRv4 mobile model for speed.
|
Using PP-OCRv4 mobile model for speed.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
|
# 1. 抑制 PaddlePaddle 的日志输出
|
||||||
|
os.environ['GLOG_minloglevel'] = '3' # 抑制 GLOG (C++) 日志
|
||||||
|
os.environ['FLAGS_allocator_strategy'] = 'auto_growth' # 减少显存占用警告
|
||||||
|
|
||||||
|
# 2. 抑制 Python 层的 paddle 日志
|
||||||
|
import logging as py_logging
|
||||||
|
py_logging.getLogger('paddle').setLevel(py_logging.ERROR)
|
||||||
|
py_logging.getLogger('ppocr').setLevel(py_logging.ERROR)
|
||||||
|
|
||||||
logger.info("Initializing PaddleOCR (PP-OCRv4 Mobile)...")
|
logger.info("Initializing PaddleOCR (PP-OCRv4 Mobile)...")
|
||||||
# use_angle_cls is deprecated, using use_textline_orientation instead
|
# use_angle_cls is deprecated, using use_textline_orientation instead
|
||||||
# ocr_version='PP-OCRv4' defaults to mobile model
|
# ocr_version='PP-OCRv4' defaults to mobile model
|
||||||
self._ocr = PaddleOCR(
|
self._ocr = PaddleOCR(
|
||||||
use_textline_orientation=True,
|
use_textline_orientation=True,
|
||||||
lang="ch",
|
lang="ch",
|
||||||
ocr_version='PP-OCRv4'
|
ocr_version='PP-OCRv4',
|
||||||
|
show_log=False # 关键:关闭 PaddleOCR 内部日志打印
|
||||||
)
|
)
|
||||||
logger.info("PaddleOCR initialized successfully.")
|
logger.info("PaddleOCR initialized successfully.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user