This commit is contained in:
HuangHai
2026-01-12 08:09:32 +08:00
parent 22596afaa4
commit ca23ebf606
44 changed files with 60 additions and 877 deletions

1
.idea/vcs.xml generated
View File

@@ -3,5 +3,6 @@
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

View File

@@ -0,0 +1,31 @@
# Crawl settings
# Swipe distance ratio (0.1 ~ 0.9): the larger the value, the longer the swipe;
# 0.3-0.5 is recommended so that content in the middle is not skipped
SCROLL_DISTANCE_RATIO = 0.3
# Maximum number of swipes / page turns; crawling stops once this count is reached
MAX_SCROLLS = 100
# Default crawl radius (km); crawling stops when a station farther away than this is detected
MAX_CRAWL_DISTANCE = 50
# Station de-duplication expiry (seconds): a station that shows up again within this
# window is not clicked into its detail page a second time
REDIS_STATION_EXPIRE = 120
# Database retention period: historical rows (is_current=0) older than this are deleted
# NOTE(review): the unit is presumably days, going by the constant name — confirm
DATA_RETENTION_DAYS = 365
# Wait times (seconds)
# Time to wait for the detail page to load after clicking into it
WAIT_DETAIL_PAGE_LOAD = 2.5
# Time to wait for the list page to refresh after returning from the detail page
WAIT_BACK_TO_LIST = 1.5
# Time to wait for the page content to load and settle after a swipe
WAIT_AFTER_SCROLL = 3.0
# Coordinate calculation and click safety
# Top-of-screen exclusion ratio (0.0~1.0): no clicks inside this band
# (avoids the status bar, filter bar, etc.)
SAFE_EXCLUDE_RATIO = 0.20
# Bottom-of-screen exclusion ratio (0.0~1.0): no clicks inside this band
# (avoids the bottom navigation bar, action buttons, etc.)
BOTTOM_SAFE_EXCLUDE_RATIO = 0.1
# Fallback screen width, used when device info cannot be obtained automatically
FALLBACK_WIDTH = 1080
# Fallback screen height, used when device info cannot be obtained automatically
FALLBACK_HEIGHT = 2400

View File

@@ -27,7 +27,7 @@ from Config.Config import (
OBS_TMP_PREFIX, CDN_DOMAIN, SCROLL_DISTANCE_RATIO,
MAX_SCROLLS, REDIS_STATION_EXPIRE,
WAIT_DETAIL_PAGE_LOAD, WAIT_BACK_TO_LIST, WAIT_AFTER_SCROLL,
MAX_CRAWL_DISTANCE
MAX_CRAWL_DISTANCE, TEMP_IMAGE_DIR
)
# --- 用户配置区域 ---
@@ -128,7 +128,7 @@ async def get_station_list(d, service, uploader, max_scrolls=MAX_SCROLLS):
image_uuid = str(uuid.uuid4())
# 使用相对路径: 基于当前脚本目录下的 Images 文件夹
base_dir = os.path.dirname(os.path.abspath(__file__))
save_dir = os.path.join(base_dir, "./Images")
save_dir = TEMP_IMAGE_DIR
screenshot_path = take_screenshot(d, image_uuid, save_dir=save_dir)
logger.info(f"Step [1/6] 列表页截图已完成: {screenshot_path} (耗时: {time.time() - t_shot:.2f}s)")
@@ -383,7 +383,7 @@ async def get_station_list(d, service, uploader, max_scrolls=MAX_SCROLLS):
# 使用几何特征识别 "全部时段" 按钮
# 临时截图
temp_uuid = "temp_find_expand"
screenshot_path = take_screenshot(d, temp_uuid, save_dir="Temp")
screenshot_path = take_screenshot(d, temp_uuid, save_dir=TEMP_IMAGE_DIR)
# 尝试识别,将调试图片保存到 Images 目录
t_find = time.time()

View File

@@ -3,7 +3,7 @@ import os
import cv2
import numpy as np
import time
from Config.Config import BOTTOM_SAFE_EXCLUDE_RATIO
from Config.Config import BOTTOM_SAFE_EXCLUDE_RATIO, TEMP_IMAGE_DIR
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
@@ -29,7 +29,7 @@ def save_image(path, img):
return False
# 截图
def take_screenshot(d, image_uuid, save_dir="Screenshot"):
def take_screenshot(d, image_uuid, save_dir=TEMP_IMAGE_DIR):
path = f"{save_dir}/{image_uuid}.jpg"
os.makedirs(save_dir, exist_ok=True)
d.screenshot(path)
@@ -61,7 +61,7 @@ def click_image_template(d, template_path, timeout=5.0, threshold=0.8):
while time.time() - start_time < timeout:
# 临时截图
temp_uuid = "temp_click_check"
screenshot_path = take_screenshot(d, temp_uuid, save_dir="Temp")
screenshot_path = take_screenshot(d, temp_uuid, save_dir=TEMP_IMAGE_DIR)
target = read_image(screenshot_path)
if target is None:

View File

@@ -8,7 +8,7 @@ import uiautomator2 as u2
from Util.Kit import take_screenshot, detect_black_agree_button, click_image_template, detect_ad_close_x, detect_any_ad_close, detect_bottom_close_circle
from Util.ObsUtil import ObsUploader
from Util.XinDianTuReadImageKit import XinDianTuReadImageKit
from Config.Config import OBS_TMP_PREFIX, CDN_DOMAIN
from Config.Config import OBS_TMP_PREFIX, CDN_DOMAIN, TEMP_IMAGE_DIR
# pip install adbutils
# 配置日志输出,方便调试和监控
@@ -29,7 +29,7 @@ async def check_and_close_ad(d):
image_uuid = str(uuid.uuid4())
# 使用相对路径: 基于当前脚本目录下的 Images 文件夹
base_dir = os.path.dirname(os.path.abspath(__file__))
save_dir = os.path.join(base_dir, "./Images")
save_dir = TEMP_IMAGE_DIR
screenshot_path = take_screenshot(d, image_uuid, save_dir=save_dir)
logger.info(f"Step [广告检测截图] 耗时: {time.time() - t1:.4f}s")

View File

@@ -1,52 +1,21 @@
# 开发环境不同,配置信息不同
# Doris V4
# DORIS_HOST = "10.10.14.204"
# DORIS_PORT = 9030
# DORIS_FENODES = "10.10.14.204:8030"
# REDIS_HOST = '10.10.14.14'
# REDIS_PASSWORD = None # 如果没有密码则设为 None
DORIS_HOST = "www.hzkjai.com"
DORIS_PORT = 27025
DORIS_FENODES = "www.hzkjai.com:27024"
REDIS_HOST = '127.0.0.1'
REDIS_PASSWORD = "DsideaL147258369"
# 黄海在公司内网开发时的配置信息
DORIS_HOST = "10.10.14.204"
DORIS_PORT = 9030
DORIS_FENODES = "10.10.14.204:8030"
REDIS_HOST = '10.10.14.14'
REDIS_PASSWORD = None # 如果没有密码则设为 None
# 采集配置
# 滑动距离比例 (0.1 ~ 0.9),数值越大滑动幅度越大,建议 0.3-0.5 以避免错过中间内容
SCROLL_DISTANCE_RATIO = 0.3
# 最大滑动/翻页次数,达到此次数后停止采集
MAX_SCROLLS = 100
# 默认抓取半径(公里),当检测到场站距离超过此值时停止采集
MAX_CRAWL_DISTANCE = 50
# 场站去重过期时间(秒),在此时间内重复出现的场站不会再次点击进入详情页
REDIS_STATION_EXPIRE = 120
# 数据库数据保留时长超过此时长的历史数据is_current=0将被删除
DATA_RETENTION_DAYS = 365
# 等待时间配置 (秒)
# 点击进入详情页后等待加载的时间
WAIT_DETAIL_PAGE_LOAD = 2.5
# 从详情页返回列表页后等待页面刷新的时间
WAIT_BACK_TO_LIST = 1.5
# 执行滑动操作后等待页面内容加载和稳定的时间
WAIT_AFTER_SCROLL = 3.0
# 坐标计算与安全防护
# 屏幕顶部安全排除比例 (0.0~1.0),此比例区域内不进行点击(避开状态栏、筛选栏等)
SAFE_EXCLUDE_RATIO = 0.20
# 屏幕底部安全排除比例 (0.0~1.0),此比例区域内不进行点击(避开底部导航栏、功能按钮等)
BOTTOM_SAFE_EXCLUDE_RATIO = 0.1
# 默认回退屏幕宽度,当无法自动获取设备信息时使用
FALLBACK_WIDTH = 1080
# 默认回退屏幕高度,当无法自动获取设备信息时使用
FALLBACK_HEIGHT = 2400
# 黄海在家开发时的配置信息
#DORIS_HOST = "www.hzkjai.com"
#DORIS_PORT = 27025
#DORIS_FENODES = "www.hzkjai.com:27024"
#REDIS_HOST = '127.0.0.1'
#REDIS_PASSWORD = "DsideaL147258369"
# 视觉模型配置
VL_MODEL_NAME = "qwen3-vl-flash"
VL_MODEL_NAME_AD = "qwen-vl-max"
# 华为云配置
OBS_AK = "WAFBGJACKDOQZDH1MKZ1"
OBS_SK = "dlWTUbqgCICaYJG3n0Rot4jXaen2HnfFtMVxiPEo"
@@ -75,10 +44,12 @@ doris = {
"database": DORIS_DATABASE
}
# REDIS 配置
REDIS_DB = 2
REDIS_DECODE_RESPONSES = True
REDIS_PORT = 18890
REDIS_MAX_CONNECTIONS = 200
# 临时图片存储路径
TEMP_IMAGE_DIR = r"d:\dsWork\aiData\Output"

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 229 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 153 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 153 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

View File

@@ -1,29 +0,0 @@
# coding=utf-8
import os
import sys
import time
import numpy as np
# Add project root to sys.path
current_dir = os.path.dirname(os.path.abspath(__file__))
# project_root = os.path.dirname(current_dir)
# if project_root not in sys.path:
# sys.path.append(project_root)
print("Importing PaddleOCR...")
from paddleocr import PaddleOCR
print("Importing LlmUtil...")
# from Util.LlmUtil import get_llm_response
print("Imports done.")
def main():
    """Run PaddleOCR once on ``2.jpg`` next to this script and print the raw result."""
    sample_image = os.path.join(current_dir, "2.jpg")

    print("Initializing OCR...")
    engine = PaddleOCR(use_textline_orientation=True, lang="ch")

    print("Running OCR...")
    raw_result = engine.ocr(sample_image)
    print("OCR Done.")
    print(raw_result)
if __name__ == "__main__":
main()

View File

@@ -1,119 +0,0 @@
# coding=utf-8
import os
import sys
from paddleocr import PaddleOCR
import numpy as np
def _iter_ocr_messages(result):
    """Yield one display line per finding in a PaddleOCR ``ocr()`` return value.

    Three result layouts are recognised for ``result[0]``:
      * a mapping-like object exposing ``get`` and containing ``'rec_texts'``;
      * an object with ``rec_texts`` / ``rec_scores`` attributes;
      * a legacy list of ``[box, (text, score)]`` entries.
    Anything else is reported as an unparseable structure.
    """
    if not result:
        yield "未识别到任何文字 (Result is empty)。"
        return

    # Only the first entry is inspected (the result for a single image).
    res = result[0]
    if res is None:
        yield "未识别到任何文字 (Result[0] is None)。"
        return

    if hasattr(res, 'get') and 'rec_texts' in res:
        texts = res.get('rec_texts', [])
        scores = res.get('rec_scores', [])
    elif hasattr(res, 'rec_texts'):
        texts = res.rec_texts
        scores = res.rec_scores
    elif isinstance(res, list):
        for idx, line in enumerate(res):
            try:
                if len(line) >= 2 and isinstance(line[1], (tuple, list)):
                    text, score = line[1]
                    msg = f"{idx+1}: {text} (置信度: {score:.4f})"
                else:
                    msg = f"{idx+1}: {line} (格式未知)"
            except (TypeError, ValueError, IndexError) as e:
                # A malformed entry must not abort the whole listing.
                msg = f"{idx+1} 解析失败: {e}"
            yield msg
        return
    else:
        yield f"无法解析结果结构: {type(res)}"
        yield f"Result content: {res}"
        return

    # Explicit length test: plain truthiness is ambiguous for array-like containers.
    if texts is None or len(texts) == 0:
        yield "未识别到任何文字 (rec_texts is empty)。"
        return
    for i, text in enumerate(texts):
        # Tolerate a score list shorter than the text list.
        score = scores[i] if i < len(scores) else 0.0
        yield f"{i+1}: {text} (置信度: {score:.4f})"


def test_ocr():
    """Smoke-test PaddleOCR on a sample image.

    Looks for ``2.jpg`` (falling back to ``1.jpg``) in this script's directory,
    runs OCR on it, and echoes every recognised line both to stdout and to
    ``ocr_output.txt`` in the same directory. Initialisation or inference
    failures are reported on stdout and end the run early.

    Returns:
        None.
    """
    # 1. Initialise the PaddleOCR model
    print("正在初始化 PaddleOCR 模型...")
    try:
        ocr = PaddleOCR(use_textline_orientation=True, lang="ch")
    except Exception as e:  # third-party boundary: report whatever went wrong and stop
        print(f"初始化失败: {e}")
        return

    # 2. Locate the test image: prefer 2.jpg, fall back to 1.jpg
    current_dir = os.path.dirname(os.path.abspath(__file__))
    image_path = os.path.join(current_dir, "2.jpg")
    if not os.path.exists(image_path):
        image_path = os.path.join(current_dir, "1.jpg")
    if not os.path.exists(image_path):
        print(f"错误: 未找到测试图片: {image_path}")
        print("请将测试图片命名为 2.jpg 或 1.jpg 并放置在 Test 目录下。")
        return
    print(f"正在识别图片: {image_path}")

    # 3. Run recognition
    try:
        result = ocr.ocr(image_path)
    except Exception as e:  # third-party boundary, same policy as initialisation
        print(f"识别过程发生异常: {e}")
        return

    # 4. Report the result on stdout and mirror it to a file for later inspection
    print("\n" + "="*20 + " 识别结果 " + "="*20)
    sys.stdout.flush()
    output_file = os.path.join(current_dir, "ocr_output.txt")
    with open(output_file, "w", encoding="utf-8") as f_out:
        for msg in _iter_ocr_messages(result):
            print(msg)
            f_out.write(msg + "\n")
    print("="*50)
    sys.stdout.flush()
if __name__ == "__main__":
test_ocr()

View File

@@ -1,173 +0,0 @@
# coding=utf-8
import os
import sys
import time
import asyncio
import json
# Add project root to sys.path
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(current_dir)
if project_root not in sys.path:
sys.path.append(project_root)
from paddleocr import PaddleOCR
from Util.LlmUtil import get_llm_response
from Util.OcrParser import OcrParser
import re
LOG_FILE = os.path.join(current_dir, "ocr_llm_debug.txt")
def log(msg):
    """Echo *msg* to stdout (flushed immediately) and append it to ``LOG_FILE``."""
    print(msg, flush=True)
    with open(LOG_FILE, "a", encoding="utf-8") as sink:
        sink.write(msg + "\n")
# ... imports ...
def run_ocr_sync():
    """Run PaddleOCR on the sample image and collect the recognised text.

    Uses ``2.jpg`` in this script's directory, falling back to ``1.jpg``.
    Progress and timings are reported through ``log``.

    Returns:
        A 3-tuple ``(ocr_text_lines, ocr_text_block, (t_ocr_start, t_ocr_end))``
        where ``ocr_text_lines`` is the sequence of recognised strings and
        ``ocr_text_block`` is those strings joined with newlines.
        If PaddleOCR fails to initialise or to run, ``(None, None, None)`` is
        returned so that callers can always unpack three values.
    """
    image_path = os.path.join(current_dir, "2.jpg")
    if not os.path.exists(image_path):
        image_path = os.path.join(current_dir, "1.jpg")
    log(f"Testing OCR + LLM Pipeline on: {image_path}")
    log("-" * 50)

    # --- Step 1: PaddleOCR ---
    log("Initializing PaddleOCR...")
    t_init_start = time.time()
    try:
        # Try the lightweight (mobile) model for speed; per the original note,
        # ocr_version='PP-OCRv4' usually defaults to the mobile variant.
        ocr = PaddleOCR(use_textline_orientation=True, lang="ch", ocr_version='PP-OCRv4')
    except Exception as e:  # third-party boundary: log and give up
        log(f"PaddleOCR Init Failed: {e}")
        return None, None, None
    t_init_end = time.time()
    log(f"PaddleOCR Init Time: {t_init_end - t_init_start:.4f}s")

    log("Running OCR Inference...")
    t_ocr_start = time.time()
    try:
        result = ocr.ocr(image_path)
    except Exception as e:  # third-party boundary: log and give up
        log(f"OCR Inference Failed: {e}")
        return None, None, None
    t_ocr_end = time.time()

    ocr_text_lines = []
    # Handle different result structures
    if not result:
        log("OCR returned empty result.")
    else:
        res = result[0]
        if res is None:
            log("OCR result[0] is None.")
        elif hasattr(res, 'get') and 'rec_texts' in res:
            ocr_text_lines = res.get('rec_texts', [])
        elif hasattr(res, 'rec_texts'):
            ocr_text_lines = res.rec_texts
        elif isinstance(res, list):
            # Legacy layout: each entry is [box, (text, score)]; keep the text only.
            for line in res:
                if len(line) >= 2 and isinstance(line[1], (tuple, list)):
                    ocr_text_lines.append(line[1][0])

    ocr_text_block = "\n".join(ocr_text_lines)
    log(f"OCR Result ({t_ocr_end - t_ocr_start:.4f}s):")
    log(ocr_text_block)
    log("-" * 50)
    return ocr_text_lines, ocr_text_block, (t_ocr_start, t_ocr_end)
async def run_parsing_comparison(ocr_text_lines, ocr_text_block, timing_ocr):
    """Parse the same OCR output with the regex parser and with the LLM, and log timings.

    Args:
        ocr_text_lines: recognised text lines, passed to ``OcrParser.parse``.
        ocr_text_block: the OCR text as one string, interpolated into the LLM prompt.
        timing_ocr: ``(t_ocr_start, t_ocr_end)`` pair used to compute the OCR duration.

    Returns:
        None. All results and the timing summary are emitted through ``log``.
    """
    t_ocr_start, t_ocr_end = timing_ocr
    ocr_duration = t_ocr_end - t_ocr_start
    # --- Mode 1: Regex Parsing ---
    log("Running Regex Parsing...")
    t_regex_start = time.time()
    try:
        regex_data = OcrParser.parse(ocr_text_lines)
        log("\nParsed Data (Regex):")
        log(json.dumps(regex_data, indent=2, ensure_ascii=False))
    except Exception as e:
        # A regex failure is logged but does not stop the LLM comparison below.
        log(f"Regex Parsing Failed: {e}")
    t_regex_end = time.time()
    regex_duration = t_regex_end - t_regex_start
    log(f"Regex Parsing Time: {regex_duration:.4f}s")
    log("-" * 50)
    # --- Mode 2: LLM Parsing ---
    log("Running LLM Parsing...")
    # NOTE(review): the first example tag in the prompt below is an empty string;
    # it may have lost a character — confirm against the original file.
    prompt = f"""
You are a data extraction assistant. Below is the OCR text recognized from a charging station list card.
Please extract the structured data and return it ONLY as a JSON object (no markdown, no extra text).
Fields to extract:
- station_name: (String) Name of the charging station.
- distance: (String) Distance info (e.g., "7.4km").
- price: (String) Price info (e.g., "0.7111/度").
- tags: (List[String]) Any tags like "", "闲3/4", "组团", "2倍积分", "P", etc.
- parking_info: (String) Parking related info.
OCR Text:
{ocr_text_block}
"""
    t_llm_start = time.time()
    response_text = ""
    try:
        log("Starting LLM request...")
        # Stream the response: echo each chunk as it arrives and accumulate the full text.
        async for chunk in get_llm_response(prompt, stream=True):
            print(chunk, end='', flush=True)
            response_text += chunk
        print("\n")
        log("LLM request finished.")
        t_llm_end = time.time()
        log(f"\nLLM Response ({t_llm_end - t_llm_start:.4f}s):")
        log(response_text)
        try:
            # Strip markdown code fences the model may add despite the instructions.
            clean_text = response_text.replace("```json", "").replace("```", "").strip()
            data = json.loads(clean_text)
            log("\nParsed JSON Data:")
            log(json.dumps(data, indent=2, ensure_ascii=False))
        except json.JSONDecodeError:
            log("\nFailed to parse JSON directly.")
    except Exception as e:
        log(f"LLM Error: {e}")
        # Still record an end time so the summary below can be computed.
        t_llm_end = time.time()
    log("-" * 50)
    log(f"Summary:")
    log(f"OCR Time: {ocr_duration:.4f}s")
    log(f"Regex Parsing Time: {regex_duration:.4f}s")
    log(f"LLM Parsing Time: {t_llm_end - t_llm_start:.4f}s")
    total_regex = ocr_duration + regex_duration
    total_llm = ocr_duration + (t_llm_end - t_llm_start)
    log(f"Total Pipeline (OCR+Regex): {total_regex:.4f}s")
    log(f"Total Pipeline (OCR+LLM): {total_llm:.4f}s")
def main():
    """Reset the debug log, run OCR, then compare regex and LLM parsing of its output."""
    # Truncate the log so every run starts from a clean file
    with open(LOG_FILE, "w", encoding="utf-8") as log_file:
        log_file.write("Starting TestOcrLlm...\n")

    ocr_lines, ocr_text, timing = run_ocr_sync()
    if not ocr_lines:
        # Nothing recognised: there is nothing to parse
        return
    asyncio.run(run_parsing_comparison(ocr_lines, ocr_text, timing))
if __name__ == "__main__":
main()

View File

@@ -1,107 +0,0 @@
# coding=utf-8
import re
import json
def parse_ocr_lines(lines):
    """Extract charging-station fields from OCR text lines using regular expressions.

    Args:
        lines: iterable of strings. A leading "行 N: " prefix and a trailing
            "(置信度: ...)" suffix are stripped from each line before parsing.

    Returns:
        dict with keys ``station_name`` (str), ``distance`` (str, e.g. "7.4km"),
        ``price`` (float or None), ``piles`` (list of dicts with ``type``,
        ``free``, ``total``), ``parking`` (str) and ``tags`` (list; this
        function never adds to it).

    NOTE(review): several string literals below are empty (``""``). As written,
    ``line.startswith("")`` and ``"" in line`` are always true, so every line of
    length >= 2 is skipped in the station-name pass and ``current_type`` is
    always reset to "". These literals have very likely lost a character;
    confirm against the original file before relying on this function.
    """
    result = {
        "station_name": "",
        "distance": "",
        "price": None,
        "piles": [],
        "parking": "",
        "tags": []
    }
    # Pre-process lines: remove confidence scores for parsing
    clean_lines = []
    for line in lines:
        # Remove prefix "行 X: "
        text = re.sub(r"^行\s*\d+:\s*", "", line)
        # Remove suffix "(置信度: ...)"
        text = re.sub(r"\s*\(置信度:.*?\)\s*$", "", text)
        clean_lines.append(text.strip())
    # 1. Station Name: the first line that survives all the skip rules below
    for line in clean_lines:
        # Skip empty or short noise
        if len(line) < 2: continue
        # Skip if starts with special chars
        # NOTE(review): the first literal is empty, which matches every line
        if line.startswith("") or line.startswith("("): continue
        # Skip if contains typical attribute keywords
        # NOTE(review): the last literal is empty, which matches every line
        if "km" in line.lower() or "/度" in line or "" in line: continue
        # Skip if strictly numeric (unlikely for name)
        if re.match(r"^\d+$", line): continue
        result["station_name"] = line
        break
    # 2. Distance: first "<number> km" match, stored with its unit
    for line in clean_lines:
        m = re.search(r"(\d+(\.\d+)?)\s*km", line, re.IGNORECASE)
        if m:
            result["distance"] = m.group(0)
            break
    # 3. Price (Standard)
    # Look for "0.xxxx/度"
    for line in clean_lines:
        m = re.search(r"(\d+\.\d+)(?=/度)", line)
        if m:
            result["price"] = float(m.group(1))
            break
    # 4. Piles: a type marker applies to the free/total counts that follow it
    current_type = "未知"
    for line in clean_lines:
        # NOTE(review): all three literals are empty, so the first branch always runs
        if "" in line: current_type = ""
        elif "" in line: current_type = ""
        elif "" in line: current_type = ""
        # Match "闲3/4" or "3/4"
        # Regex: optional "闲", int, /, int
        m = re.search(r"(?:闲)?(\d+)/(\d+)", line)
        if m:
            # Check if it looks like a price (contains dot)
            if "." in line: continue
            free = int(m.group(1))
            total = int(m.group(2))
            result["piles"].append({
                "type": current_type,
                "free": free,
                "total": total
            })
    # 5. Parking: first line mentioning "停车"
    for line in clean_lines:
        if "停车" in line:
            # Clean up leading punctuation
            cleaned = re.sub(r"^[·\.\sP]+", "", line)
            result["parking"] = cleaned
            break
    return result
if __name__ == "__main__":
    # User provided sample data: OCR lines in the "行 N: text (置信度: score)" format
    # that parse_ocr_lines strips before matching
    sample_input = [
        "行 1: 长春市绿园区雁鸣湖公共充电站 (置信度: 0.9963)",
        "行 2: (… (置信度: 0.6244)",
        "行 3: 7.4km (置信度: 0.9975)",
        "行 4: 0.7111/度 (置信度: 0.9450)",
        "行 5: 快 (置信度: 0.9987)",
        "行 6: 闲3/4 (置信度: 0.9941)",
        "行 7: ¥ (置信度: 0.8734)",
        "行 8: 组团 (置信度: 0.9995)",
        "行 9: 2倍积分 (置信度: 0.9997)",
        "行 10: P (置信度: 0.9929)",
        "行 11: ·收费停车:以场地实际收费为准 (置信度: 0.9736)"
    ]
    # Echo the raw input, then the parsed structure as pretty-printed JSON
    print("--- Input Data ---")
    for l in sample_input:
        print(l)
    parsed = parse_ocr_lines(sample_input)
    print("\n--- Parsed Result (Regex) ---")
    print(json.dumps(parsed, ensure_ascii=False, indent=2))

View File

@@ -1,118 +0,0 @@
# coding=utf-8
import os
import sys
import cv2
import json
import time
# Add project root to sys.path
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(current_dir)
if project_root not in sys.path:
sys.path.append(project_root)
from Util import Kit
from Util.PaddleOCRKit import get_ocr_kit
def _format_piles(piles_list):
    """Render pile dicts as space-separated ``type:free/total`` tokens."""
    return " ".join(
        f"{p.get('type', '')}:{p.get('free', 0)}/{p.get('total', 0)}"
        for p in piles_list
    )


def test_integration():
    """Exercise card cropping plus OCR on ``2.jpg`` and print the stations found.

    Steps:
      1. Run ``Kit.crop_cards_from_image`` and load the ``.json`` metadata it is
         expected to leave next to the image. If no cards are listed, fall back
         to a single mock card covering the whole image.
      2. Crop each card rectangle (clamped to the image bounds), recognise it
         with the OCR kit, and collect a station record for every card that
         yields a station name.

    A missing or unreadable image is reported on stdout and ends the run.

    Returns:
        None.
    """
    image_path = os.path.join(current_dir, "2.jpg")
    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        return
    print(f"Testing integration on: {image_path}")

    # 1. Generate Metadata using Kit
    print("Running Kit.crop_cards_from_image...")
    Kit.crop_cards_from_image(image_path, output_dir=current_dir)

    # Swap only the final extension; str.replace would rewrite every ".jpg" in the path.
    json_path = os.path.splitext(image_path)[0] + ".json"
    json_metadata = {}
    if os.path.exists(json_path):
        with open(json_path, 'r', encoding='utf-8') as f:
            json_metadata = json.load(f)

    # Decode the image once; cv2.imread returns None instead of raising when the
    # file cannot be read, so check before touching .shape.
    original_img = cv2.imread(image_path)
    if original_img is None:
        print(f"Failed to read image: {image_path}")
        return
    h_img, w_img = original_img.shape[:2]

    if not json_metadata.get("cards"):
        print("Kit failed to find cards (expected for single card image). Mocking metadata.")
        json_metadata = {
            "cards": [
                {
                    "id": 1,
                    "rect": [0, 0, w_img, h_img],
                    "click_point": [w_img//2, h_img//2]
                }
            ]
        }
    print(f"Loaded metadata with {len(json_metadata.get('cards', []))} cards.")

    # 2. Run OCR Logic (Simulating Crawler.py)
    print("Running OCR Logic...")
    ocr_kit = get_ocr_kit()
    stations = []
    t_start = time.time()
    for card in json_metadata["cards"]:
        rect = card.get("rect")
        if not rect:
            continue
        x1, y1, x2, y2 = rect
        # Clamp the rectangle to the image bounds
        x1 = max(0, min(x1, w_img))
        x2 = max(0, min(x2, w_img))
        y1 = max(0, min(y1, h_img))
        y2 = max(0, min(y2, h_img))
        if x2 <= x1 or y2 <= y1:
            continue
        # Crop the card
        cropped_card = original_img[y1:y2, x1:x2]
        # Recognise
        parsed_data = ocr_kit.recognize(cropped_card)
        print(f"Parsed Data: {json.dumps(parsed_data, indent=2, ensure_ascii=False)}")
        if not (parsed_data and parsed_data.get("station_name")):
            continue
        # Shape the record
        price = parsed_data.get("price")
        stations.append({
            "station_name": parsed_data.get("station_name"),
            "price": str(price) if price is not None else "",
            "piles": _format_piles(parsed_data.get("piles", [])),
            "distance": parsed_data.get("distance", ""),
            "uia_center_x": card["click_point"][0],
            "uia_center_y": card["click_point"][1],
            "tags": parsed_data.get("tags", []),
            "parking_info": parsed_data.get("parking", "")
        })
    t_end = time.time()

    print("-" * 50)
    print(f"Total Processing Time: {t_end - t_start:.4f}s")
    print(f"Found {len(stations)} stations:")
    for s in stations:
        print(json.dumps(s, indent=2, ensure_ascii=False))
if __name__ == "__main__":
test_integration()

View File

Binary file not shown.

Before

Width:  |  Height:  |  Size: 61 KiB

View File

@@ -1,78 +0,0 @@
Starting TestOcrLlm...
Testing OCR + LLM Pipeline on: D:\dsWork\YltProject\dsCrawler\Test\2.jpg
--------------------------------------------------
Initializing PaddleOCR...
PaddleOCR Init Time: 1.7990s
Running OCR Inference...
OCR Result (1.7636s):
长春市绿园区雁鸣湖公共充电站
..
17.4km
0.7111/度
闲3/4
组团
2倍积分
P
收费停车:以场地实际收费为准
--------------------------------------------------
Running Regex Parsing...
Parsed Data (Regex):
{
"station_name": "长春市绿园区雁鸣湖公共充电站",
"distance": "17.4km",
"price": 0.7111,
"piles": [
{
"type": "快",
"free": 3,
"total": 4
}
],
"parking": "收费停车:以场地实际收费为准",
"tags": [
"组团",
"2倍积分",
"P"
]
}
Regex Parsing Time: 0.0015s
--------------------------------------------------
Running LLM Parsing...
Starting LLM request...
LLM request finished.
LLM Response (3.6250s):
```json
{
"station_name": "长春市绿园区雁鸣湖公共充电站",
"distance": "17.4km",
"price": "0.7111/度",
"tags": ["快", "闲3/4", "组团", "2倍积分", "P"],
"parking_info": "收费停车:以场地实际收费为准"
}
```
Parsed JSON Data:
{
"station_name": "长春市绿园区雁鸣湖公共充电站",
"distance": "17.4km",
"price": "0.7111/度",
"tags": [
"快",
"闲3/4",
"组团",
"2倍积分",
"P"
],
"parking_info": "收费停车:以场地实际收费为准"
}
--------------------------------------------------
Summary:
OCR Time: 1.7636s
Regex Parsing Time: 0.0015s
LLM Parsing Time: 3.6250s
Total Pipeline (OCR+Regex): 1.7652s
Total Pipeline (OCR+LLM): 5.3886s

Binary file not shown.

View File

@@ -1,27 +0,0 @@
OCR Result:
长春市绿园区雁鸣湖公共充电站
(…
7.4km
0.7111/度
闲3/4
组团
2倍积分
P
·收费停车:以场地实际收费为准
LLM Response:
{
"station_name": "长春市绿园区雁鸣湖公共充电站",
"distance": "7.4km",
"price": "0.7111/度",
"tags": ["快", "闲3/4", "组团", "2倍积分", "P"],
"parking_info": "收费停车:以场地实际收费为准"
}
--------------------------------------------------
Summary:
OCR Time: 15.7442s
LLM Time: 3.1676s
Total Pipeline Time (excluding init): 18.9118s

View File

@@ -1,11 +0,0 @@
行 1: 长春市绿园区雁鸣湖公共充电站 (置信度: 0.9963)
行 2: (… (置信度: 0.6244)
行 3: 7.4km (置信度: 0.9975)
行 4: 0.7111/度 (置信度: 0.9450)
行 5: 快 (置信度: 0.9987)
行 6: 闲3/4 (置信度: 0.9941)
行 7: ¥ (置信度: 0.8734)
行 8: 组团 (置信度: 0.9995)
行 9: 2倍积分 (置信度: 0.9997)
行 10: P (置信度: 0.9929)
行 11: ·收费停车:以场地实际收费为准 (置信度: 0.9736)

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

View File

@@ -1,9 +0,0 @@
import uiautomator2 as u2
# One-off helper: dump the current UI hierarchy of the connected device to an XML file.
d = u2.connect()
d.reset_uiautomator()
# Pause before dumping (presumably 3 seconds, to let the service come back up — confirm)
d.sleep(3)
xml = d.dump_hierarchy() # Export the current UI structure as XML
with open("ui_hierarchy5_2.xml", "w", encoding="utf-8") as f:
    f.write(xml)

View File

@@ -1,14 +0,0 @@
from Util.ObsUtil import ObsUploader
from Config.Config import OBS_TMP_PREFIX
# One-off helper: upload a single screenshot through ObsUploader and print the outcome.
uploader = ObsUploader()
# Object key = configured temp prefix + the screenshot's file name
object_key = OBS_TMP_PREFIX + "/1d3eb56c-942e-42d3-8993-f1ea8ad7d97b.jpg"
success, result = uploader.upload_file(
    object_key=object_key,
    file_path="Screenshot/1d3eb56c-942e-42d3-8993-f1ea8ad7d97b.jpg"
)
print(success)
print(result)

View File

@@ -1,12 +0,0 @@
import uiautomator2 as u2
# One-off helper: start an app, take a screenshot, then stop the app.
d = u2.connect()
# Start the app (WeChat as the example)
d.app_start("com.tencent.mm")
# Take a screenshot and save it
d.screenshot("home.jpg")
# Stop the app
d.app_stop("com.tencent.mm")

View File

@@ -1,45 +0,0 @@
<?xml version='1.0' encoding='UTF-8' standalone='yes' ?>
<hierarchy rotation="0">
<node index="0" text="" resource-id="" class="android.widget.FrameLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[0,0][1080,72]" drawing-order="0" hint="" display-id="0">
<node index="0" text="" resource-id="com.android.systemui:id/status_bar_launch_animation_container" class="android.widget.FrameLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[0,0][1080,72]" drawing-order="1" hint="" display-id="0" />
<node index="1" text="" resource-id="com.android.systemui:id/status_bar_container" class="android.widget.FrameLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[0,0][1080,72]" drawing-order="2" hint="" display-id="0">
<node index="0" text="" resource-id="com.android.systemui:id/status_bar" class="android.widget.FrameLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[0,0][1080,72]" drawing-order="1" hint="" display-id="0">
<node index="0" text="" resource-id="com.android.systemui:id/status_bar_contents" class="android.widget.LinearLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[0,0][1020,72]" drawing-order="2" hint="" display-id="0">
<node index="0" text="" resource-id="" class="android.widget.FrameLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[24,0][510,72]" drawing-order="1" hint="" display-id="0">
<node index="0" text="" resource-id="com.android.systemui:id/status_bar_left_side" class="android.widget.LinearLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[24,0][510,72]" drawing-order="2" hint="" display-id="0">
<node index="0" text="9:15" resource-id="com.android.systemui:id/clock" class="android.widget.TextView" package="com.android.systemui" content-desc="上午9:15" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[24,0][113,72]" drawing-order="2" hint="" display-id="0" />
<node index="1" text="" resource-id="com.android.systemui:id/notification_icon_area" class="android.widget.FrameLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[113,0][510,72]" drawing-order="4" hint="" display-id="0">
<node index="0" text="" resource-id="com.android.systemui:id/notification_icon_area_inner" class="android.widget.LinearLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[113,0][510,72]" drawing-order="1" hint="" display-id="0">
<node index="0" text="" resource-id="com.android.systemui:id/notificationIcons" class="android.widget.FrameLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[113,0][510,72]" drawing-order="1" hint="" display-id="0" />
</node>
</node>
</node>
</node>
<node index="1" text="" resource-id="com.android.systemui:id/system_icon_area" class="android.widget.LinearLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[510,0][996,72]" drawing-order="4" hint="" display-id="0">
<node index="0" text="" resource-id="com.android.systemui:id/system_icons" class="android.widget.LinearLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[510,0][996,72]" drawing-order="1" hint="" display-id="0">
<node index="0" text="" resource-id="com.android.systemui:id/statusIcons" class="android.widget.LinearLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[510,0][973,72]" drawing-order="1" hint="" display-id="0">
<node index="0" text="" resource-id="com.android.systemui:id/wifi_combo" class="android.widget.FrameLayout" package="com.android.systemui" content-desc="WLAN 信号满格。" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[857,3][910,69]" drawing-order="14" hint="" display-id="0">
<node index="0" text="" resource-id="com.android.systemui:id/wifi_group" class="android.widget.LinearLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[865,3][910,69]" drawing-order="1" hint="" display-id="0">
<node index="0" text="" resource-id="com.android.systemui:id/wifi_combo" class="android.widget.FrameLayout" package="com.android.systemui" content-desc="" checkable="false" checked="false" clickable="false" enabled="true" focusable="false" focused="false" scrollable="false" long-clickable="false" password="false" selected="false" visible-to-user="true" bounds="[865,13][910,58]" drawing-order="2" hint="" display-id="0">

View File

@@ -8,7 +8,6 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(level
logger = logging.getLogger(__name__)
# 将项目根目录添加到系统路径,以便能够导入 DbKit 和 Config 等模块
# DbKit/doris_ddl.py -> DbKit -> Root (2 levels up)
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
sys.path.append(project_root)
@@ -31,7 +30,7 @@ async def init_tables():
logger.info("正在读取 SQL 文件...")
# SQL 文件路径:相对于当前脚本所在目录 (DbKit/)
sql_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '', 'doris_ddl.sql')
sql_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '', 'Sql/doris_ddl.sql')
logger.info(f"使用 SQL 文件: {sql_path}")

View File

@@ -5,6 +5,8 @@ import os
from datetime import datetime
import uiautomator2 as u2
from Config.Config import TEMP_IMAGE_DIR
# 配置日志输出
logging.basicConfig(
level=logging.INFO,
@@ -22,14 +24,8 @@ async def take_screenshot():
# 执行截图
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"current_{timestamp}.jpg"
# 获取目标目录: 项目根目录下的 Temp/DevTools
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
target_dir = os.path.join(project_root, "Temp", "DevTools")
os.makedirs(target_dir, exist_ok=True)
filepath = os.path.join(target_dir, filename)
filename = f"Screenshot_{timestamp}.jpg"
filepath = os.path.join(TEMP_IMAGE_DIR, filename)
logger.info(f"正在拍照(截图)并保存至: {filepath}")
d.screenshot(filepath)

View File

@@ -1,73 +0,0 @@
# -*- coding: utf-8 -*-
"""
基于行扫描和统计特征的卡片截取工具
"""
import sys
import os
# 添加项目根目录到系统路径
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(project_root)
from Util import Kit
def clean_directory(dir_path):
"""
清理现场:删除目录下除纯数字.jpg以外的所有文件
"""
print(f"Cleaning directory: {dir_path}")
if not os.path.exists(dir_path):
return
for filename in os.listdir(dir_path):
path = os.path.join(dir_path, filename)
if not os.path.isfile(path):
continue
name, ext = os.path.splitext(filename)
# 保留条件:后缀是 .jpg 且文件名是纯数字
# [修改] 同时保留 _flag.jpg, _vl.jpg 供查看
# [修改] 保留 .json 文件
if ext.lower() == ".json":
continue
if ext.lower() == ".jpg" and (name.isdigit() or name.endswith("_flag") or name.endswith("_vl")):
continue
# 清理旧的 _for_vl.jpg
if name.endswith("_for_vl"):
pass # Let it fall through to delete
else:
# 其他非生成的文件,可能需要保留吗?
# 这里的逻辑是清理 output 目录,假设该目录下只有生成的图片
# 如果是原始图片(比如 1.jpg不能删
pass
# 原始图片保护 (文件名比较短,通常是 1.jpg, 2.jpg 等)
if len(name) <= 2 and name.isdigit():
continue
try:
os.remove(path)
print(f" Deleted: {filename}")
except Exception as e:
print(f" Error deleting {filename}: {e}")
def crop_cards(img_path):
    """Delegate to ``Kit.crop_cards_from_image`` for *img_path*; returns ``None``."""
    Kit.crop_cards_from_image(img_path)
if __name__ == "__main__":
    # Sample images to process (absolute, machine-specific paths)
    test_files = [
        r"d:\dsWork\dsProject\dsCrawler\Tools\Images\1.jpg",
        r"d:\dsWork\dsProject\dsCrawler\Tools\Images\2.jpg",
        r"d:\dsWork\dsProject\dsCrawler\Tools\Images\3.jpg",
        r"d:\dsWork\dsProject\dsCrawler\Tools\Images\4.jpg"
    ]
    # Clean up leftovers before the test run
    if test_files:
        # Assumes all images live in the same folder
        target_dir = os.path.dirname(test_files[0])
        clean_directory(target_dir)
    for f in test_files:
        crop_cards(f)

Binary file not shown.