# aiData/Test/TestPaddleOCRKit.py

# coding=utf-8
import os
import sys
import cv2
import json
import time

# Make the parent of this file's directory importable so that the
# `Util` package imports below can be resolved.
current_dir = os.path.split(os.path.abspath(__file__))[0]
project_root = os.path.split(current_dir)[0]
if project_root not in sys.path:
    sys.path.append(project_root)

from Util import Kit
from Util.PaddleOCRKit import get_ocr_kit

def test_integration():
    """Run card cropping + OCR on ``2.jpg`` and print the extracted stations.

    Steps:
      1. Call ``Kit.crop_cards_from_image`` on the image, then load the JSON
         metadata file that shares the image's path with a ``.json``
         extension. If that file is missing or lists no cards, the whole
         image is treated as a single card.
      2. Crop each card rectangle (clamped to the image bounds), run
         ``ocr_kit.recognize`` on the crop, and convert every result that
         has a ``station_name`` into a flat station record.
      3. Print the elapsed time of the OCR loop and each station record as
         JSON.

    The function returns ``None``. It returns early, after printing a
    message, when the image file does not exist or cannot be decoded.
    """
    image_path = os.path.join(current_dir, "2.jpg")
    if not os.path.exists(image_path):
        print(f"Image not found: {image_path}")
        return

    print(f"Testing integration on: {image_path}")

    # 1. Generate Metadata using Kit
    print("Running Kit.crop_cards_from_image...")
    Kit.crop_cards_from_image(image_path, output_dir=current_dir)

    # cv2.imread reports a failed decode by returning None rather than
    # raising, so validate once here and reuse the array everywhere below.
    original_img = cv2.imread(image_path)
    if original_img is None:
        print(f"Failed to read image: {image_path}")
        return
    h_img, w_img = original_img.shape[:2]

    # Swap only the file extension; str.replace would also rewrite a ".jpg"
    # that happens to appear in a directory name.
    json_path = os.path.splitext(image_path)[0] + ".json"
    json_metadata = {}

    if os.path.exists(json_path):
        with open(json_path, 'r', encoding='utf-8') as f:
            json_metadata = json.load(f)

    if not json_metadata.get("cards"):
        print("Kit failed to find cards (expected for single card image). Mocking metadata.")
        # Fall back to one card that spans the full image.
        json_metadata = {
            "cards": [
                {
                    "id": 1,
                    "rect": [0, 0, w_img, h_img],
                    "click_point": [w_img // 2, h_img // 2]
                }
            ]
        }

    print(f"Loaded metadata with {len(json_metadata.get('cards', []))} cards.")

    # 2. Run OCR Logic (Simulating Crawler.py)
    print("Running OCR Logic...")

    ocr_kit = get_ocr_kit()
    stations = []

    t_start = time.time()

    for card in json_metadata["cards"]:
        rect = card.get("rect")
        if not rect:
            continue
        x1, y1, x2, y2 = rect

        # Bounds check: clamp the rectangle to the image.
        x1 = max(0, min(x1, w_img))
        x2 = max(0, min(x2, w_img))
        y1 = max(0, min(y1, h_img))
        y2 = max(0, min(y2, h_img))

        # Skip rectangles that are empty after clamping.
        if x2 <= x1 or y2 <= y1:
            continue

        # Crop the card.
        cropped_card = original_img[y1:y2, x1:x2]

        # Recognize.
        parsed_data = ocr_kit.recognize(cropped_card)
        print(f"Parsed Data: {json.dumps(parsed_data, indent=2, ensure_ascii=False)}")

        if not parsed_data or not parsed_data.get("station_name"):
            continue

        # Format the data. `or []` also covers an explicit null "piles" value.
        piles_str = " ".join(
            f"{p.get('type', '')}:{p.get('free', 0)}/{p.get('total', 0)}"
            for p in (parsed_data.get("piles") or [])
        )

        price = parsed_data.get("price")
        stations.append({
            "station_name": parsed_data.get("station_name"),
            "price": "" if price is None else str(price),
            "piles": piles_str,
            "distance": parsed_data.get("distance", ""),
            "uia_center_x": card["click_point"][0],
            "uia_center_y": card["click_point"][1],
            "tags": parsed_data.get("tags", []),
            "parking_info": parsed_data.get("parking", "")
        })

    t_end = time.time()

    print("-" * 50)
    print(f"Total Processing Time: {t_end - t_start:.4f}s")
    print(f"Found {len(stations)} stations:")
    for s in stations:
        print(json.dumps(s, indent=2, ensure_ascii=False))

# Run the integration check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_integration()
'commit' 2026-01-12 07:49:18 +08:00			`# coding=utf-8`
			`import os`
			`import sys`
			`import cv2`
			`import json`
			`import time`

			`# Add project root to sys.path`
			`current_dir = os.path.dirname(os.path.abspath(__file__))`
			`project_root = os.path.dirname(current_dir)`
			`if project_root not in sys.path:`
			`sys.path.append(project_root)`

			`from Util import Kit`
			`from Util.PaddleOCRKit import get_ocr_kit`

			`def test_integration():`
			`image_path = os.path.join(current_dir, "2.jpg")`
			`if not os.path.exists(image_path):`
			`print(f"Image not found: {image_path}")`
			`return`

			`print(f"Testing integration on: {image_path}")`

			`# 1. Generate Metadata using Kit`
			`print("Running Kit.crop_cards_from_image...")`
			`Kit.crop_cards_from_image(image_path, output_dir=current_dir)`

			`json_path = image_path.replace(".jpg", ".json")`
			`json_metadata = {}`

			`if os.path.exists(json_path):`
			`with open(json_path, 'r', encoding='utf-8') as f:`
			`json_metadata = json.load(f)`

			`if not json_metadata.get("cards"):`
			`print("Kit failed to find cards (expected for single card image). Mocking metadata.")`
			`img = cv2.imread(image_path)`
			`h, w = img.shape[:2]`
			`json_metadata = {`
			`"cards": [`
			`{`
			`"id": 1,`
			`"rect": [0, 0, w, h],`
			`"click_point": [w//2, h//2]`
			`}`
			`]`
			`}`

			`print(f"Loaded metadata with {len(json_metadata.get('cards', []))} cards.")`

			`# 2. Run OCR Logic (Simulating Crawler.py)`
			`print("Running OCR Logic...")`

			`ocr_kit = get_ocr_kit()`

			`original_img = cv2.imread(image_path)`
			`stations = []`

			`t_start = time.time()`

			`if "cards" in json_metadata and original_img is not None:`
			`h_img, w_img = original_img.shape[:2]`

			`for card in json_metadata["cards"]:`
			`rect = card.get("rect")`
			`if not rect: continue`
			`x1, y1, x2, y2 = rect`

			`# 边界检查`
			`x1 = max(0, min(x1, w_img))`
			`x2 = max(0, min(x2, w_img))`
			`y1 = max(0, min(y1, h_img))`
			`y2 = max(0, min(y2, h_img))`

			`if x2 <= x1 or y2 <= y1: continue`

			`# 裁剪卡片`
			`cropped_card = original_img[y1:y2, x1:x2]`

			`# 识别`
			`parsed_data = ocr_kit.recognize(cropped_card)`
			`print(f"Parsed Data: {json.dumps(parsed_data, indent=2, ensure_ascii=False)}")`

			`if parsed_data and parsed_data.get("station_name"):`
			`# 格式化数据`
			`piles_list = parsed_data.get("piles", [])`
			`piles_str_parts = []`
			`for p in piles_list:`
			`p_type = p.get("type", "")`
			`p_free = p.get("free", 0)`
			`p_total = p.get("total", 0)`
			`piles_str_parts.append(f"{p_type}:{p_free}/{p_total}")`

			`piles_str = " ".join(piles_str_parts)`

			`station_info = {`
			`"station_name": parsed_data.get("station_name"),`
			`"price": str(parsed_data.get("price")) if parsed_data.get("price") is not None else "",`
			`"piles": piles_str,`
			`"distance": parsed_data.get("distance", ""),`
			`"uia_center_x": card["click_point"][0],`
			`"uia_center_y": card["click_point"][1],`
			`"tags": parsed_data.get("tags", []),`
			`"parking_info": parsed_data.get("parking", "")`
			`}`
			`stations.append(station_info)`

			`t_end = time.time()`

			`print("-" * 50)`
			`print(f"Total Processing Time: {t_end - t_start:.4f}s")`
			`print(f"Found {len(stations)} stations:")`
			`for s in stations:`
			`print(json.dumps(s, indent=2, ensure_ascii=False))`

			`if __name__ == "__main__":`
			`test_integration()`