124 lines
4.6 KiB
Python
124 lines
4.6 KiB
Python
# coding=utf-8
|
|
import os
|
|
import sys
|
|
import logging
|
|
import cv2
|
|
import numpy as np
|
|
from paddleocr import PaddleOCR
|
|
from Util.OcrParser import OcrParser
|
|
|
|
# Shared logger for this module; every record is emitted under the fixed
# name "PaddleOCRKit" rather than the module's import path.
logger = logging.getLogger("PaddleOCRKit")
|
|
|
|
class PaddleOCRKit:
    """Singleton wrapper that owns one PaddleOCR engine for the process.

    The first ``PaddleOCRKit()`` call creates the instance and loads the
    model; every later call returns that same instance.  If loading fails
    the error is logged, ``_ocr`` stays ``None`` and ``recognize`` returns
    ``None`` for every request (initialization is not retried).
    """

    # The one shared instance, created lazily by __new__.
    _instance = None
    # The underlying PaddleOCR engine, or None when initialization failed.
    _ocr = None

    def __new__(cls):
        """Return the shared instance, building and initializing it once."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance._initialize_model()
        return cls._instance

    @staticmethod
    def _build_engine():
        """Construct the PaddleOCR engine (PP-OCRv4, Chinese, orientation on).

        ``show_log=False`` is passed first to silence PaddleOCR's own log
        output.  Releases that reject this keyword raise ``ValueError`` or
        ``TypeError`` naming it; in that case the engine is built again
        without the keyword.  Any other error propagates to the caller.
        """
        engine_kwargs = {
            "use_textline_orientation": True,
            "lang": "ch",
            "ocr_version": 'PP-OCRv4',
        }
        try:
            return PaddleOCR(show_log=False, **engine_kwargs)
        except (TypeError, ValueError) as exc:
            if "show_log" not in str(exc):
                raise
            logger.debug("PaddleOCR rejected 'show_log'; retrying without it.")
            return PaddleOCR(**engine_kwargs)

    def _initialize_model(self):
        """Load the PaddleOCR model once and store it on ``self._ocr``.

        Never raises: any failure is logged with its traceback and leaves
        ``self._ocr`` set to ``None``.
        """
        try:
            # 1. Quiet PaddlePaddle's native (GLOG / C++) logging and ask for
            #    the auto-growth allocator to reduce GPU-memory warnings.
            # NOTE(review): paddleocr is imported at module top, before these
            # variables are set - confirm they still take effect.
            os.environ['GLOG_minloglevel'] = '3'
            os.environ['FLAGS_allocator_strategy'] = 'auto_growth'

            # 2. Quiet the Python-side paddle loggers.
            for noisy_name in ('paddle', 'ppocr'):
                logging.getLogger(noisy_name).setLevel(logging.ERROR)

            logger.info("Initializing PaddleOCR (PP-OCRv4 Mobile)...")
            self._ocr = self._build_engine()
            logger.info("PaddleOCR initialized successfully.")
        except Exception as e:
            # Best effort by design: callers detect failure through
            # recognize() returning None instead of an import-time crash.
            logger.exception(f"Failed to initialize PaddleOCR: {e}")
            self._ocr = None

    @staticmethod
    def _extract_text_lines(result):
        """Pull the recognized text strings out of a raw ``ocr()`` result.

        Only the first element of ``result`` is inspected.  Two shapes are
        understood:

        * a dict containing ``'rec_texts'`` (newer PaddleOCR / PaddleX);
        * a list of ``[points, (text, confidence)]`` entries (legacy).

        Malformed legacy entries (``None``, too short, empty text tuple) are
        skipped instead of raising.

        :param result: Value returned by ``PaddleOCR.ocr``; may be empty/None.
        :return: List of recognized text lines (possibly empty).
        """
        if not result:
            return []

        first = result[0]
        if isinstance(first, dict) and 'rec_texts' in first:
            return list(first['rec_texts'] or [])

        if isinstance(first, list):
            return [
                entry[1][0]
                for entry in first
                if isinstance(entry, (list, tuple))
                and len(entry) >= 2
                and isinstance(entry[1], (list, tuple))
                and entry[1]
            ]

        return []

    def recognize(self, image_input):
        """Run OCR on an image and parse the text into structured data.

        :param image_input: Image path (str) or OpenCV image (numpy.ndarray).
        :return: Whatever ``OcrParser.parse`` returns for the recognized
                 lines, ``{}`` when no text was detected, or ``None`` when
                 the model is not initialized or recognition raised.
        """
        if self._ocr is None:
            logger.error("PaddleOCR model is not initialized.")
            return None

        try:
            # ocr() is kept (rather than predict()) because the parsing below
            # is written against the result shapes it returns.
            result = self._ocr.ocr(image_input)

            if not result:
                logger.warning("OCR returned empty result.")
            elif result[0] is None:
                logger.warning("OCR result[0] is None.")

            ocr_text_lines = self._extract_text_lines(result)
            if not ocr_text_lines:
                logger.info("No text detected.")
                return {}

            # Log the raw recognized lines for troubleshooting.
            logger.info(f"🔍 OCR 原始识别文字 ({len(ocr_text_lines)} 行):")
            for i, line in enumerate(ocr_text_lines):
                logger.info(f" [{i+1}] {line}")

            # Turn the raw lines into structured fields.
            parsed_data = OcrParser.parse(ocr_text_lines)

            # Log the structured result when the parser produced one.
            if parsed_data:
                logger.info("📦 OCR 解析结果 (JSON):")
                logger.info(f" {parsed_data}")

            return parsed_data

        except Exception as e:
            logger.exception(f"Error during OCR recognition: {e}")
            return None
|
|
|
|
# Module-level handle to the kit; stays None until get_ocr_kit() is first called.
_kit_instance = None


def get_ocr_kit():
    """Return the module-wide PaddleOCRKit, constructing it on first use."""
    global _kit_instance
    if _kit_instance is not None:
        return _kit_instance
    _kit_instance = PaddleOCRKit()
    return _kit_instance
|