Files
aiData/Util/EasyOcrKit.py
HuangHai 8292bf83d1 'commit'
2026-01-25 15:06:17 +08:00

94 lines
3.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding=utf-8
import easyocr
import numpy as np
import cv2
import logging
logger = logging.getLogger(__name__)
class EasyOcrKit:
    """Process-wide singleton wrapper around an ``easyocr.Reader``.

    Every ``EasyOcrKit(...)`` call returns the same instance, but
    ``__init__`` still runs on each call, so it is written to be idempotent:
    the underlying Reader is only (re)built when there is none yet or when
    the requested GPU mode / language list differs from what is loaded.
    """

    # Languages loaded when the caller never specifies any.
    _DEFAULT_LANGS = ('ch_sim', 'en')

    _instance = None
    _reader = None

    def __new__(cls, *args, **kwargs):
        # Classic singleton: create the instance once, hand it back afterwards.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, langs=None, gpu=True):
        """
        Initialise (or re-initialise) the shared EasyOCR Reader.

        :param langs: list of recognition languages. ``None`` means "no
            preference": keep the languages already loaded, or fall back to
            ``['ch_sim', 'en']`` when nothing has been loaded yet.
        :param gpu: whether to use the GPU. If creating a GPU reader raises,
            a CPU reader is attempted instead.
        """
        # Copy so that neither the caller's list nor a shared default is aliased.
        requested_langs = None if langs is None else list(langs)

        # Decide whether the existing reader has to be rebuilt.
        if self._reader is not None:
            # Compare against the mode that was *requested* last time, not the
            # mode that ended up being used: after a GPU -> CPU fallback the
            # effective mode is False, and comparing it with the default
            # gpu=True would rebuild the reader on every single call.
            previous_gpu = getattr(
                self, '_requested_gpu', getattr(self, '_using_gpu', gpu)
            )
            current_langs = getattr(self, '_langs', None)
            if previous_gpu != gpu:
                logger.info(f"检测到 GPU 模式变更 ({previous_gpu} -> {gpu}),重新初始化 Reader...")
                # Drop the old reader before creating the new one.
                self._reader = None
            elif (
                requested_langs is not None
                and current_langs is not None
                and requested_langs != current_langs
            ):
                logger.info(f"检测到识别语言变更 ({current_langs} -> {requested_langs}),重新初始化 Reader...")
                self._reader = None

        if self._reader is None:
            if requested_langs is not None:
                effective_langs = requested_langs
            else:
                effective_langs = (
                    getattr(self, '_langs', None) or list(self._DEFAULT_LANGS)
                )
            self._requested_gpu = gpu
            self._langs = effective_langs
            try:
                self._reader = easyocr.Reader(list(effective_langs), gpu=gpu)
                self._using_gpu = gpu  # record the mode actually in use
                logger.info(f"EasyOCR Reader 初始化成功 (gpu={gpu})")
            except Exception as e:
                logger.error(f"EasyOCR Reader 初始化失败: {e}")
                # If the GPU attempt failed, try again on the CPU.
                if gpu:
                    logger.warning("尝试回退到 CPU 模式...")
                    try:
                        self._reader = easyocr.Reader(
                            list(effective_langs), gpu=False
                        )
                        self._using_gpu = False
                    except Exception as ex:
                        # Best effort: the reader stays None and read_text()
                        # will return an empty result.
                        logger.error(f"EasyOCR CPU 模式回退也失败: {ex}")

    def read_text(self, image):
        """
        Recognise the text in an image.

        :param image: image path or OpenCV image object
        :return: the list returned by the reader's ``readtext``; an empty
            list when no reader could be initialised
        """
        if self._reader is None:
            return []
        return self._reader.readtext(image)

    def find_text_position(self, image, target_text, threshold=0.5):
        """
        Find the first recognised text that contains ``target_text``.

        :param image: image path or OpenCV image object
        :param target_text: text to look for (substring match)
        :param threshold: minimum confidence for a match to be accepted
        :return: ``(found_text, quad, probability)``, or ``None`` if nothing
            matched
        """
        for quad, text, prob in self.read_text(image):
            if target_text in text and prob >= threshold:
                return text, quad, prob
        return None

    def get_normalized_rect(self, quad, width, height):
        """
        Convert a quadrilateral to a normalised bounding box in 0-1000 units.

        :param quad: the four corner points ``[[x, y], ...]`` of the text box
        :param width: image width in pixels (must be positive)
        :param height: image height in pixels (must be positive)
        :return: ``[x1, y1, x2, y2]``, each an int in the range 0..1000
        :raises ValueError: if ``width`` or ``height`` is not positive
        """
        if width <= 0 or height <= 0:
            raise ValueError(
                f"width and height must be positive, got {width}x{height}"
            )
        pts = np.array(quad).astype(int)
        x_min = np.min(pts[:, 0])
        y_min = np.min(pts[:, 1])
        x_max = np.max(pts[:, 0])
        y_max = np.max(pts[:, 1])

        def _scale(value, extent):
            # Clamp on BOTH sides so a box lying partly or wholly outside the
            # image can never produce a value outside 0..1000.
            clamped = min(max(0, value), extent)
            return int(clamped * 1000 / extent)

        return [
            _scale(x_min, width),
            _scale(y_min, height),
            _scale(x_max, width),
            _scale(y_max, height),
        ]
# Convenience accessor
def get_easyocr_reader(gpu=True):
    """Return the ``EasyOcrKit`` singleton, requesting the given GPU mode.

    :param gpu: forwarded to ``EasyOcrKit`` as its ``gpu`` argument
    :return: the ``EasyOcrKit`` instance
    """
    kit = EasyOcrKit(gpu=gpu)
    return kit