94 lines
3.3 KiB
Python
94 lines
3.3 KiB
Python
# coding=utf-8
|
||
import easyocr
|
||
import numpy as np
|
||
import cv2
|
||
import logging
|
||
|
||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||
|
||
class EasyOcrKit:
    """Singleton wrapper around an ``easyocr.Reader``.

    Every ``EasyOcrKit(...)`` call returns the same object. ``__init__`` runs
    on each of those calls but only builds the underlying reader when none
    exists yet, or when the caller asks for a GPU mode that differs from both
    the mode currently in use and the mode requested at the last build.
    """

    _instance = None
    _reader = None

    # Languages used when the caller does not pass ``langs``.
    _DEFAULT_LANGS = ('ch_sim', 'en')

    def __new__(cls, *args, **kwargs):
        """Create the shared instance on first use; return it on later calls."""
        if cls._instance is None:
            cls._instance = super(EasyOcrKit, cls).__new__(cls)
        return cls._instance

    def __init__(self, langs=None, gpu=True):
        """
        Initialise (or reuse) the EasyOCR reader.

        :param langs: iterable of recognition language codes; ``None`` selects
            ``['ch_sim', 'en']``
        :param gpu: whether to request GPU mode
        """
        # Copy so that neither the caller's list nor a shared default can be
        # mutated through the reader.
        langs = list(self._DEFAULT_LANGS if langs is None else langs)

        # Decide whether the existing reader must be rebuilt (e.g. GPU -> CPU).
        if self._reader is not None and hasattr(self, '_using_gpu'):
            # ``_using_gpu`` is the mode actually in use; ``_requested_gpu`` is
            # the mode asked for when the reader was built. They differ after a
            # GPU failure fell back to CPU. Comparing against both prevents a
            # rebuild (and another failing GPU attempt) on every later
            # ``gpu=True`` call.
            last_requested = getattr(self, '_requested_gpu', self._using_gpu)
            if gpu != self._using_gpu and gpu != last_requested:
                logger.info(f"检测到 GPU 模式变更 ({self._using_gpu} -> {gpu}),重新初始化 Reader...")
                self._reader = None

        if self._reader is not None:
            return

        self._requested_gpu = gpu
        # Broad ``except`` is deliberate: construction is best-effort, and a
        # failure leaves ``_reader`` as None so ``read_text`` returns [].
        try:
            self._reader = easyocr.Reader(langs, gpu=gpu)
            self._using_gpu = gpu  # remember the mode actually in use
            logger.info(f"EasyOCR Reader 初始化成功 (gpu={gpu})")
        except Exception as e:
            logger.error(f"EasyOCR Reader 初始化失败: {e}")
            # If the GPU attempt failed, retry once on CPU.
            if gpu:
                logger.warning("尝试回退到 CPU 模式...")
                try:
                    self._reader = easyocr.Reader(langs, gpu=False)
                    self._using_gpu = False
                except Exception as ex:
                    logger.error(f"EasyOCR CPU 模式回退也失败: {ex}")

    def read_text(self, image):
        """
        Recognise the text in an image.

        :param image: image path or OpenCV image object
        :return: the list returned by the reader's ``readtext``; an empty list
            when no reader could be initialised
        """
        if self._reader is None:
            return []
        return self._reader.readtext(image)

    def find_text_position(self, image, target_text, threshold=0.5):
        """
        Find the first recognised text that contains ``target_text``.

        :param image: image path or OpenCV image object
        :param target_text: substring to look for
        :param threshold: minimum confidence a result must reach
        :return: ``(found_text, quad, probability)``, or ``None`` if nothing
            matches
        """
        for quad, text, prob in self.read_text(image):
            if target_text in text and prob >= threshold:
                return text, quad, prob
        return None

    @staticmethod
    def _to_per_mille(value, size):
        """Clamp ``value`` to ``[0, size]`` and scale it to the 0-1000 range."""
        clamped = min(max(int(value), 0), size)
        return int(clamped * 1000 / size)

    def get_normalized_rect(self, quad, width, height):
        """
        Get the normalised bounding rectangle ``[x1, y1, x2, y2]`` (0-1000).

        :param quad: the four corner points returned by EasyOCR
        :param width: image width
        :param height: image height
        :return: ``[x1, y1, x2, y2]``, each value clamped to 0-1000
        :raises ValueError: if ``width`` or ``height`` is not positive
        """
        if width <= 0 or height <= 0:
            raise ValueError(
                f"width and height must be positive, got {width}x{height}"
            )

        pts = np.array(quad).astype(int)
        xs = pts[:, 0]
        ys = pts[:, 1]

        # Clamp every edge on both sides so that a box lying partly or wholly
        # outside the image still maps into 0-1000.
        return [
            self._to_per_mille(xs.min(), width),
            self._to_per_mille(ys.min(), height),
            self._to_per_mille(xs.max(), width),
            self._to_per_mille(ys.max(), height),
        ]
|
||
|
||
# Convenience function
|
||
def get_easyocr_reader(gpu=True):
    """
    Build an ``EasyOcrKit`` with the given GPU flag and return it.

    :param gpu: forwarded unchanged as the ``gpu`` argument of ``EasyOcrKit``
    :return: the object produced by ``EasyOcrKit(gpu=gpu)``
    """
    kit = EasyOcrKit(gpu=gpu)
    return kit
|