Files
aiData/Test/TestOCR.py

120 lines
4.5 KiB
Python
Raw Normal View History

2026-01-12 07:49:18 +08:00
# coding=utf-8
import os
import sys
from paddleocr import PaddleOCR
import numpy as np
def _format_text_lines(texts, scores):
    """Build display lines from parallel text / confidence sequences.

    Args:
        texts: Sequence of recognized strings; may be ``None`` or empty.
        scores: Sequence of confidences aligned with ``texts``; may be
            ``None`` or shorter than ``texts``.

    Returns:
        A list of display lines. A missing score is reported as ``0.0``.
        When there are no texts, a single "nothing recognized" line.
    """
    # Use len() rather than truthiness so array-like containers (whose truth
    # value is ambiguous) are handled the same way as plain lists.
    if texts is None or len(texts) == 0:
        return ["未识别到任何文字 (rec_texts is empty)。"]
    if scores is None:
        scores = []

    lines = []
    for number, text in enumerate(texts, start=1):
        score = scores[number - 1] if number - 1 < len(scores) else 0.0
        lines.append(f"{number}: {text} (置信度: {score:.4f})")
    return lines


def _format_legacy_lines(entries):
    """Build display lines from a list-of-lists result.

    Each entry is expected to look like ``[box, (text, score)]``. Entries
    with another shape are reported as unknown; entries that cannot be
    parsed are reported as failures instead of aborting the whole dump.

    Args:
        entries: List of per-line result entries.

    Returns:
        A list of display lines, one per entry.
    """
    lines = []
    for number, entry in enumerate(entries, start=1):
        try:
            if len(entry) >= 2 and isinstance(entry[1], (tuple, list)):
                text, score = entry[1]
                lines.append(f"{number}: {text} (置信度: {score:.4f})")
            else:
                lines.append(f"{number}: {entry} (格式未知)")
        except (TypeError, ValueError, LookupError) as e:
            # Keep the failure as a regular output line so that it reaches
            # the output file as well as the console.
            lines.append(f"{number} 解析失败: {e}")
    return lines


def _describe_result(result):
    """Turn the raw return value of ``ocr.ocr`` into display lines.

    Only the first element of ``result`` is inspected. Supported shapes, in
    the order they are checked:

    * mapping-like object exposing ``get`` and containing ``rec_texts``;
    * object exposing a ``rec_texts`` attribute;
    * plain list of per-line entries.

    Args:
        result: Value returned by ``ocr.ocr``.

    Returns:
        A list of display lines describing the recognition result.
    """
    if not result:
        return ["未识别到任何文字 (Result is empty)。"]

    res = result[0]
    if res is None:
        return ["未识别到任何文字 (Result[0] is None)。"]

    if hasattr(res, "get") and "rec_texts" in res:
        return _format_text_lines(res.get("rec_texts"), res.get("rec_scores"))

    if hasattr(res, "rec_texts"):
        # rec_scores may be absent on such objects; fall back to "no scores".
        return _format_text_lines(res.rec_texts, getattr(res, "rec_scores", None))

    if isinstance(res, list):
        return _format_legacy_lines(res)

    return [f"无法解析结果结构: {type(res)}", f"Result content: {res}"]


def test_ocr():
    """Run PaddleOCR on a sample image and dump the recognized text.

    Looks for ``2.jpg`` and then ``1.jpg`` next to this file, runs
    recognition on the first one found, prints every recognized line with
    its confidence and writes the same lines to ``ocr_output.txt`` in the
    same directory.

    Initialization errors, a missing image and recognition errors are
    reported on stdout and end the run early. Nothing is returned.
    """
    # 1. Initialize the OCR engine.
    print("正在初始化 PaddleOCR 模型...")
    try:
        ocr = PaddleOCR(use_textline_orientation=True, lang="ch")
    except Exception as e:
        print(f"初始化失败: {e}")
        return

    # 2. Locate the test image: prefer 2.jpg, fall back to 1.jpg.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    for candidate in ("2.jpg", "1.jpg"):
        image_path = os.path.join(current_dir, candidate)
        if os.path.exists(image_path):
            break
    else:
        # image_path still names the last candidate that was tried (1.jpg).
        print(f"错误: 未找到测试图片: {image_path}")
        print("请将测试图片命名为 2.jpg 或 1.jpg 并放置在 Test 目录下。")
        return
    print(f"正在识别图片: {image_path}")

    # 3. Run recognition.
    try:
        result = ocr.ocr(image_path)
    except Exception as e:
        print(f"识别过程发生异常: {e}")
        return

    # 4. Report the result on stdout and mirror it into a text file.
    print("\n" + "=" * 20 + " 识别结果 " + "=" * 20)
    sys.stdout.flush()

    lines = _describe_result(result)
    output_file = os.path.join(current_dir, "ocr_output.txt")
    with open(output_file, "w", encoding="utf-8") as f_out:
        for line in lines:
            print(line)
            f_out.write(line + "\n")

    print("=" * 50)
    sys.stdout.flush()
# Script entry point: run the OCR smoke test only when this file is executed
# directly, never as a side effect of importing it.
if __name__ == "__main__":
    test_ocr()