Files
aiData/Test/TestOCR.py
HuangHai b66f683dfb 'commit'
2026-01-12 07:49:18 +08:00

120 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding=utf-8
import os
import sys
from paddleocr import PaddleOCR
import numpy as np
def _format_recognized(texts, scores) -> list:
    """Format parallel text/score sequences as numbered output lines.

    Args:
        texts: Sequence of recognized strings (may be None or empty).
        scores: Sequence of confidences aligned with ``texts``; may be
            shorter than ``texts`` or None, in which case 0.0 is reported
            for the missing entries.

    Returns:
        A list of message strings, one per recognized text, or a single
        "nothing recognized" message when ``texts`` is empty.
    """
    # len() instead of truthiness so array-like containers do not raise
    # "truth value of an array is ambiguous".
    if texts is None or len(texts) == 0:
        return ["未识别到任何文字 (rec_texts is empty)。"]
    if scores is None:
        scores = []

    lines = []
    for i, text in enumerate(texts):
        score = scores[i] if i < len(scores) else 0.0
        lines.append(f"{i+1}: {text} (置信度: {score:.4f})")
    return lines


def _format_legacy_line(idx, line) -> str:
    """Format one entry of the legacy list-of-lists result.

    An entry whose second element is a ``(text, score)`` pair is rendered
    with its confidence; anything else is reported as an unknown format.
    Parsing is deliberately best-effort: a malformed entry yields a
    failure message instead of aborting the whole report.
    """
    try:
        if len(line) >= 2 and isinstance(line[1], (tuple, list)):
            text, score = line[1]
            return f"{idx+1}: {text} (置信度: {score:.4f})"
        return f"{idx+1}: {line} (格式未知)"
    except Exception as e:
        # Returned (not just printed) so the failure is also recorded in
        # the output file alongside the successfully parsed lines.
        return f"{idx+1} 解析失败: {e}"


def _describe_ocr_result(result) -> list:
    """Turn the raw return value of ``ocr.ocr(...)`` into report lines.

    Only the first element of ``result`` is inspected (the result for a
    single image). Three shapes are understood:

    * a mapping-like object exposing ``get`` and containing ``rec_texts``;
    * an object with a ``rec_texts`` attribute;
    * a plain list of ``[box, (text, score)]``-style entries.

    Anything else is reported as an unparseable structure together with
    its content.
    """
    if not result:
        return ["未识别到任何文字 (Result is empty)。"]

    res = result[0]
    if res is None:
        return ["未识别到任何文字 (Result[0] is None)。"]

    # Case A: mapping-style result (dict-like access).
    if hasattr(res, 'get') and 'rec_texts' in res:
        return _format_recognized(res.get('rec_texts', []),
                                  res.get('rec_scores', []))

    # Case B: attribute-style result.
    if hasattr(res, 'rec_texts'):
        return _format_recognized(res.rec_texts,
                                  getattr(res, 'rec_scores', None))

    # Case C: legacy list-of-lists structure.
    if isinstance(res, list):
        return [_format_legacy_line(idx, line) for idx, line in enumerate(res)]

    return [f"无法解析结果结构: {type(res)}", f"Result content: {res}"]


def test_ocr() -> None:
    """Run PaddleOCR on a sample image and report the recognized text.

    Steps:
        1. Initialize ``PaddleOCR``; on failure print the error and return.
        2. Look for ``2.jpg`` next to this file, falling back to ``1.jpg``;
           if neither exists print a hint and return.
        3. Run recognition; on failure print the error and return.
        4. Print every report line to stdout and write the same lines to
           ``ocr_output.txt`` in this file's directory (UTF-8).

    Returns:
        None. All results are delivered via stdout and the output file.
    """
    # 1. Initialize PaddleOCR.
    print("正在初始化 PaddleOCR 模型...")
    try:
        ocr = PaddleOCR(use_textline_orientation=True, lang="ch")
    except Exception as e:
        print(f"初始化失败: {e}")
        return

    # 2. Locate the test image: prefer 2.jpg, fall back to 1.jpg.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    image_path = os.path.join(current_dir, "2.jpg")
    if not os.path.exists(image_path):
        image_path = os.path.join(current_dir, "1.jpg")
    if not os.path.exists(image_path):
        print(f"错误: 未找到测试图片: {image_path}")
        print("请将测试图片命名为 2.jpg 或 1.jpg 并放置在 Test 目录下。")
        return
    print(f"正在识别图片: {image_path}")

    # 3. Run recognition. The return value is a list whose first element
    #    is either a result object (newer releases) or a list (older ones).
    try:
        result = ocr.ocr(image_path)
    except Exception as e:
        print(f"识别过程发生异常: {e}")
        return

    # 4. Emit the report to stdout and mirror it into a file for later review.
    print("\n" + "="*20 + " 识别结果 " + "="*20)
    sys.stdout.flush()

    report_lines = _describe_ocr_result(result)
    output_file = os.path.join(current_dir, "ocr_output.txt")
    with open(output_file, "w", encoding="utf-8") as f_out:
        for msg in report_lines:
            print(msg)
            f_out.write(msg + "\n")

    print("="*50)
    sys.stdout.flush()
# Script entry point: run the OCR smoke test only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    test_ocr()