# coding=utf-8
import os
|
||
import sys
|
||
from paddleocr import PaddleOCR
|
||
import numpy as np
|
||
|
||
def _emit(message, sink):
    """Print ``message`` and append it, newline-terminated, to ``sink``.

    Args:
        message: One report line, without a trailing newline.
        sink: Open, writable text file that mirrors the console output.
    """
    print(message)
    sink.write(message + "\n")


def _emit_recognized(texts, scores, sink):
    """Report parallel ``texts`` / ``scores`` sequences, one numbered line each.

    Args:
        texts: Sequence of recognized strings; ``None`` or an empty sequence
            is reported as "nothing recognized".
        scores: Sequence of confidences aligned with ``texts``. It may be
            shorter than ``texts`` (or ``None``); missing entries are
            reported as ``0.0``.
        sink: Open, writable text file that mirrors the console output.
    """
    # An explicit length test is used instead of truthiness so that
    # array-like containers with several elements do not raise on `not`.
    if texts is None or len(texts) == 0:
        _emit("未识别到任何文字 (rec_texts is empty)。", sink)
        return

    if scores is None:
        scores = []

    for i, text in enumerate(texts):
        score = scores[i] if i < len(scores) else 0.0
        _emit(f"行 {i+1}: {text} (置信度: {score:.4f})", sink)


def _emit_legacy_lines(lines, sink):
    """Report a legacy list-shaped result, one entry per line.

    Each entry is expected to have a ``(text, score)`` pair at index 1.
    Entries of any other shape are reported verbatim as "unknown format".

    Args:
        lines: List of per-line result entries.
        sink: Open, writable text file that mirrors the console output.
    """
    for idx, line in enumerate(lines):
        try:
            if len(line) >= 2 and isinstance(line[1], (tuple, list)):
                text, score = line[1]
                _emit(f"行 {idx+1}: {text} (置信度: {score:.4f})", sink)
            else:
                _emit(f"行 {idx+1}: {line} (格式未知)", sink)
        except Exception as e:
            # Deliberately broad: one malformed entry must not abort the
            # report. The failure is mirrored to the file like every other
            # message so the saved report matches the console.
            _emit(f"行 {idx+1} 解析失败: {e}", sink)


def test_ocr():
    """Run PaddleOCR on a sample image and report the recognized text.

    Steps:
        1. Build a ``PaddleOCR`` instance; on any failure print the error
           and return.
        2. Look for ``2.jpg`` and then ``1.jpg`` next to this file; if
           neither exists print a hint and return.
        3. Call ``ocr.ocr`` on the image; on any failure print the error
           and return.
        4. Print each recognized line with its confidence and mirror the
           same lines to ``ocr_output.txt`` next to this file.

    Returns:
        None. All results are reported through stdout and the output file.
    """
    # 1. Initialise PaddleOCR.
    print("正在初始化 PaddleOCR 模型...")
    try:
        ocr = PaddleOCR(use_textline_orientation=True, lang="ch")
    except Exception as e:
        print(f"初始化失败: {e}")
        return

    # 2. Locate the test image: prefer 2.jpg, fall back to 1.jpg.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    image_path = os.path.join(current_dir, "2.jpg")
    if not os.path.exists(image_path):
        image_path = os.path.join(current_dir, "1.jpg")

    if not os.path.exists(image_path):
        print(f"错误: 未找到测试图片: {image_path}")
        print("请将测试图片命名为 2.jpg 或 1.jpg 并放置在 Test 目录下。")
        return

    print(f"正在识别图片: {image_path}")

    # 3. Run recognition.
    try:
        # `result` is a list whose first item is either a mapping/object
        # exposing rec_texts / rec_scores or a legacy list of entries.
        result = ocr.ocr(image_path)
    except Exception as e:
        print(f"识别过程发生异常: {e}")
        return

    # 4. Report the results.
    print("\n" + "="*20 + " 识别结果 " + "="*20)
    sys.stdout.flush()

    # Mirror everything to a file so the output is easy to inspect later.
    output_file = os.path.join(current_dir, "ocr_output.txt")
    with open(output_file, "w", encoding="utf-8") as f_out:
        if not result:
            _emit("未识别到任何文字 (Result is empty)。", f_out)
        else:
            # Only the first item is reported (the single input image).
            res = result[0]

            if res is None:
                _emit("未识别到任何文字 (Result[0] is None)。", f_out)

            # Case A: mapping-style result (supports `in` and `.get`).
            elif hasattr(res, 'get') and 'rec_texts' in res:
                _emit_recognized(
                    res.get('rec_texts', []),
                    res.get('rec_scores', []),
                    f_out,
                )

            # Case B: result exposing the fields as attributes. rec_scores
            # is optional here so a missing attribute cannot raise.
            elif hasattr(res, 'rec_texts'):
                _emit_recognized(
                    res.rec_texts,
                    getattr(res, 'rec_scores', []),
                    f_out,
                )

            # Case C: legacy list-of-lists structure.
            elif isinstance(res, list):
                _emit_legacy_lines(res, f_out)

            else:
                _emit(f"无法解析结果结构: {type(res)}", f_out)
                _emit(f"Result content: {res}", f_out)

    print("="*50)
    sys.stdout.flush()
|
||
|
||
# Run the OCR smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_ocr()
|