120 lines
4.5 KiB
Python
120 lines
4.5 KiB
Python
|
|
# coding=utf-8
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
from paddleocr import PaddleOCR
|
|||
|
|
import numpy as np
|
|||
|
|
|
|||
|
|
def _format_score(score):
    """Format a confidence value with four decimals.

    Falls back to ``str(score)`` when the value cannot be converted to a
    float (for example ``None``), so a single odd entry cannot abort the
    whole report.
    """
    try:
        return f"{float(score):.4f}"
    except (TypeError, ValueError):
        return str(score)


def _text_score_messages(texts, scores):
    """Build one report line per recognized text from parallel sequences.

    A text without a matching score is reported with a confidence of 0.0.
    """
    if not texts:
        return ["未识别到任何文字 (rec_texts is empty)。"]
    if scores is None:
        scores = []

    messages = []
    for i, text in enumerate(texts):
        score = scores[i] if i < len(scores) else 0.0
        messages.append(f"行 {i + 1}: {text} (置信度: {_format_score(score)})")
    return messages


def _legacy_messages(lines):
    """Build report lines from the list-of-lists result layout.

    Each entry is expected to carry a ``(text, score)`` pair at index 1;
    anything else is reported as an unknown format instead of being dropped.
    """
    if not lines:
        return ["未识别到任何文字 (Result[0] is empty)。"]

    messages = []
    for idx, line in enumerate(lines, start=1):
        try:
            if len(line) >= 2 and isinstance(line[1], (tuple, list)):
                text, score = line[1]
                messages.append(
                    f"行 {idx}: {text} (置信度: {_format_score(score)})"
                )
            else:
                messages.append(f"行 {idx}: {line} (格式未知)")
        except (TypeError, ValueError, LookupError) as e:
            # Keep the failure in the report so the output file matches
            # what is shown on the console.
            messages.append(f"行 {idx} 解析失败: {e}")
    return messages


def _result_messages(result):
    """Convert the raw return value of ``ocr.ocr`` into report lines.

    Only the first element of ``result`` is inspected. Three layouts are
    recognized: a mapping-like object containing ``rec_texts``, an object
    exposing ``rec_texts`` as an attribute, and a plain list of lines.
    """
    if not result:
        return ["未识别到任何文字 (Result is empty)。"]

    res = result[0]
    if res is None:
        return ["未识别到任何文字 (Result[0] is None)。"]

    # Case A: mapping-style result (supports ``get`` and ``in``).
    if hasattr(res, "get") and "rec_texts" in res:
        return _text_score_messages(
            res.get("rec_texts", []), res.get("rec_scores", [])
        )

    # Case B: result exposing the fields as attributes. The scores
    # attribute is optional so a missing one does not raise.
    if hasattr(res, "rec_texts"):
        return _text_score_messages(
            res.rec_texts, getattr(res, "rec_scores", [])
        )

    # Case C: list-of-lists layout.
    if isinstance(res, list):
        return _legacy_messages(res)

    return [f"无法解析结果结构: {type(res)}", f"Result content: {res}"]


def test_ocr():
    """Run OCR on a sample image and report the recognized lines.

    Looks for ``2.jpg`` and then ``1.jpg`` next to this script, runs
    PaddleOCR on the first one found, prints every recognized line with its
    confidence, and writes the same lines to ``ocr_output.txt`` in the
    script directory. Initialization errors, a missing image, and
    recognition errors are reported on stdout and end the run early.

    Returns:
        None.
    """
    # 1. Initialize PaddleOCR.
    print("正在初始化 PaddleOCR 模型...")
    try:
        ocr = PaddleOCR(use_textline_orientation=True, lang="ch")
    except Exception as e:
        print(f"初始化失败: {e}")
        return

    # 2. Locate the test image: prefer 2.jpg, fall back to 1.jpg.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    image_path = os.path.join(current_dir, "2.jpg")
    if not os.path.exists(image_path):
        image_path = os.path.join(current_dir, "1.jpg")

    if not os.path.exists(image_path):
        print(f"错误: 未找到测试图片: {image_path}")
        print("请将测试图片命名为 2.jpg 或 1.jpg 并放置在 Test 目录下。")
        return

    print(f"正在识别图片: {image_path}")

    # 3. Run recognition.
    try:
        result = ocr.ocr(image_path)
    except Exception as e:
        print(f"识别过程发生异常: {e}")
        return

    # 4. Report the result on stdout and mirror it into a text file.
    print("\n" + "=" * 20 + " 识别结果 " + "=" * 20)
    sys.stdout.flush()

    messages = _result_messages(result)

    output_file = os.path.join(current_dir, "ocr_output.txt")
    with open(output_file, "w", encoding="utf-8") as f_out:
        for msg in messages:
            print(msg)
            f_out.write(msg + "\n")

    print("=" * 50)
    sys.stdout.flush()
|
|||
|
|
|
|||
|
|
# Script entry point: run the OCR check only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    test_ocr()
|