# coding=utf-8
"""Manual smoke test for PaddleOCR.

Runs OCR on ``2.jpg`` (or ``1.jpg`` as a fallback) located next to this
script, prints every recognized line with its confidence, and mirrors the
same lines into ``ocr_output.txt`` in the same directory.
"""
import os
import sys
from paddleocr import PaddleOCR
import numpy as np


def _emit(message, sink):
    """Print *message* and append it as one line to the open text file *sink*."""
    print(message)
    sink.write(message + "\n")


def _iter_text_score_lines(texts, scores):
    """Yield one display line per entry of the parallel *texts*/*scores*.

    A missing score (``scores`` shorter than ``texts``) is shown as 0.0.
    When *texts* is empty a single "nothing recognized" line is yielded.
    """
    if not texts:
        yield "未识别到任何文字 (rec_texts is empty)。"
        return
    for i, text in enumerate(texts):
        score = scores[i] if i < len(scores) else 0.0
        yield f"行 {i+1}: {text} (置信度: {score:.4f})"


def _iter_legacy_lines(entries):
    """Yield one display line per entry of a list-of-lists result.

    Each entry is expected to carry a ``(text, score)`` pair at index 1;
    entries with another shape are reported as unknown format, and entries
    that raise while being inspected are reported as parse failures.
    """
    for number, entry in enumerate(entries, start=1):
        # Only the parsing is guarded, so an output (print/write) error is
        # not misreported as a parse failure.
        try:
            if len(entry) >= 2 and isinstance(entry[1], (tuple, list)):
                text, score = entry[1]
                message = f"行 {number}: {text} (置信度: {score:.4f})"
            else:
                message = f"行 {number}: {entry} (格式未知)"
        except Exception as e:  # best effort: one bad entry must not stop the rest
            message = f"行 {number} 解析失败: {e}"
        yield message


def _iter_result_lines(result):
    """Yield the display lines for the raw value returned by ``ocr.ocr``.

    Only the first element of *result* is inspected. Supported shapes of
    that element: a mapping with a ``rec_texts`` key, an object with a
    ``rec_texts`` attribute, or a list of per-line entries. Anything else
    is reported together with its type and content.
    """
    if not result:
        yield "未识别到任何文字 (Result is empty)。"
        return

    # Take the first result (usually the result for a single image).
    res = result[0]

    if res is None:
        yield "未识别到任何文字 (Result[0] is None)。"
    # Case A: newer result object that behaves like a dict.
    elif hasattr(res, 'get') and 'rec_texts' in res:
        yield from _iter_text_score_lines(
            res.get('rec_texts', []), res.get('rec_scores', [])
        )
    # Case B: result object exposing attributes; a missing ``rec_scores``
    # falls back to an empty list, matching the default used in case A.
    elif hasattr(res, 'rec_texts'):
        yield from _iter_text_score_lines(
            res.rec_texts, getattr(res, 'rec_scores', [])
        )
    # Case C: older list-of-lists structure.
    elif isinstance(res, list):
        yield from _iter_legacy_lines(res)
    else:
        yield f"无法解析结果结构: {type(res)}"
        yield f"Result content: {res}"


def test_ocr():
    """Run OCR on the bundled test image and report the recognized text.

    Steps: initialize PaddleOCR, locate ``2.jpg`` (falling back to
    ``1.jpg``) beside this script, run recognition, then print each result
    line and write the same lines to ``ocr_output.txt``. Initialization
    errors, a missing image and recognition errors are reported on stdout
    and end the function early; nothing is returned.
    """
    # 1. Initialize PaddleOCR.
    print("正在初始化 PaddleOCR 模型...")
    try:
        ocr = PaddleOCR(use_textline_orientation=True, lang="ch")
    except Exception as e:
        print(f"初始化失败: {e}")
        return

    # 2. Locate the test image: prefer 2.jpg, fall back to 1.jpg.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    image_path = os.path.join(current_dir, "2.jpg")
    if not os.path.exists(image_path):
        image_path = os.path.join(current_dir, "1.jpg")
    if not os.path.exists(image_path):
        print(f"错误: 未找到测试图片: {image_path}")
        print("请将测试图片命名为 2.jpg 或 1.jpg 并放置在 Test 目录下。")
        return

    print(f"正在识别图片: {image_path}")

    # 3. Run recognition.
    try:
        # The result is a list whose first element is either a result
        # object (newer versions) or a list (older versions).
        result = ocr.ocr(image_path)
    except Exception as e:
        print(f"识别过程发生异常: {e}")
        return

    # 4. Report the result.
    print("\n" + "="*20 + " 识别结果 " + "="*20)
    sys.stdout.flush()

    # Mirror everything into a file as well, for easier inspection.
    output_file = os.path.join(current_dir, "ocr_output.txt")
    with open(output_file, "w", encoding="utf-8") as f_out:
        for message in _iter_result_lines(result):
            _emit(message, f_out)

    print("="*50)
    sys.stdout.flush()


if __name__ == "__main__":
    test_ocr()