# aiData/Test/TestOCR.py

# coding=utf-8
import os
import sys
from paddleocr import PaddleOCR
import numpy as np

def _format_recognized_lines(texts, scores):
    """Format parallel text/score sequences as numbered report lines.

    Args:
        texts: Iterable of recognized strings, or None.
        scores: Iterable of confidence values aligned with ``texts``, or
            None. When it is shorter than ``texts`` the missing entries are
            reported with a confidence of 0.0.

    Returns:
        A list of message strings, one per recognized text, or a single
        "nothing recognized" message when ``texts`` is empty.
    """
    # Materialize as lists so the emptiness test and len() are well defined
    # even when the result object hands back array-like containers.
    texts = [] if texts is None else list(texts)
    scores = [] if scores is None else list(scores)

    if not texts:
        return ["未识别到任何文字 (rec_texts is empty)。"]

    # Pad so every text has a score; a negative repeat count yields [].
    scores += [0.0] * (len(texts) - len(scores))
    return [
        f"行 {number}: {text} (置信度: {score:.4f})"
        for number, (text, score) in enumerate(zip(texts, scores), start=1)
    ]


def _format_legacy_lines(entries):
    """Format a list-style result as numbered report lines.

    Each entry is expected to have a ``(text, score)`` pair as its second
    element; entries of any other shape are reported as "unknown format".

    Args:
        entries: List of per-line entries taken from ``result[0]``.

    Returns:
        A list of message strings. Entries that cannot be parsed produce a
        "parse failed" message instead of aborting the whole report.
    """
    if not entries:
        return ["未识别到任何文字 (Result[0] is empty)。"]

    lines = []
    for number, entry in enumerate(entries, start=1):
        try:
            if len(entry) >= 2 and isinstance(entry[1], (tuple, list)):
                text, score = entry[1]
                lines.append(f"行 {number}: {text} (置信度: {score:.4f})")
            else:
                lines.append(f"行 {number}: {entry} (格式未知)")
        except Exception as e:
            # Deliberately broad: one malformed entry must not hide the
            # remaining lines. The failure is returned like any other line
            # so it reaches the output file as well as the console.
            lines.append(f"行 {number} 解析失败: {e}")
    return lines


def _build_report(result):
    """Turn the raw return value of ``ocr.ocr`` into report lines.

    Only the first element of ``result`` is inspected. Three shapes are
    recognized: a mapping-like object containing ``rec_texts``, an object
    exposing a ``rec_texts`` attribute, and a plain list of entries.

    Args:
        result: Value returned by ``ocr.ocr(image_path)``.

    Returns:
        A list of message strings describing the recognition outcome.
    """
    if not result:
        return ["未识别到任何文字 (Result is empty)。"]

    page = result[0]
    if page is None:
        return ["未识别到任何文字 (Result[0] is None)。"]

    # Case A: mapping-like result object keyed by 'rec_texts'/'rec_scores'.
    if hasattr(page, 'get') and 'rec_texts' in page:
        return _format_recognized_lines(
            page.get('rec_texts'), page.get('rec_scores')
        )

    # Case B: result object exposing the same data as attributes.
    if hasattr(page, 'rec_texts'):
        return _format_recognized_lines(
            page.rec_texts, getattr(page, 'rec_scores', None)
        )

    # Case C: list-of-lists structure.
    if isinstance(page, list):
        return _format_legacy_lines(page)

    return [
        f"无法解析结果结构: {type(page)}",
        f"Result content: {page}",
    ]


def test_ocr() -> None:
    """Run PaddleOCR on a sample image and report the recognized lines.

    The image is looked up next to this file, preferring ``2.jpg`` and
    falling back to ``1.jpg``. Every report line is printed to stdout and
    also written to ``ocr_output.txt`` in the same directory.

    Initialization errors, a missing image and recognition errors are
    reported on stdout and end the function early; nothing is raised for
    them and no output file is written in those cases.
    """
    # 1. Initialize PaddleOCR.
    print("正在初始化 PaddleOCR 模型...")
    try:
        ocr = PaddleOCR(use_textline_orientation=True, lang="ch")
    except Exception as e:
        print(f"初始化失败: {e}")
        return

    # 2. Locate the test image: 2.jpg first, then 1.jpg.
    current_dir = os.path.dirname(os.path.abspath(__file__))
    candidates = [
        os.path.join(current_dir, name) for name in ("2.jpg", "1.jpg")
    ]
    image_path = next(
        (path for path in candidates if os.path.exists(path)), None
    )
    if image_path is None:
        # Report the last candidate tried, i.e. the 1.jpg fallback path.
        print(f"错误: 未找到测试图片: {candidates[-1]}")
        print("请将测试图片命名为 2.jpg 或 1.jpg 并放置在 Test 目录下。")
        return

    print(f"正在识别图片: {image_path}")

    # 3. Run recognition.
    try:
        result = ocr.ocr(image_path)
    except Exception as e:
        print(f"识别过程发生异常: {e}")
        return

    # 4. Emit the report to stdout and, identically, to the output file.
    print("\n" + "="*20 + " 识别结果 " + "="*20)
    sys.stdout.flush()

    report = _build_report(result)
    output_file = os.path.join(current_dir, "ocr_output.txt")
    with open(output_file, "w", encoding="utf-8") as f_out:
        for msg in report:
            print(msg)
            f_out.write(msg + "\n")

    print("="*50)
    sys.stdout.flush()

# Run the OCR smoke test only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_ocr()