# aiData/WeiXin/T3_InputLlmText.py

# coding=utf-8
import time
import logging
import sys
import os
import asyncio

# Put the project root (the parent of this script's directory) on sys.path
# so that the Util package can be imported.
_script_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(_script_dir)
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

from WeiXin import WxUtil
from WeiXin.WxUtil import find_input_box_center, perform_input_action, analyze_chat_image, clean_screenshots_dir
from Util.LlmUtil import get_llm_response

# Configure logging: records are written both to a UTF-8 log file inside
# WxUtil.LOG_DIR and to a StreamHandler (console output).
log_dir = WxUtil.LOG_DIR
# exist_ok=True replaces the former "check, then create" sequence, which could
# fail if the directory appeared between the check and the makedirs call.
os.makedirs(log_dir, exist_ok=True)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(os.path.join(log_dir, "T3_InputLlmText.log"), encoding='utf-8'),
        logging.StreamHandler()
    ]
)
# Named logger used by everything in this script.
logger = logging.getLogger("T3_InputLlmText")

# Marker searched for in dialogue entries; its presence is treated as
# "a voice message is still being converted to text".
_CONVERTING_MARKER = "[正在转换语音...]"

# History text handed to the LLM when no usable dialogue was extracted.
_NO_HISTORY_TEXT = "（无对话历史）"

# Persona description embedded verbatim into the LLM prompt.
_PERSONA = (
    "你是一名1999年毕业、拥有27年一线教学经验的小学高级教师，名叫‘大张老师’。你目前在‘长春市少惠林作文素养培养中心’工作。"
    "你不仅是一位作文教学专家，批阅过超过2万篇作文，更是一位懂得孩子心理、能与家长共情的教育智者。"
    "你的回复风格应该是：温厚、亲切、睿智且极具亲和力，就像一位老邻居、老大哥在和家长坐在长椅上聊天。"
    "严禁使用任何列表格式（如 1. 2. 3. 或 A. B. C.），严禁使用‘首先、其次、最后’等刻板的逻辑词。"
    "要用连贯、优美、富有生活气息的文字进行叙述。每一句话都要带温度，要善于用‘咱们孩子’、‘作为老师我也理解’等词汇拉近距离。"
    "回复内容要有深度，不要只给结论，要讲透背后的教育逻辑。字数必须严格控制在 100-200 字之间。"
    "如果涉及到校区信息，必须且只能使用以下真实数据，绝对严禁使用任何占位符：\n"
    "- 单位：长春市少惠林作文素养培养中心\n"
    "- 地址：南环城路与临河街交汇，TOUCH12街3楼325号\n"
    "- 联系人：小张老师（电话：18686619970）\n"
    "- 课程：线上/线下作文课、读书营/阅读策略营（假期开设）\n"
    "- 上课：作文通常在周二/周四晚上，周六/周日全天"
)


def _has_converting_marker(dialogue_log) -> bool:
    """Return True if *dialogue_log* is a list with an entry containing the marker.

    Anything that is not a list is reported as "not converting".
    """
    if not isinstance(dialogue_log, list):
        return False
    return any(_CONVERTING_MARKER in str(msg) for msg in dialogue_log)


def _build_history_text(dialogue_log) -> str:
    """Turn the analysed dialogue into the history text used in the prompt.

    List input: entries containing the conversion marker are dropped and the
    rest are joined with newlines. Entries are converted with ``str`` first so
    that a non-string entry cannot make ``str.join`` raise ``TypeError``.
    Other truthy input is stringified as a whole. When nothing usable remains
    (including a list that held only marker entries) the "no history"
    placeholder is returned instead of an empty string.
    """
    if dialogue_log and isinstance(dialogue_log, list):
        # Drop the conversion placeholder so it does not distract the LLM.
        filtered_log = [
            str(msg) for msg in dialogue_log if _CONVERTING_MARKER not in str(msg)
        ]
        if filtered_log:
            logger.info("提取到对话历史: ")
            for msg in filtered_log:
                logger.info(msg)
            return "\n".join(filtered_log)
    elif dialogue_log:
        history_text = str(dialogue_log)
        logger.info(f"提取到对话历史: {history_text}")
        return history_text

    logger.warning("未提取到对话历史")
    return _NO_HISTORY_TEXT


def _build_prompt(history_text: str) -> str:
    """Assemble the full LLM prompt from the persona, the history and the task rules."""
    return (
        f"【教师人设】：{_PERSONA}\n\n"
        f"【近期聊天记录】：\n{history_text}\n\n"
        "【任务要求】：\n"
        "请作为大张老师，给家长写一段暖心且有深度的回复。要针对家长最后提到的问题或状态，先给予情感上的安抚和理解，"
        "然后再结合你27年的教学经验，给出具体的建议。建议要讲得细致、感人，展现出老教师的智慧沉淀。"
        "全文必须是一个或两个完整的自然段，绝对禁止分点列项！字数严格在 100-200 字之间。"
        "如果需要提供联系方式或地址，请自然地揉进段落中，严禁出现'XX'占位符。直接输出回复的正文内容，不要包含任何多余的修饰词或引号。"
    )


async def generate_and_input():
    """Generate an LLM reply for the current chat and type/send it on the device.

    Steps: prepare the script environment, connect to the device, screenshot
    and analyse the chat (retrying once after 3 seconds if a voice message is
    still being converted), build the prompt, collect the LLM response, locate
    the input box and perform the input action with ``auto_send=True``.

    Returns:
        None. The outcome is reported through the module logger only.

    Errors raised after the environment setup are caught and logged; an
    exception from ``WxUtil.setup_script_environment`` itself propagates.
    """
    # Per the original comment this cleans Logs and Output before the run;
    # the actual behaviour lives in WxUtil.
    WxUtil.setup_script_environment()

    logger.info("开始执行 T3: 生成 LLM 回复并输入...")

    try:
        # 1. Connect to the device; a falsy result aborts silently here.
        d = WxUtil.connect_device()
        if not d:
            return

        # 2. Screenshot the chat and extract the dialogue history.
        screenshot_dir = WxUtil.OUTPUT_DIR
        tmp_shot = os.path.join(screenshot_dir, "t4_temp_history_check.jpg")
        analyzed_shot = os.path.join(screenshot_dir, "t4_temp_history_analyzed.jpg")

        d.screenshot(tmp_shot)
        dialogue_log, _ = await analyze_chat_image(tmp_shot, analyzed_shot, device=d)

        if _has_converting_marker(dialogue_log):
            logger.info("检测到语音正在转文字，等待 3 秒后重新截图分析...")
            await asyncio.sleep(3)
            d.screenshot(tmp_shot)
            dialogue_log, _ = await analyze_chat_image(tmp_shot, analyzed_shot, device=d)

            # Whatever the second pass shows, continue with what is available.
            if _has_converting_marker(dialogue_log):
                logger.warning("语音转换时间较长，将尝试根据当前已有内容生成回复。")

        history_text = _build_history_text(dialogue_log)

        # 3. Ask the LLM for a reply.
        prompt = _build_prompt(history_text)
        logger.info("正在以'亲切而专业的特级教师'身份请求 LLM 生成深度回复...")

        chunks = [chunk async for chunk in get_llm_response(prompt, stream=False)]
        # Strip surrounding whitespace, then any mix of straight/curly double
        # quotes in one pass (chained single-character strips missed quotes
        # nested in a different order), then whitespace left inside the quotes.
        llm_text = "".join(chunks).strip().strip('"“”').strip()
        logger.info(f"LLM 生成的回复内容: {llm_text}")

        if not llm_text:
            logger.error("LLM 生成内容为空，停止执行。")
            return

        # 4. Locate the input box in the latest screenshot; only the centre
        #    point is used below.
        center_point, _ = find_input_box_center(tmp_shot)

        # 5. Type and send. Per the original note, perform_input_action still
        #    tries to find the input box via native controls when
        #    center_point is None.
        logger.info("正在准备输入回复内容...")
        success = perform_input_action(d, center_point, llm_text, auto_send=True)

        if success:
            logger.info("✅ T3 执行完成：文字已成功输入并点击发送。")
        else:
            logger.error("❌ T3 执行失败：输入动作未成功完成。")

    except Exception as e:
        # Top-level boundary of the script: log with traceback, do not re-raise.
        logger.error(f"❌ T3 执行出错: {e}", exc_info=True)

# Script entry point: run the coroutine to completion when executed directly.
if __name__ == "__main__":
    asyncio.run(generate_and_input())