This commit is contained in:
HuangHai
2026-01-25 15:06:17 +08:00
parent 35654ec166
commit 8292bf83d1
8 changed files with 103 additions and 70 deletions

View File

@@ -21,16 +21,27 @@ class EasyOcrKit:
:param langs: 识别语言列表
:param gpu: 是否使用 GPU
"""
# 检查是否需要重新初始化(例如从 GPU 切换到 CPU)
if self._reader is not None:
if hasattr(self, '_using_gpu') and self._using_gpu != gpu:
logger.info(f"检测到 GPU 模式变更 ({self._using_gpu} -> {gpu}),重新初始化 Reader...")
self._reader = None
if self._reader is None:
try:
self._reader = easyocr.Reader(langs, gpu=gpu)
self._using_gpu = gpu # 记录当前使用的模式
logger.info(f"EasyOCR Reader 初始化成功 (gpu={gpu})")
except Exception as e:
logger.error(f"EasyOCR Reader 初始化失败: {e}")
# 如果 GPU 失败,尝试回退到 CPU
if gpu:
logger.warning("尝试回退到 CPU 模式...")
self._reader = easyocr.Reader(langs, gpu=False)
try:
self._reader = easyocr.Reader(langs, gpu=False)
self._using_gpu = False
except Exception as ex:
logger.error(f"EasyOCR CPU 模式回退也失败: {ex}")
def read_text(self, image):
"""

Binary file not shown.

Before

Width:  |  Height:  |  Size: 156 KiB

After

Width:  |  Height:  |  Size: 215 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 77 KiB

After

Width:  |  Height:  |  Size: 137 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 64 KiB

After

Width:  |  Height:  |  Size: 137 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 87 KiB

After

Width:  |  Height:  |  Size: 123 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 71 KiB

After

Width:  |  Height:  |  Size: 137 KiB

View File

@@ -7,13 +7,13 @@ import os
import asyncio
from datetime import datetime
from Util import Win32Patch
# 添加项目根目录到 sys.path
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
if project_root not in sys.path:
sys.path.append(project_root)
from Util import Win32Patch
from WeiXin.WxUtil import find_input_box_center, perform_input_action, get_vlm_analysis, clean_screenshots_dir, is_in_chat_interface, get_vlm_json, find_template_match, find_all_template_matches
from Util.LlmUtil import get_llm_response
from Util.EasyOcrKit import EasyOcrKit
@@ -49,21 +49,19 @@ logger.propagate = False
logger.info(f"日志文件路径: {log_file_path}")
# 配置参数
CHECK_INTERVAL = 10 # 检查频率 (秒)
SILENCE_THRESHOLD = 60 * 5 # 静默阈值 (5分钟) 主动询问
MAX_PROACTIVE_PROMPTS = 1 # 最大主动询问次数
CHECK_INTERVAL = 5 # 检查频率 (秒)
class ChatBot:
def __init__(self):
self.d = u2.connect()
self.last_message_text = ""
self.last_interaction_time = time.time()
self.proactive_count = 0
self.last_processed_msg = None # 记录上一条已处理/回复过的对方消息内容
self.screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
if not os.path.exists(self.screenshot_dir):
os.makedirs(self.screenshot_dir)
self.ocr_kit = EasyOcrKit()
# 强制使用 CPU 模式以避免 0xC0000409 (Stack Buffer Overrun) 崩溃
self.ocr_kit = EasyOcrKit(gpu=False)
self.is_first_run = True # 首次运行标志
@@ -84,26 +82,16 @@ class ChatBot:
"- 每学期开学招收小学三年级至六年级,初中七年级的学生入学,其它年段不招生。\n"
)
async def get_reply(self, history_text, is_proactive=False):
if is_proactive:
prompt = (
f"教师人设】:{self.persona}\n\n"
f"【对话背景】家长已经超过5分钟没有回应了。\n"
f"【近期聊天记录】:\n{history_text}\n\n"
"【任务要求】:\n"
"请作为大张老师,给家长发一段简短的关怀消息。不要催促,语气温柔。"
"字数严格控制在 50 字以内。不要编造事实"
)
else:
prompt = (
f"【教师人设】:{self.persona}\n\n"
f"【近期聊天记录】:\n{history_text}\n\n"
"【任务要求】:\n"
"请作为大张老师回复家长。**必须且只能针对聊天记录中的最后一条消息进行回复!**\n"
"之前的聊天记录仅供参考上下文,如果之前的问题已经回答过,绝对不要重复回答。\n"
"严禁发散,严禁编造家长没说过的情况。如果不清楚家长的意图,就温柔询问。\n"
"字数严格控制在 50 字以内。直接输出回复正文。"
)
async def get_reply(self, history_text):
prompt = (
f"【教师人设】:{self.persona}\n\n"
f"近期聊天记录】:\n{history_text}\n\n"
"【任务要求】:\n"
"请作为大张老师回复家长。**必须且只能针对聊天记录中的最后一条消息进行回复!**\n"
"之前的聊天记录仅供参考上下文,如果之前的问题已经回答过,绝对不要重复回答。\n"
"严禁发散,严禁编造家长没说过的情况。如果不清楚家长的意图,就温柔询问。\n"
"字数严格控制在 50 字以内。直接输出回复正文"
)
full_response = ""
async for chunk in get_llm_response(prompt, stream=False):
@@ -120,42 +108,53 @@ class ChatBot:
content = voice_msg.get('content', '0"')
logger.info(f"🎤 开始处理语音消息: {content}, 坐标: ({vx}, {vy})")
# 1. 长按语音消息
self.d.long_click(vx, vy, 0.6)
time.sleep(0.3)
# 2. CV 模板匹配寻找 "转文字" 按钮
menu_shot_path = os.path.join(self.screenshot_dir, "t6_menu_shot_convert.jpg")
self.d.screenshot(menu_shot_path)
convert_template = r"d:\dsWork\aiData\WeiXin\Templates\zhun_wen_zi.jpg"
convert_btn = find_template_match(menu_shot_path, convert_template, threshold=0.7)
if not convert_btn:
logger.warning("❌ CV 未找到 '转文字' 按钮,取消操作。")
self.d.click(vx + 200, vy) # 点击空白处关闭菜单
return None
logger.info(f"✅ CV 找到 '转文字' 按钮: {convert_btn}")
self.d.click(convert_btn[0], convert_btn[1])
# 3. 动态等待转换
duration_str = content.replace('"', '').strip()
try:
duration = int(duration_str)
except:
duration = 10
wait_seconds = max(2, duration / 5.0)
logger.info(f"⏳ 语音时长 {duration}s等待转换 {wait_seconds:.1f}s...")
time.sleep(wait_seconds)
# 4. 截图并 OCR 识别内容
ocr_shot_path = os.path.join(self.screenshot_dir, "t6_ocr_shot.jpg")
self.d.screenshot(ocr_shot_path)
# OCR 识别
# 策略:识别整个屏幕,但只提取位于当前语音消息下方,且在下一条消息(如果有)上方的内容
ocr_results = self.ocr_kit.read_text(ocr_shot_path)
# 1. 长按语音消息
logger.info("👆 正在长按语音消息...")
self.d.long_click(vx, vy, 0.6)
logger.info("✅ 长按完成,等待菜单...")
time.sleep(0.3)
# 2. CV 模板匹配寻找 "转文字" 按钮
menu_shot_path = os.path.join(self.screenshot_dir, "t6_menu_shot_convert.jpg")
logger.info(f"📸 截取菜单图: {menu_shot_path}")
self.d.screenshot(menu_shot_path)
convert_template = r"d:\dsWork\aiData\WeiXin\Templates\zhun_wen_zi.jpg"
logger.info(f"🔍 寻找模板: {convert_template}")
convert_btn = find_template_match(menu_shot_path, convert_template, threshold=0.7)
if not convert_btn:
logger.warning("❌ CV 未找到 '转文字' 按钮,取消操作。")
self.d.click(vx + 200, vy) # 点击空白处关闭菜单
return None
logger.info(f"✅ CV 找到 '转文字' 按钮: {convert_btn}")
self.d.click(convert_btn[0], convert_btn[1])
# 3. 动态等待转换
duration_str = content.replace('"', '').strip()
try:
duration = int(duration_str)
except:
duration = 10
wait_seconds = max(2, duration / 5.0)
logger.info(f"⏳ 语音时长 {duration}s等待转换 {wait_seconds:.1f}s...")
time.sleep(wait_seconds)
# 4. 截图并 OCR 识别内容
ocr_shot_path = os.path.join(self.screenshot_dir, "t6_ocr_shot.jpg")
logger.info(f"📸 截取 OCR 识别图: {ocr_shot_path}")
self.d.screenshot(ocr_shot_path)
# OCR 识别
# 策略:识别整个屏幕,但只提取位于当前语音消息下方,且在下一条消息(如果有)上方的内容
logger.info("📖 开始 OCR 识别...")
ocr_results = self.ocr_kit.read_text(ocr_shot_path)
logger.info(f"✅ OCR 识别完成,获取 {len(ocr_results)} 个文本块")
except Exception as e:
logger.error(f"❌ process_single_voice 发生异常: {e}", exc_info=True)
return None
# 按 Y 坐标排序,确保从上往下处理
ocr_results.sort(key=lambda x: (x[0][0][1] + x[0][2][1]) / 2)
@@ -417,6 +416,14 @@ class ChatBot:
processed_voice_content = text
self.is_first_run = False # 标记首次运行结束
# 初始化 last_processed_msg,避免回复历史消息
if dialogue_log:
last_log = dialogue_log[-1]
if last_log.startswith("对方"):
content = last_log.split(":", 1)[1].strip()
self.last_processed_msg = content
logger.info(f"🌟 首次运行,标记最后一条对方消息为已处理: {content}")
else:
# 后续监控:只处理最后一条,且必须是未读 (is_unread=True)
@@ -452,30 +459,45 @@ class ChatBot:
history_text = "\n".join(dialogue_log)
# 判断是否需要回复:
# 核心规则:只有当最后一条消息是“对方”说的,才回复。如果是“我”说的,则不回复。
# 核心规则:只有当最后一条消息是“对方”说的,且内容未处理过,才回复。
should_reply = False
current_last_content = ""
if dialogue_log:
last_log = dialogue_log[-1]
# 检查最后一条消息的发送者
if last_log.startswith("对方"):
logger.info(f"💡 最后一条消息是对方发送,准备回复。内容: {last_log}")
should_reply = True
parts = last_log.split(":", 1)
if len(parts) > 1:
current_last_content = parts[1].strip()
else:
current_last_content = last_log
if current_last_content != self.last_processed_msg:
logger.info(f"💡 发现新消息,准备回复。内容: {current_last_content}")
should_reply = True
else:
# logger.info(f"⚪ 消息已回复过,跳过: {current_last_content}")
should_reply = False
else:
logger.info(f"⚪ 最后一条消息是我发送的,无需回复。内容: {last_log}")
# logger.info(f"⚪ 最后一条消息是我发送的,无需回复。")
should_reply = False
# 如果最后一条是我发的,重置 last_processed_msg
self.last_processed_msg = None
if should_reply:
logger.info("🤖 准备调用 LLM 生成回复...")
# 立即更新状态,防止在回复生成期间(如果耗时)重复触发
self.last_processed_msg = current_last_content
reply = await self.get_reply(history_text)
logger.info(f"💡 LLM 回复: {reply}")
if reply and input_center:
# 输入并发送
perform_input_action(self.d, input_center, reply)
# 记录回复时间
self.last_interaction_time = time.time()
# 休眠
await asyncio.sleep(CHECK_INTERVAL)