'commit'
@@ -21,16 +21,27 @@ class EasyOcrKit:
|
||||
:param langs: 识别语言列表
|
||||
:param gpu: 是否使用 GPU
|
||||
"""
|
||||
# 检查是否需要重新初始化(例如从 GPU 切换到 CPU)
|
||||
if self._reader is not None:
|
||||
if hasattr(self, '_using_gpu') and self._using_gpu != gpu:
|
||||
logger.info(f"检测到 GPU 模式变更 ({self._using_gpu} -> {gpu}),重新初始化 Reader...")
|
||||
self._reader = None
|
||||
|
||||
if self._reader is None:
|
||||
try:
|
||||
self._reader = easyocr.Reader(langs, gpu=gpu)
|
||||
self._using_gpu = gpu # 记录当前使用的模式
|
||||
logger.info(f"EasyOCR Reader 初始化成功 (gpu={gpu})")
|
||||
except Exception as e:
|
||||
logger.error(f"EasyOCR Reader 初始化失败: {e}")
|
||||
# 如果 GPU 失败,尝试回退到 CPU
|
||||
if gpu:
|
||||
logger.warning("尝试回退到 CPU 模式...")
|
||||
self._reader = easyocr.Reader(langs, gpu=False)
|
||||
try:
|
||||
self._reader = easyocr.Reader(langs, gpu=False)
|
||||
self._using_gpu = False
|
||||
except Exception as ex:
|
||||
logger.error(f"EasyOCR CPU 模式回退也失败: {ex}")
|
||||
|
||||
def read_text(self, image):
|
||||
"""
|
||||
|
||||
|
Before Width: | Height: | Size: 156 KiB After Width: | Height: | Size: 215 KiB |
|
Before Width: | Height: | Size: 77 KiB After Width: | Height: | Size: 137 KiB |
|
Before Width: | Height: | Size: 64 KiB After Width: | Height: | Size: 137 KiB |
|
Before Width: | Height: | Size: 87 KiB After Width: | Height: | Size: 123 KiB |
|
Before Width: | Height: | Size: 71 KiB After Width: | Height: | Size: 137 KiB |
@@ -7,13 +7,13 @@ import os
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
|
||||
from Util import Win32Patch
|
||||
|
||||
# 添加项目根目录到 sys.path
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
if project_root not in sys.path:
|
||||
sys.path.append(project_root)
|
||||
|
||||
from Util import Win32Patch
|
||||
|
||||
from WeiXin.WxUtil import find_input_box_center, perform_input_action, get_vlm_analysis, clean_screenshots_dir, is_in_chat_interface, get_vlm_json, find_template_match, find_all_template_matches
|
||||
from Util.LlmUtil import get_llm_response
|
||||
from Util.EasyOcrKit import EasyOcrKit
|
||||
@@ -49,21 +49,19 @@ logger.propagate = False
|
||||
logger.info(f"日志文件路径: {log_file_path}")
|
||||
|
||||
# 配置参数
|
||||
CHECK_INTERVAL = 10 # 检查频率 (秒)
|
||||
SILENCE_THRESHOLD = 60 * 5 # 静默阈值 (5分钟) 主动询问
|
||||
MAX_PROACTIVE_PROMPTS = 1 # 最大主动询问次数
|
||||
CHECK_INTERVAL = 5 # 检查频率 (秒)
|
||||
|
||||
class ChatBot:
|
||||
def __init__(self):
|
||||
self.d = u2.connect()
|
||||
self.last_message_text = ""
|
||||
self.last_interaction_time = time.time()
|
||||
self.proactive_count = 0
|
||||
self.last_processed_msg = None # 记录上一条已处理/回复过的对方消息内容
|
||||
self.screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
|
||||
if not os.path.exists(self.screenshot_dir):
|
||||
os.makedirs(self.screenshot_dir)
|
||||
|
||||
self.ocr_kit = EasyOcrKit()
|
||||
# 强制使用 CPU 模式以避免 0xC0000409 (Stack Buffer Overrun) 崩溃
|
||||
self.ocr_kit = EasyOcrKit(gpu=False)
|
||||
|
||||
self.is_first_run = True # 首次运行标志
|
||||
|
||||
@@ -84,26 +82,16 @@ class ChatBot:
|
||||
"- 每学期开学招收小学三年级至六年级,初中七年级的学生入学,其它年段不招生。\n"
|
||||
)
|
||||
|
||||
async def get_reply(self, history_text, is_proactive=False):
|
||||
if is_proactive:
|
||||
prompt = (
|
||||
f"【教师人设】:{self.persona}\n\n"
|
||||
f"【对话背景】:家长已经超过5分钟没有回应了。\n"
|
||||
f"【近期聊天记录】:\n{history_text}\n\n"
|
||||
"【任务要求】:\n"
|
||||
"请作为大张老师,给家长发一段简短的关怀消息。不要催促,语气温柔。"
|
||||
"字数严格控制在 50 字以内。不要编造事实。"
|
||||
)
|
||||
else:
|
||||
prompt = (
|
||||
f"【教师人设】:{self.persona}\n\n"
|
||||
f"【近期聊天记录】:\n{history_text}\n\n"
|
||||
"【任务要求】:\n"
|
||||
"请作为大张老师回复家长。**必须且只能针对聊天记录中的最后一条消息进行回复!**\n"
|
||||
"之前的聊天记录仅供参考上下文,如果之前的问题已经回答过,绝对不要重复回答。\n"
|
||||
"严禁发散,严禁编造家长没说过的情况。如果不清楚家长的意图,就温柔询问。\n"
|
||||
"字数严格控制在 50 字以内。直接输出回复正文。"
|
||||
)
|
||||
async def get_reply(self, history_text):
|
||||
prompt = (
|
||||
f"【教师人设】:{self.persona}\n\n"
|
||||
f"【近期聊天记录】:\n{history_text}\n\n"
|
||||
"【任务要求】:\n"
|
||||
"请作为大张老师回复家长。**必须且只能针对聊天记录中的最后一条消息进行回复!**\n"
|
||||
"之前的聊天记录仅供参考上下文,如果之前的问题已经回答过,绝对不要重复回答。\n"
|
||||
"严禁发散,严禁编造家长没说过的情况。如果不清楚家长的意图,就温柔询问。\n"
|
||||
"字数严格控制在 50 字以内。直接输出回复正文。"
|
||||
)
|
||||
|
||||
full_response = ""
|
||||
async for chunk in get_llm_response(prompt, stream=False):
|
||||
@@ -120,42 +108,53 @@ class ChatBot:
|
||||
content = voice_msg.get('content', '0"')
|
||||
logger.info(f"🎤 开始处理语音消息: {content}, 坐标: ({vx}, {vy})")
|
||||
|
||||
# 1. 长按语音消息
|
||||
self.d.long_click(vx, vy, 0.6)
|
||||
time.sleep(0.3)
|
||||
|
||||
# 2. CV 模板匹配寻找 "转文字" 按钮
|
||||
menu_shot_path = os.path.join(self.screenshot_dir, "t6_menu_shot_convert.jpg")
|
||||
self.d.screenshot(menu_shot_path)
|
||||
|
||||
convert_template = r"d:\dsWork\aiData\WeiXin\Templates\zhun_wen_zi.jpg"
|
||||
convert_btn = find_template_match(menu_shot_path, convert_template, threshold=0.7)
|
||||
|
||||
if not convert_btn:
|
||||
logger.warning("❌ CV 未找到 '转文字' 按钮,取消操作。")
|
||||
self.d.click(vx + 200, vy) # 点击空白处关闭菜单
|
||||
return None
|
||||
|
||||
logger.info(f"✅ CV 找到 '转文字' 按钮: {convert_btn}")
|
||||
self.d.click(convert_btn[0], convert_btn[1])
|
||||
|
||||
# 3. 动态等待转换
|
||||
duration_str = content.replace('"', '').strip()
|
||||
try:
|
||||
duration = int(duration_str)
|
||||
except:
|
||||
duration = 10
|
||||
wait_seconds = max(2, duration / 5.0)
|
||||
logger.info(f"⏳ 语音时长 {duration}s,等待转换 {wait_seconds:.1f}s...")
|
||||
time.sleep(wait_seconds)
|
||||
|
||||
# 4. 截图并 OCR 识别内容
|
||||
ocr_shot_path = os.path.join(self.screenshot_dir, "t6_ocr_shot.jpg")
|
||||
self.d.screenshot(ocr_shot_path)
|
||||
|
||||
# OCR 识别
|
||||
# 策略:识别整个屏幕,但只提取位于当前语音消息下方,且在下一条消息(如果有)上方的内容
|
||||
ocr_results = self.ocr_kit.read_text(ocr_shot_path)
|
||||
# 1. 长按语音消息
|
||||
logger.info("👆 正在长按语音消息...")
|
||||
self.d.long_click(vx, vy, 0.6)
|
||||
logger.info("✅ 长按完成,等待菜单...")
|
||||
time.sleep(0.3)
|
||||
|
||||
# 2. CV 模板匹配寻找 "转文字" 按钮
|
||||
menu_shot_path = os.path.join(self.screenshot_dir, "t6_menu_shot_convert.jpg")
|
||||
logger.info(f"📸 截取菜单图: {menu_shot_path}")
|
||||
self.d.screenshot(menu_shot_path)
|
||||
|
||||
convert_template = r"d:\dsWork\aiData\WeiXin\Templates\zhun_wen_zi.jpg"
|
||||
logger.info(f"🔍 寻找模板: {convert_template}")
|
||||
convert_btn = find_template_match(menu_shot_path, convert_template, threshold=0.7)
|
||||
|
||||
if not convert_btn:
|
||||
logger.warning("❌ CV 未找到 '转文字' 按钮,取消操作。")
|
||||
self.d.click(vx + 200, vy) # 点击空白处关闭菜单
|
||||
return None
|
||||
|
||||
logger.info(f"✅ CV 找到 '转文字' 按钮: {convert_btn}")
|
||||
self.d.click(convert_btn[0], convert_btn[1])
|
||||
|
||||
# 3. 动态等待转换
|
||||
duration_str = content.replace('"', '').strip()
|
||||
try:
|
||||
duration = int(duration_str)
|
||||
except:
|
||||
duration = 10
|
||||
wait_seconds = max(2, duration / 5.0)
|
||||
logger.info(f"⏳ 语音时长 {duration}s,等待转换 {wait_seconds:.1f}s...")
|
||||
time.sleep(wait_seconds)
|
||||
|
||||
# 4. 截图并 OCR 识别内容
|
||||
ocr_shot_path = os.path.join(self.screenshot_dir, "t6_ocr_shot.jpg")
|
||||
logger.info(f"📸 截取 OCR 识别图: {ocr_shot_path}")
|
||||
self.d.screenshot(ocr_shot_path)
|
||||
|
||||
# OCR 识别
|
||||
# 策略:识别整个屏幕,但只提取位于当前语音消息下方,且在下一条消息(如果有)上方的内容
|
||||
logger.info("📖 开始 OCR 识别...")
|
||||
ocr_results = self.ocr_kit.read_text(ocr_shot_path)
|
||||
logger.info(f"✅ OCR 识别完成,获取 {len(ocr_results)} 个文本块")
|
||||
except Exception as e:
|
||||
logger.error(f"❌ process_single_voice 发生异常: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
# 按 Y 坐标排序,确保从上往下处理
|
||||
ocr_results.sort(key=lambda x: (x[0][0][1] + x[0][2][1]) / 2)
|
||||
@@ -417,6 +416,14 @@ class ChatBot:
|
||||
processed_voice_content = text
|
||||
|
||||
self.is_first_run = False # 标记首次运行结束
|
||||
|
||||
# 初始化 last_processed_msg,避免回复历史消息
|
||||
if dialogue_log:
|
||||
last_log = dialogue_log[-1]
|
||||
if last_log.startswith("对方"):
|
||||
content = last_log.split(":", 1)[1].strip()
|
||||
self.last_processed_msg = content
|
||||
logger.info(f"🌟 首次运行,标记最后一条对方消息为已处理: {content}")
|
||||
|
||||
else:
|
||||
# 后续监控:只处理最后一条,且必须是未读 (is_unread=True)
|
||||
@@ -452,30 +459,45 @@ class ChatBot:
|
||||
history_text = "\n".join(dialogue_log)
|
||||
|
||||
# 判断是否需要回复:
|
||||
# 核心规则:只有当最后一条消息是“对方”说的,才回复。如果是“我”说的,则不回复。
|
||||
# 核心规则:只有当最后一条消息是“对方”说的,且内容未处理过,才回复。
|
||||
|
||||
should_reply = False
|
||||
current_last_content = ""
|
||||
|
||||
if dialogue_log:
|
||||
last_log = dialogue_log[-1]
|
||||
|
||||
# 检查最后一条消息的发送者
|
||||
if last_log.startswith("对方"):
|
||||
logger.info(f"💡 最后一条消息是对方发送,准备回复。内容: {last_log}")
|
||||
should_reply = True
|
||||
parts = last_log.split(":", 1)
|
||||
if len(parts) > 1:
|
||||
current_last_content = parts[1].strip()
|
||||
else:
|
||||
current_last_content = last_log
|
||||
|
||||
if current_last_content != self.last_processed_msg:
|
||||
logger.info(f"💡 发现新消息,准备回复。内容: {current_last_content}")
|
||||
should_reply = True
|
||||
else:
|
||||
# logger.info(f"⚪ 消息已回复过,跳过: {current_last_content}")
|
||||
should_reply = False
|
||||
else:
|
||||
logger.info(f"⚪ 最后一条消息是我发送的,无需回复。内容: {last_log}")
|
||||
# logger.info(f"⚪ 最后一条消息是我发送的,无需回复。")
|
||||
should_reply = False
|
||||
# 如果最后一条是我发的,重置 last_processed_msg
|
||||
self.last_processed_msg = None
|
||||
|
||||
if should_reply:
|
||||
logger.info("🤖 准备调用 LLM 生成回复...")
|
||||
# 立即更新状态,防止在回复生成期间(如果耗时)重复触发
|
||||
self.last_processed_msg = current_last_content
|
||||
|
||||
reply = await self.get_reply(history_text)
|
||||
logger.info(f"💡 LLM 回复: {reply}")
|
||||
|
||||
if reply and input_center:
|
||||
# 输入并发送
|
||||
perform_input_action(self.d, input_center, reply)
|
||||
# 记录回复时间
|
||||
self.last_interaction_time = time.time()
|
||||
|
||||
# 休眠
|
||||
await asyncio.sleep(CHECK_INTERVAL)
|
||||
|
||||