'commit'
@@ -21,16 +21,27 @@ class EasyOcrKit:
|
|||||||
:param langs: 识别语言列表
|
:param langs: 识别语言列表
|
||||||
:param gpu: 是否使用 GPU
|
:param gpu: 是否使用 GPU
|
||||||
"""
|
"""
|
||||||
|
# 检查是否需要重新初始化(例如从 GPU 切换到 CPU)
|
||||||
|
if self._reader is not None:
|
||||||
|
if hasattr(self, '_using_gpu') and self._using_gpu != gpu:
|
||||||
|
logger.info(f"检测到 GPU 模式变更 ({self._using_gpu} -> {gpu}),重新初始化 Reader...")
|
||||||
|
self._reader = None
|
||||||
|
|
||||||
if self._reader is None:
|
if self._reader is None:
|
||||||
try:
|
try:
|
||||||
self._reader = easyocr.Reader(langs, gpu=gpu)
|
self._reader = easyocr.Reader(langs, gpu=gpu)
|
||||||
|
self._using_gpu = gpu # 记录当前使用的模式
|
||||||
logger.info(f"EasyOCR Reader 初始化成功 (gpu={gpu})")
|
logger.info(f"EasyOCR Reader 初始化成功 (gpu={gpu})")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"EasyOCR Reader 初始化失败: {e}")
|
logger.error(f"EasyOCR Reader 初始化失败: {e}")
|
||||||
# 如果 GPU 失败,尝试回退到 CPU
|
# 如果 GPU 失败,尝试回退到 CPU
|
||||||
if gpu:
|
if gpu:
|
||||||
logger.warning("尝试回退到 CPU 模式...")
|
logger.warning("尝试回退到 CPU 模式...")
|
||||||
self._reader = easyocr.Reader(langs, gpu=False)
|
try:
|
||||||
|
self._reader = easyocr.Reader(langs, gpu=False)
|
||||||
|
self._using_gpu = False
|
||||||
|
except Exception as ex:
|
||||||
|
logger.error(f"EasyOCR CPU 模式回退也失败: {ex}")
|
||||||
|
|
||||||
def read_text(self, image):
|
def read_text(self, image):
|
||||||
"""
|
"""
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 156 KiB After Width: | Height: | Size: 215 KiB |
|
Before Width: | Height: | Size: 77 KiB After Width: | Height: | Size: 137 KiB |
|
Before Width: | Height: | Size: 64 KiB After Width: | Height: | Size: 137 KiB |
|
Before Width: | Height: | Size: 87 KiB After Width: | Height: | Size: 123 KiB |
|
Before Width: | Height: | Size: 71 KiB After Width: | Height: | Size: 137 KiB |
@@ -7,13 +7,13 @@ import os
|
|||||||
import asyncio
|
import asyncio
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from Util import Win32Patch
|
|
||||||
|
|
||||||
# 添加项目根目录到 sys.path
|
# 添加项目根目录到 sys.path
|
||||||
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
if project_root not in sys.path:
|
if project_root not in sys.path:
|
||||||
sys.path.append(project_root)
|
sys.path.append(project_root)
|
||||||
|
|
||||||
|
from Util import Win32Patch
|
||||||
|
|
||||||
from WeiXin.WxUtil import find_input_box_center, perform_input_action, get_vlm_analysis, clean_screenshots_dir, is_in_chat_interface, get_vlm_json, find_template_match, find_all_template_matches
|
from WeiXin.WxUtil import find_input_box_center, perform_input_action, get_vlm_analysis, clean_screenshots_dir, is_in_chat_interface, get_vlm_json, find_template_match, find_all_template_matches
|
||||||
from Util.LlmUtil import get_llm_response
|
from Util.LlmUtil import get_llm_response
|
||||||
from Util.EasyOcrKit import EasyOcrKit
|
from Util.EasyOcrKit import EasyOcrKit
|
||||||
@@ -49,21 +49,19 @@ logger.propagate = False
|
|||||||
logger.info(f"日志文件路径: {log_file_path}")
|
logger.info(f"日志文件路径: {log_file_path}")
|
||||||
|
|
||||||
# 配置参数
|
# 配置参数
|
||||||
CHECK_INTERVAL = 10 # 检查频率 (秒)
|
CHECK_INTERVAL = 5 # 检查频率 (秒)
|
||||||
SILENCE_THRESHOLD = 60 * 5 # 静默阈值 (5分钟) 主动询问
|
|
||||||
MAX_PROACTIVE_PROMPTS = 1 # 最大主动询问次数
|
|
||||||
|
|
||||||
class ChatBot:
|
class ChatBot:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.d = u2.connect()
|
self.d = u2.connect()
|
||||||
self.last_message_text = ""
|
self.last_message_text = ""
|
||||||
self.last_interaction_time = time.time()
|
self.last_processed_msg = None # 记录上一条已处理/回复过的对方消息内容
|
||||||
self.proactive_count = 0
|
|
||||||
self.screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
|
self.screenshot_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Screenshots")
|
||||||
if not os.path.exists(self.screenshot_dir):
|
if not os.path.exists(self.screenshot_dir):
|
||||||
os.makedirs(self.screenshot_dir)
|
os.makedirs(self.screenshot_dir)
|
||||||
|
|
||||||
self.ocr_kit = EasyOcrKit()
|
# 强制使用 CPU 模式以避免 0xC0000409 (Stack Buffer Overrun) 崩溃
|
||||||
|
self.ocr_kit = EasyOcrKit(gpu=False)
|
||||||
|
|
||||||
self.is_first_run = True # 首次运行标志
|
self.is_first_run = True # 首次运行标志
|
||||||
|
|
||||||
@@ -84,26 +82,16 @@ class ChatBot:
|
|||||||
"- 每学期开学招收小学三年级至六年级,初中七年级的学生入学,其它年段不招生。\n"
|
"- 每学期开学招收小学三年级至六年级,初中七年级的学生入学,其它年段不招生。\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
async def get_reply(self, history_text, is_proactive=False):
|
async def get_reply(self, history_text):
|
||||||
if is_proactive:
|
prompt = (
|
||||||
prompt = (
|
f"【教师人设】:{self.persona}\n\n"
|
||||||
f"【教师人设】:{self.persona}\n\n"
|
f"【近期聊天记录】:\n{history_text}\n\n"
|
||||||
f"【对话背景】:家长已经超过5分钟没有回应了。\n"
|
"【任务要求】:\n"
|
||||||
f"【近期聊天记录】:\n{history_text}\n\n"
|
"请作为大张老师回复家长。**必须且只能针对聊天记录中的最后一条消息进行回复!**\n"
|
||||||
"【任务要求】:\n"
|
"之前的聊天记录仅供参考上下文,如果之前的问题已经回答过,绝对不要重复回答。\n"
|
||||||
"请作为大张老师,给家长发一段简短的关怀消息。不要催促,语气温柔。"
|
"严禁发散,严禁编造家长没说过的情况。如果不清楚家长的意图,就温柔询问。\n"
|
||||||
"字数严格控制在 50 字以内。不要编造事实。"
|
"字数严格控制在 50 字以内。直接输出回复正文。"
|
||||||
)
|
)
|
||||||
else:
|
|
||||||
prompt = (
|
|
||||||
f"【教师人设】:{self.persona}\n\n"
|
|
||||||
f"【近期聊天记录】:\n{history_text}\n\n"
|
|
||||||
"【任务要求】:\n"
|
|
||||||
"请作为大张老师回复家长。**必须且只能针对聊天记录中的最后一条消息进行回复!**\n"
|
|
||||||
"之前的聊天记录仅供参考上下文,如果之前的问题已经回答过,绝对不要重复回答。\n"
|
|
||||||
"严禁发散,严禁编造家长没说过的情况。如果不清楚家长的意图,就温柔询问。\n"
|
|
||||||
"字数严格控制在 50 字以内。直接输出回复正文。"
|
|
||||||
)
|
|
||||||
|
|
||||||
full_response = ""
|
full_response = ""
|
||||||
async for chunk in get_llm_response(prompt, stream=False):
|
async for chunk in get_llm_response(prompt, stream=False):
|
||||||
@@ -120,42 +108,53 @@ class ChatBot:
|
|||||||
content = voice_msg.get('content', '0"')
|
content = voice_msg.get('content', '0"')
|
||||||
logger.info(f"🎤 开始处理语音消息: {content}, 坐标: ({vx}, {vy})")
|
logger.info(f"🎤 开始处理语音消息: {content}, 坐标: ({vx}, {vy})")
|
||||||
|
|
||||||
# 1. 长按语音消息
|
|
||||||
self.d.long_click(vx, vy, 0.6)
|
|
||||||
time.sleep(0.3)
|
|
||||||
|
|
||||||
# 2. CV 模板匹配寻找 "转文字" 按钮
|
|
||||||
menu_shot_path = os.path.join(self.screenshot_dir, "t6_menu_shot_convert.jpg")
|
|
||||||
self.d.screenshot(menu_shot_path)
|
|
||||||
|
|
||||||
convert_template = r"d:\dsWork\aiData\WeiXin\Templates\zhun_wen_zi.jpg"
|
|
||||||
convert_btn = find_template_match(menu_shot_path, convert_template, threshold=0.7)
|
|
||||||
|
|
||||||
if not convert_btn:
|
|
||||||
logger.warning("❌ CV 未找到 '转文字' 按钮,取消操作。")
|
|
||||||
self.d.click(vx + 200, vy) # 点击空白处关闭菜单
|
|
||||||
return None
|
|
||||||
|
|
||||||
logger.info(f"✅ CV 找到 '转文字' 按钮: {convert_btn}")
|
|
||||||
self.d.click(convert_btn[0], convert_btn[1])
|
|
||||||
|
|
||||||
# 3. 动态等待转换
|
|
||||||
duration_str = content.replace('"', '').strip()
|
|
||||||
try:
|
try:
|
||||||
duration = int(duration_str)
|
# 1. 长按语音消息
|
||||||
except:
|
logger.info("👆 正在长按语音消息...")
|
||||||
duration = 10
|
self.d.long_click(vx, vy, 0.6)
|
||||||
wait_seconds = max(2, duration / 5.0)
|
logger.info("✅ 长按完成,等待菜单...")
|
||||||
logger.info(f"⏳ 语音时长 {duration}s,等待转换 {wait_seconds:.1f}s...")
|
time.sleep(0.3)
|
||||||
time.sleep(wait_seconds)
|
|
||||||
|
# 2. CV 模板匹配寻找 "转文字" 按钮
|
||||||
# 4. 截图并 OCR 识别内容
|
menu_shot_path = os.path.join(self.screenshot_dir, "t6_menu_shot_convert.jpg")
|
||||||
ocr_shot_path = os.path.join(self.screenshot_dir, "t6_ocr_shot.jpg")
|
logger.info(f"📸 截取菜单图: {menu_shot_path}")
|
||||||
self.d.screenshot(ocr_shot_path)
|
self.d.screenshot(menu_shot_path)
|
||||||
|
|
||||||
# OCR 识别
|
convert_template = r"d:\dsWork\aiData\WeiXin\Templates\zhun_wen_zi.jpg"
|
||||||
# 策略:识别整个屏幕,但只提取位于当前语音消息下方,且在下一条消息(如果有)上方的内容
|
logger.info(f"🔍 寻找模板: {convert_template}")
|
||||||
ocr_results = self.ocr_kit.read_text(ocr_shot_path)
|
convert_btn = find_template_match(menu_shot_path, convert_template, threshold=0.7)
|
||||||
|
|
||||||
|
if not convert_btn:
|
||||||
|
logger.warning("❌ CV 未找到 '转文字' 按钮,取消操作。")
|
||||||
|
self.d.click(vx + 200, vy) # 点击空白处关闭菜单
|
||||||
|
return None
|
||||||
|
|
||||||
|
logger.info(f"✅ CV 找到 '转文字' 按钮: {convert_btn}")
|
||||||
|
self.d.click(convert_btn[0], convert_btn[1])
|
||||||
|
|
||||||
|
# 3. 动态等待转换
|
||||||
|
duration_str = content.replace('"', '').strip()
|
||||||
|
try:
|
||||||
|
duration = int(duration_str)
|
||||||
|
except:
|
||||||
|
duration = 10
|
||||||
|
wait_seconds = max(2, duration / 5.0)
|
||||||
|
logger.info(f"⏳ 语音时长 {duration}s,等待转换 {wait_seconds:.1f}s...")
|
||||||
|
time.sleep(wait_seconds)
|
||||||
|
|
||||||
|
# 4. 截图并 OCR 识别内容
|
||||||
|
ocr_shot_path = os.path.join(self.screenshot_dir, "t6_ocr_shot.jpg")
|
||||||
|
logger.info(f"📸 截取 OCR 识别图: {ocr_shot_path}")
|
||||||
|
self.d.screenshot(ocr_shot_path)
|
||||||
|
|
||||||
|
# OCR 识别
|
||||||
|
# 策略:识别整个屏幕,但只提取位于当前语音消息下方,且在下一条消息(如果有)上方的内容
|
||||||
|
logger.info("📖 开始 OCR 识别...")
|
||||||
|
ocr_results = self.ocr_kit.read_text(ocr_shot_path)
|
||||||
|
logger.info(f"✅ OCR 识别完成,获取 {len(ocr_results)} 个文本块")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"❌ process_single_voice 发生异常: {e}", exc_info=True)
|
||||||
|
return None
|
||||||
|
|
||||||
# 按 Y 坐标排序,确保从上往下处理
|
# 按 Y 坐标排序,确保从上往下处理
|
||||||
ocr_results.sort(key=lambda x: (x[0][0][1] + x[0][2][1]) / 2)
|
ocr_results.sort(key=lambda x: (x[0][0][1] + x[0][2][1]) / 2)
|
||||||
@@ -417,6 +416,14 @@ class ChatBot:
|
|||||||
processed_voice_content = text
|
processed_voice_content = text
|
||||||
|
|
||||||
self.is_first_run = False # 标记首次运行结束
|
self.is_first_run = False # 标记首次运行结束
|
||||||
|
|
||||||
|
# 初始化 last_processed_msg,避免回复历史消息
|
||||||
|
if dialogue_log:
|
||||||
|
last_log = dialogue_log[-1]
|
||||||
|
if last_log.startswith("对方"):
|
||||||
|
content = last_log.split(":", 1)[1].strip()
|
||||||
|
self.last_processed_msg = content
|
||||||
|
logger.info(f"🌟 首次运行,标记最后一条对方消息为已处理: {content}")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# 后续监控:只处理最后一条,且必须是未读 (is_unread=True)
|
# 后续监控:只处理最后一条,且必须是未读 (is_unread=True)
|
||||||
@@ -452,30 +459,45 @@ class ChatBot:
|
|||||||
history_text = "\n".join(dialogue_log)
|
history_text = "\n".join(dialogue_log)
|
||||||
|
|
||||||
# 判断是否需要回复:
|
# 判断是否需要回复:
|
||||||
# 核心规则:只有当最后一条消息是“对方”说的,才回复。如果是“我”说的,则不回复。
|
# 核心规则:只有当最后一条消息是“对方”说的,且内容未处理过,才回复。
|
||||||
|
|
||||||
should_reply = False
|
should_reply = False
|
||||||
|
current_last_content = ""
|
||||||
|
|
||||||
if dialogue_log:
|
if dialogue_log:
|
||||||
last_log = dialogue_log[-1]
|
last_log = dialogue_log[-1]
|
||||||
|
|
||||||
# 检查最后一条消息的发送者
|
# 检查最后一条消息的发送者
|
||||||
if last_log.startswith("对方"):
|
if last_log.startswith("对方"):
|
||||||
logger.info(f"💡 最后一条消息是对方发送,准备回复。内容: {last_log}")
|
parts = last_log.split(":", 1)
|
||||||
should_reply = True
|
if len(parts) > 1:
|
||||||
|
current_last_content = parts[1].strip()
|
||||||
|
else:
|
||||||
|
current_last_content = last_log
|
||||||
|
|
||||||
|
if current_last_content != self.last_processed_msg:
|
||||||
|
logger.info(f"💡 发现新消息,准备回复。内容: {current_last_content}")
|
||||||
|
should_reply = True
|
||||||
|
else:
|
||||||
|
# logger.info(f"⚪ 消息已回复过,跳过: {current_last_content}")
|
||||||
|
should_reply = False
|
||||||
else:
|
else:
|
||||||
logger.info(f"⚪ 最后一条消息是我发送的,无需回复。内容: {last_log}")
|
# logger.info(f"⚪ 最后一条消息是我发送的,无需回复。")
|
||||||
should_reply = False
|
should_reply = False
|
||||||
|
# 如果最后一条是我发的,重置 last_processed_msg
|
||||||
|
self.last_processed_msg = None
|
||||||
|
|
||||||
if should_reply:
|
if should_reply:
|
||||||
logger.info("🤖 准备调用 LLM 生成回复...")
|
logger.info("🤖 准备调用 LLM 生成回复...")
|
||||||
|
# 立即更新状态,防止在回复生成期间(如果耗时)重复触发
|
||||||
|
self.last_processed_msg = current_last_content
|
||||||
|
|
||||||
reply = await self.get_reply(history_text)
|
reply = await self.get_reply(history_text)
|
||||||
logger.info(f"💡 LLM 回复: {reply}")
|
logger.info(f"💡 LLM 回复: {reply}")
|
||||||
|
|
||||||
if reply and input_center:
|
if reply and input_center:
|
||||||
# 输入并发送
|
# 输入并发送
|
||||||
perform_input_action(self.d, input_center, reply)
|
perform_input_action(self.d, input_center, reply)
|
||||||
# 记录回复时间
|
|
||||||
self.last_interaction_time = time.time()
|
|
||||||
|
|
||||||
# 休眠
|
# 休眠
|
||||||
await asyncio.sleep(CHECK_INTERVAL)
|
await asyncio.sleep(CHECK_INTERVAL)
|
||||||
|
|||||||