'commit'
This commit is contained in:
@@ -144,6 +144,16 @@ class ChatMonitorBot:
|
||||
"""主运行循环"""
|
||||
logger.info("🚀 大张老师自动巡课系统启动 (T2 增强版)...")
|
||||
|
||||
# 定义 JSON 序列化辅助函数
|
||||
def numpy_serializer(obj):
    """Convert a NumPy value into a JSON-serializable Python object.

    Meant to be used as the ``default=`` hook of ``json.dumps``, which
    calls it only for objects the encoder cannot serialize by itself.

    Args:
        obj: The object the JSON encoder could not handle.

    Returns:
        A Python ``bool``, ``int``, ``float`` or (nested) ``list``
        equivalent of ``obj``.

    Raises:
        TypeError: If ``obj`` is not one of the supported NumPy types.
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch;
    # without it a NumPy boolean would fall through to the TypeError below.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(f"Type {type(obj)} not serializable")
|
||||
|
||||
# 1. 环境准备
|
||||
if not self.step_1_prepare_env(): return
|
||||
if not self.step_2_connect_device(): return
|
||||
@@ -171,22 +181,18 @@ class ChatMonitorBot:
|
||||
# 格式化输出:[发送者] 内容 (类型)
|
||||
sender = msg.get('sender', '未知')
|
||||
content = msg.get('content', '')
|
||||
msg_type = msg.get('type', 'unknown')
|
||||
logger.info(f"[{sender}] {content} ({msg_type})")
|
||||
msg_type = "语音" if msg.get('type') == 'voice' else "文字"
|
||||
|
||||
# 按照用户要求的格式输出
|
||||
logger.info(f"说话人: {sender}")
|
||||
logger.info(f"消息类型: {msg_type}")
|
||||
logger.info(f"消息内容: {content}")
|
||||
logger.info("-" * 20)
|
||||
logger.info("="*50 + "\n")
|
||||
|
||||
# 初始化最后处理的消息哈希,避免重复回复第一条
|
||||
last_msg = self.dialogue_log[-1]
|
||||
# last_msg 是字典,需要转字符串再 encode
|
||||
def numpy_serializer(obj):
    """Convert a NumPy value into a JSON-serializable Python object.

    Meant to be used as the ``default=`` hook of ``json.dumps``, which
    calls it only for objects the encoder cannot serialize by itself.

    Args:
        obj: The object the JSON encoder could not handle.

    Returns:
        A Python ``bool``, ``int``, ``float`` or (nested) ``list``
        equivalent of ``obj``.

    Raises:
        TypeError: If ``obj`` is not one of the supported NumPy types.
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch;
    # without it a NumPy boolean would fall through to the TypeError below.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    raise TypeError(f"Type {type(obj)} not serializable")
|
||||
|
||||
msg_str = json.dumps(last_msg, sort_keys=True, ensure_ascii=False, default=numpy_serializer)
|
||||
self.last_processed_msg_hash = hashlib.md5(msg_str.encode('utf-8')).hexdigest()
|
||||
self.last_screen_hash = self.get_image_hash(self.screenshot_path)
|
||||
@@ -233,10 +239,13 @@ class ChatMonitorBot:
|
||||
|
||||
# D. 只关注最后一条消息
|
||||
last_msg = dialogue_log[-1]
|
||||
current_msg_hash = hashlib.md5(last_msg.encode('utf-8')).hexdigest()
|
||||
# last_msg 是字典,需要序列化
|
||||
msg_str = json.dumps(last_msg, sort_keys=True, ensure_ascii=False, default=numpy_serializer)
|
||||
current_msg_hash = hashlib.md5(msg_str.encode('utf-8')).hexdigest()
|
||||
|
||||
# E. 判断是否需要回复 (对方发送且非重复消息)
|
||||
if "对方:" in last_msg:
|
||||
sender = last_msg.get('sender', '')
|
||||
if sender == "对方":
|
||||
if current_msg_hash != self.last_processed_msg_hash:
|
||||
event_shot = WxUtil.get_next_debug_path("event_new_msg")
|
||||
self.device.screenshot(event_shot)
|
||||
|
||||
106
WeiXin/WxUtil.py
106
WeiXin/WxUtil.py
@@ -173,10 +173,35 @@ def _scan_chat_messages(image_path):
|
||||
logger.info("正在执行 OCR 识别...")
|
||||
ocr_results = ocr_kit.read_text(image_path)
|
||||
|
||||
# 4.5 尝试提取聊天标题 (对方昵称)
|
||||
chat_title = "对方"
|
||||
potential_titles = []
|
||||
for bbox, text, conf in ocr_results:
|
||||
c_y = int((bbox[0][1] + bbox[2][1]) / 2)
|
||||
c_x = int((bbox[0][0] + bbox[2][0]) / 2)
|
||||
# 标题区域通常在顶部 (状态栏下方,消息列表上方)
|
||||
if 60 < c_y < 140:
|
||||
clean = text.strip()
|
||||
# 排除时间、信号、返回按钮等
|
||||
if re.match(r'^\d{1,2}:\d{2}$', clean): continue
|
||||
if "微信" in clean or "WeChat" in clean: continue
|
||||
if clean in ["<", "返回", "消息", "(", ")"]: continue
|
||||
if re.match(r'^\d+$', clean): continue # 排除纯数字(如未读数)
|
||||
if len(clean) > 0:
|
||||
potential_titles.append((c_x, clean))
|
||||
|
||||
if potential_titles:
|
||||
# 优先取最接近水平中心的文本作为标题
|
||||
potential_titles.sort(key=lambda x: abs(x[0] - w/2))
|
||||
chat_title = potential_titles[0][1]
|
||||
# 去除可能包含的括号(比如备注名后的群聊人数,虽然后面会被截断)
|
||||
chat_title = re.sub(r'\(\d+\)$', '', chat_title).strip()
|
||||
logger.info(f"识别到聊天标题/对方昵称: {chat_title}")
|
||||
|
||||
# 微信菜单关键字(用于排除干扰)
|
||||
MENU_KEYWORDS = ["听筒播放", "收藏", "背景播放", "删除", "多选", "取消转文字", "转文字", "引用", "提醒"]
|
||||
# 忽略的系统消息内容
|
||||
IGNORE_CONTENT = ["撤回了一条消息", "打招呼的消息", "拍了拍", "你撤回了一条消息", "引用"]
|
||||
IGNORE_CONTENT = ["撤回了一条消息", "打招呼的消息", "拍了拍", "你撤回了一条消息", "引用", "Clear Text", "Switch IME", "Done"]
|
||||
|
||||
# 5. 整合所有消息
|
||||
messages = []
|
||||
@@ -188,6 +213,8 @@ def _scan_chat_messages(image_path):
|
||||
cv2.putText(debug_img, "TOP_FILTER", (10, 140), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
|
||||
cv2.putText(debug_img, "BOTTOM_FILTER", (10, h - 110), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 255), 1)
|
||||
|
||||
claimed_ocr_indices = set()
|
||||
|
||||
# A. 添加语音消息
|
||||
for ax, ay in audio_matches:
|
||||
# 标记所有找到的语音图标 (用于调试)
|
||||
@@ -211,12 +238,18 @@ def _scan_chat_messages(image_path):
|
||||
# 改进:判断是否已转文字
|
||||
is_converted = False
|
||||
converted_trigger_text = ""
|
||||
for bbox, text, conf in ocr_results:
|
||||
associated_texts = [] # 存储关联的多行文本 [(y, x, text)]
|
||||
|
||||
for i, (bbox, text, conf) in enumerate(ocr_results):
|
||||
if i in claimed_ocr_indices: continue
|
||||
|
||||
c_x = int((bbox[0][0] + bbox[2][0]) / 2)
|
||||
c_y = int((bbox[0][1] + bbox[2][1]) / 2)
|
||||
|
||||
# 判定逻辑:文本在语音下方且水平偏移不大
|
||||
if 30 < c_y - ay < 600 and abs(c_x - ax) < 600:
|
||||
# 判定逻辑:文本在语音下方且水平偏移不大 (放宽 Y 轴限制以包含侧边的时长文本)
|
||||
# 2025-01-26: 增加 X 轴范围到 900 以适配超长语音条的右侧时长/文本
|
||||
# 增加 Y 轴范围到 800 以适配多行转文字内容
|
||||
if -50 < c_y - ay < 800 and abs(c_x - ax) < 900:
|
||||
# 检查中间是否有其他语音图标
|
||||
has_intermediate_audio = False
|
||||
for other_ax, other_ay in audio_matches:
|
||||
@@ -236,24 +269,35 @@ def _scan_chat_messages(image_path):
|
||||
# 判定是否为系统消息
|
||||
is_ignored = any(k in clean_text for k in IGNORE_CONTENT)
|
||||
|
||||
if not is_duration and not is_timestamp and clean_text not in MENU_KEYWORDS and not is_ignored:
|
||||
# 噪音判定 (例如 "少3"")
|
||||
is_noise = "少" in clean_text and len(clean_text) < 8 and re.search(r'\d', clean_text)
|
||||
|
||||
if not is_duration and not is_timestamp and clean_text not in MENU_KEYWORDS and not is_ignored and not is_noise:
|
||||
is_converted = True
|
||||
# 针对 "少3"" 这种特殊噪点进行过滤,但仍标记为已转换
|
||||
# 如果包含 "少" 且长度短且包含数字,视为噪点 (例如 "少3"")
|
||||
if "少" in clean_text and len(clean_text) < 6 and re.search(r'\d', clean_text):
|
||||
logger.info(f"语音({ax},{ay}) 判定为已转换,但内容判定为噪点('{clean_text}'),置为空")
|
||||
converted_trigger_text = ""
|
||||
else:
|
||||
converted_trigger_text = clean_text
|
||||
logger.info(f"语音({ax},{ay}) 判定为已转换,关联到有效文本: '{clean_text}'")
|
||||
break
|
||||
associated_texts.append((c_y, c_x, clean_text))
|
||||
claimed_ocr_indices.add(i)
|
||||
# 不再 break,继续寻找后续文本行
|
||||
else:
|
||||
# 这些文本虽然不作为内容,但它们属于语音消息的附属信息,标记为已处理
|
||||
claimed_ocr_indices.add(i)
|
||||
|
||||
if is_timestamp:
|
||||
logger.info(f"语音({ax},{ay}) 忽略下方时间戳文本: '{clean_text}'")
|
||||
elif is_duration:
|
||||
logger.info(f"语音({ax},{ay}) 忽略时长文本: '{clean_text}'")
|
||||
elif is_noise:
|
||||
logger.info(f"语音({ax},{ay}) 忽略噪音文本: '{clean_text}'")
|
||||
elif is_ignored:
|
||||
logger.info(f"语音({ax},{ay}) 忽略系统消息文本: '{clean_text}'")
|
||||
else:
|
||||
logger.info(f"语音({ax},{ay}) 忽略其他文本(可能是菜单): '{clean_text}'")
|
||||
|
||||
# 整合所有关联文本
|
||||
if associated_texts:
|
||||
# 按 Y 轴排序,如果 Y 接近则按 X 轴排序
|
||||
associated_texts.sort(key=lambda x: (x[0], x[1]))
|
||||
converted_trigger_text = "".join([t[2] for t in associated_texts])
|
||||
logger.info(f"语音({ax},{ay}) 判定为已转换,最终合并文本: '{converted_trigger_text}'")
|
||||
|
||||
if is_converted:
|
||||
logger.info(f"语音消息 ({ax}, {ay}) 已有转换文字: '{converted_trigger_text}',跳过")
|
||||
@@ -275,7 +319,8 @@ def _scan_chat_messages(image_path):
|
||||
})
|
||||
|
||||
# B. 添加文本消息
|
||||
for bbox, text, conf in ocr_results:
|
||||
for i, (bbox, text, conf) in enumerate(ocr_results):
|
||||
if i in claimed_ocr_indices: continue
|
||||
c_x = int((bbox[0][0] + bbox[2][0]) / 2)
|
||||
c_y = int((bbox[0][1] + bbox[2][1]) / 2)
|
||||
|
||||
@@ -284,12 +329,25 @@ def _scan_chat_messages(image_path):
|
||||
|
||||
time_pattern = r'(\d{4}年|\d{1,2}月|\d{1,2}日|\d{1,2}:\d{2}|昨天|今天|星期|上午|下午|晚上)'
|
||||
if len(text) < 20 and (re.search(time_pattern, text) or re.match(r'^[0-9:\s日年月\-]+$', text)):
|
||||
logger.info(f"忽略时间戳/日期文本: '{text}'")
|
||||
continue
|
||||
|
||||
clean_text = text.strip()
|
||||
if re.match(r'^.?[0-9]{1,2}"?$', clean_text): continue
|
||||
if clean_text in MENU_KEYWORDS: continue
|
||||
if any(k in clean_text for k in IGNORE_CONTENT): continue
|
||||
if re.match(r'^.?[0-9]{1,2}"?$', clean_text):
|
||||
logger.info(f"忽略疑似时长文本: '{clean_text}'")
|
||||
continue
|
||||
|
||||
# 噪音判定 (例如 "少3"")
|
||||
if "少" in clean_text and len(clean_text) < 8 and re.search(r'\d', clean_text):
|
||||
logger.info(f"忽略噪音文本: '{clean_text}'")
|
||||
continue
|
||||
|
||||
if clean_text in MENU_KEYWORDS:
|
||||
logger.info(f"忽略菜单关键词: '{clean_text}'")
|
||||
continue
|
||||
if any(k in clean_text for k in IGNORE_CONTENT):
|
||||
logger.info(f"忽略系统消息内容: '{clean_text}'")
|
||||
continue
|
||||
|
||||
left_x = bbox[0][0]
|
||||
sender = "对方" if left_x < w * 0.5 else "我"
|
||||
@@ -304,7 +362,7 @@ def _scan_chat_messages(image_path):
|
||||
|
||||
# 6. 排序
|
||||
messages.sort(key=lambda x: x['y'])
|
||||
return messages, debug_img
|
||||
return messages, debug_img, chat_title
|
||||
|
||||
async def analyze_chat_image(image_path, output_path, device=None, target_name="对方", process_strategy="ALL"):
|
||||
"""
|
||||
@@ -339,10 +397,16 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
|
||||
logger.info(f"--- 分析循环 第 {loop_count} 次 ---")
|
||||
|
||||
# 1. 扫描当前屏幕
|
||||
messages, debug_img = _scan_chat_messages(current_image_path)
|
||||
messages, debug_img, chat_title = _scan_chat_messages(current_image_path)
|
||||
if messages is None: # 读取失败
|
||||
return [], None
|
||||
|
||||
# 更新消息发送者名称 (将 "对方" 替换为 实际标题)
|
||||
if chat_title and chat_title != "对方":
|
||||
for m in messages:
|
||||
if m['sender'] == "对方":
|
||||
m['sender'] = chat_title
|
||||
|
||||
# 保存当前状态的调试图
|
||||
if current_output_path:
|
||||
cv2.imwrite(current_output_path, debug_img)
|
||||
@@ -432,7 +496,7 @@ async def analyze_chat_image(image_path, output_path, device=None, target_name="
|
||||
peek_shot = get_next_debug_path("step_peek_content")
|
||||
d.screenshot(peek_shot)
|
||||
logger.info("正在读取转换后的语音内容...")
|
||||
peek_messages, _ = _scan_chat_messages(peek_shot)
|
||||
peek_messages, _, _ = _scan_chat_messages(peek_shot)
|
||||
|
||||
# 2. 查找并保存内容
|
||||
found_content = None
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user