This commit is contained in:
HuangHai
2026-01-14 10:19:20 +08:00
parent 76d2351b07
commit b53388e804
2 changed files with 54 additions and 0 deletions

View File

@@ -48,7 +48,35 @@ class TeLaiDianCrawler(BaseCrawler):
from Apps.TeLaiDian import Opener
return await Opener.open_mini_program()
async def clear_ads(self, d, max_rounds=3):
    """
    Dismiss advertisement pop-ups on the current page.

    Each round takes a screenshot, asks
    ``self.read_image_kit.find_close_button_vlm`` whether an ad is showing,
    and taps the reported close point. The loop stops after ``max_rounds``
    rounds, or as soon as a round reports no ad or an unusable close point.

    Args:
        d: Device handle; this method calls ``d.window_size()`` and
            ``d.click(x, y)`` on it.
        max_rounds: Maximum number of screenshot / VLM / click rounds.
    """
    logger.info(f"开始清理广告弹窗，最多尝试 {max_rounds} 轮...")
    for i in range(max_rounds):
        ad_screen = take_screenshot(d, f"tld_ad_check_{int(time.time())}.jpg")
        try:
            res = await self.read_image_kit.find_close_button_vlm(ad_screen)
            # The result is derived from model output; never assume its shape.
            if not isinstance(res, dict):
                res = {}
            close_point = res.get("close_point")

            if not (res.get("has_ad") and close_point):
                logger.info(f"第 {i+1} 轮未发现明显广告，清理结束。")
                break

            point_is_valid = (
                isinstance(close_point, (list, tuple))
                and len(close_point) == 2
                and all(
                    isinstance(v, (int, float)) and not isinstance(v, bool)
                    for v in close_point
                )
            )
            if not point_is_valid:
                # Without a usable coordinate there is nothing safe to tap.
                logger.warning(
                    f"第 {i+1} 轮 VLM 返回的关闭坐标无效: {close_point!r}，清理结束。"
                )
                break

            # close_point is normalized to 0-1000 on both axes (as requested
            # in the VLM prompt); scale to pixels and keep the tap on screen.
            w, h = d.window_size()
            target_x = min(max(int(close_point[0] * w / 1000), 0), max(int(w) - 1, 0))
            target_y = min(max(int(close_point[1] * h / 1000), 0), max(int(h) - 1, 0))
            logger.info(
                f"第 {i+1} 轮发现广告: {res.get('reason')}，点击关闭: ({target_x}, {target_y})"
            )
            d.click(target_x, target_y)
        finally:
            # Always remove the temporary screenshot, including when the VLM
            # call or the click raises, so failed rounds do not leak files.
            if ad_screen and os.path.exists(ad_screen):
                os.remove(ad_screen)
        # Give the UI time to settle before checking for another pop-up.
        await asyncio.sleep(1.5)
async def crawl_list_logic(self, d):
# 进入列表页先清理广告
await self.clear_ads(d)
processed_count = 0
last_md5 = None
@@ -98,6 +126,8 @@ class TeLaiDianCrawler(BaseCrawler):
# 返回列表
d.press("back")
await asyncio.sleep(WAIT_BACK_TO_LIST)
# 返回列表后也检查一下是否有新广告弹出
await self.clear_ads(d, max_rounds=1)
processed_count += 1
# 滑动到下一页

View File

@@ -48,6 +48,30 @@ class ReadImageKit:
logger.error(f"VLM 寻找价格入口失败: {e}")
return {"found": False}
async def find_close_button_vlm(self, image_path):
    """
    Ask the VLM to find the close button of an ad pop-up or overlay.

    Args:
        image_path: Path of the screenshot passed to ``self.vlm.analyze_image``.

    Returns:
        dict: The JSON object parsed from the VLM reply. The prompt asks for
        the keys ``has_ad``, ``reason`` and ``close_point`` (normalized
        0-1000 coordinates, or null). ``{"has_ad": False}`` is returned when
        the request or parsing fails, or when the reply is not a JSON object.
    """
    # The prompt is sent to the model verbatim; its text is runtime data.
    prompt = """
分析这张截图，判断是否存在弹窗广告或遮罩层。
如果存在，请找到关闭按钮（通常是圆圈里的 X，或者写着“跳过”、“关闭”的按钮）。
输出格式为 JSON：
{
"has_ad": true/false,
"reason": "简单描述发现的弹窗",
"close_point": [x, y] // 归一化坐标 [0-1000]，如果不存在则为 null
}
"""
    try:
        res_text = await self.vlm.analyze_image(image_path, prompt)
        json_str = self.vlm.extract_json(res_text)
        data = json.loads(json_str)
    except Exception as e:
        # Best-effort boundary: a failed ad check must not abort the crawl.
        logger.error(f"VLM 寻找关闭按钮失败: {e}")
        return {"has_ad": False}

    # json.loads may yield a list or scalar; callers use dict-style .get().
    if not isinstance(data, dict):
        logger.error(f"VLM 寻找关闭按钮失败: 返回结果不是 JSON 对象: {data!r}")
        return {"has_ad": False}
    return data
async def analyze_detail_price(self, image_path):
"""
分析详情页截图提取电价信息包括优惠价、PLUS价和挂牌价