'commit'
This commit is contained in:
@@ -48,7 +48,35 @@ class TeLaiDianCrawler(BaseCrawler):
|
||||
from Apps.TeLaiDian import Opener
|
||||
return await Opener.open_mini_program()
|
||||
|
||||
async def clear_ads(self, d, max_rounds=3):
    """Dismiss advertisement pop-ups that cover the current page.

    Each round takes a screenshot, asks
    ``self.read_image_kit.find_close_button_vlm`` whether an ad is shown
    and where its close button is, then taps that point. The loop stops
    as soon as a round reports no ad (or an unusable close point), or
    after ``max_rounds`` rounds.

    Args:
        d: Device handle; must provide ``window_size()`` and
            ``click(x, y)``.
        max_rounds: Maximum number of screenshot/tap rounds to attempt.
    """
    logger.info(f"开始清理广告弹窗,最多尝试 {max_rounds} 轮...")
    for i in range(max_rounds):
        ad_screen = take_screenshot(d, f"tld_ad_check_{int(time.time())}.jpg")
        try:
            res = await self.read_image_kit.find_close_button_vlm(ad_screen)
            # The VLM reply is model output; tolerate anything that is not
            # a JSON object instead of crashing on ``.get``.
            if not isinstance(res, dict):
                res = {}

            close_point = res.get("close_point") if res.get("has_ad") else None
            if not close_point:
                logger.info(f"第 {i+1} 轮未发现明显广告,清理结束。")
                break

            w, h = d.window_size()
            try:
                # close_point is expressed on a 0-1000 normalized grid;
                # scale it to device pixels.
                target_x = int(close_point[0] * w / 1000)
                target_y = int(close_point[1] * h / 1000)
            except (TypeError, ValueError, IndexError, KeyError, OverflowError):
                # Malformed coordinates (wrong shape, non-numeric, NaN...):
                # give up on ad clearing rather than abort the whole crawl.
                logger.warning(
                    f"第 {i+1} 轮 VLM 返回的关闭坐标无效: {close_point!r},清理结束。"
                )
                break

            # A normalized value of 1000 (or an out-of-range one) would land
            # outside the screen; clamp to the last valid pixel.
            target_x = min(max(target_x, 0), max(w - 1, 0))
            target_y = min(max(target_y, 0), max(h - 1, 0))

            logger.info(f"第 {i+1} 轮发现广告: {res.get('reason')},点击关闭: ({target_x}, {target_y})")
            d.click(target_x, target_y)
            # Give the pop-up time to close before the next screenshot.
            await asyncio.sleep(1.5)
        finally:
            # Always delete the temporary screenshot, including when the
            # VLM call or the tap raises, or when the loop breaks early.
            if ad_screen and os.path.exists(ad_screen):
                os.remove(ad_screen)
|
||||
|
||||
async def crawl_list_logic(self, d):
|
||||
# 进入列表页先清理广告
|
||||
await self.clear_ads(d)
|
||||
|
||||
processed_count = 0
|
||||
last_md5 = None
|
||||
|
||||
@@ -98,6 +126,8 @@ class TeLaiDianCrawler(BaseCrawler):
|
||||
# 返回列表
|
||||
d.press("back")
|
||||
await asyncio.sleep(WAIT_BACK_TO_LIST)
|
||||
# 返回列表后也检查一下是否有新广告弹出
|
||||
await self.clear_ads(d, max_rounds=1)
|
||||
processed_count += 1
|
||||
|
||||
# 滑动到下一页
|
||||
|
||||
@@ -48,6 +48,30 @@ class ReadImageKit:
|
||||
logger.error(f"VLM 寻找价格入口失败: {e}")
|
||||
return {"found": False}
|
||||
|
||||
async def find_close_button_vlm(self, image_path):
    """Ask the VLM whether a screenshot shows an ad pop-up and where to close it.

    Args:
        image_path: Path of the screenshot passed to
            ``self.vlm.analyze_image``.

    Returns:
        A dict. On success it is the JSON object produced by the model,
        which the prompt asks to contain ``has_ad``, ``reason`` and
        ``close_point`` (``[x, y]`` on a 0-1000 normalized grid, or null).
        On any failure, or when the model's JSON is not an object,
        ``{"has_ad": False}`` is returned so callers can always use
        ``.get``.
    """
    prompt = """
    分析这张截图,判断是否存在弹窗广告或遮罩层。
    如果存在,请找到关闭按钮(通常是圆圈里的 X,或者写着“跳过”、“关闭”的按钮)。

    输出格式为 JSON:
    {
        "has_ad": true/false,
        "reason": "简单描述发现的弹窗",
        "close_point": [x, y] // 归一化坐标 [0-1000],如果不存在则为 null
    }
    """
    try:
        res_text = await self.vlm.analyze_image(image_path, prompt)
        json_str = self.vlm.extract_json(res_text)
        data = json.loads(json_str)
    except Exception as e:
        # Best-effort helper: any VLM/parsing failure means "no ad found".
        logger.error(f"VLM 寻找关闭按钮失败: {e}")
        return {"has_ad": False}

    # Valid JSON is not necessarily an object (the model may emit a list,
    # string or number); callers rely on dict access.
    if not isinstance(data, dict):
        logger.error(f"VLM 寻找关闭按钮失败: 返回结果不是 JSON 对象: {data!r}")
        return {"has_ad": False}
    return data
|
||||
|
||||
async def analyze_detail_price(self, image_path):
|
||||
"""
|
||||
分析详情页截图,提取电价信息,包括优惠价、PLUS价和挂牌价
|
||||
|
||||
Reference in New Issue
Block a user