This commit is contained in:
HuangHai
2026-01-16 20:00:41 +08:00
parent 8c2e8bcb0a
commit bd4c2c80e3
5 changed files with 30 additions and 20 deletions

View File

@@ -218,7 +218,7 @@ class TeLaiDianCrawler(BaseCrawler):
logger.info(f"详情页快照已保存: {detail_check_path}")
# 简单验证:如果标题包含 "我的卡券"、"优惠券"、"新人福利" 等,说明点错了
is_wrong_page = await self.check_wrong_page(d, detail_check_path)
is_wrong_page = await self.check_wrong_page(d, detail_check_path, context="detail")
if is_wrong_page:
logger.warning(f"检测到进入了错误页面,尝试返回列表...")
# 尝试点击左上角的返回箭头,如果没有,则执行系统 back
@@ -268,15 +268,24 @@ class TeLaiDianCrawler(BaseCrawler):
self.pending_price_tasks.clear()
logger.info("[收尾] 后台价格任务已全部完成")
async def check_wrong_page(self, d, image_path, context="list"):
    """
    Decide whether the screenshot shows a page entered by mistake
    (e.g. "my coupons", a campaign page) instead of a station detail page.

    The screenshot is classified by
    ``self.read_image_kit.check_wrong_page_vlm``; this method reads the
    ``is_detail_page``, ``page_type`` and ``reason`` keys of its result.

    Args:
        d: Not used by this method; presumably the uiautomator2 device
            handle passed by callers -- TODO confirm.
        image_path: Path of the screenshot handed to the classifier.
        context: Where the check is performed. With ``"detail"`` a
            ``page_type`` of ``"other"`` is tolerated (see below); any
            other value, including the default ``"list"``, is strict.

    Returns:
        ``True`` when the page is confirmed to be a wrong page,
        ``False`` otherwise.
    """
    data = await self.read_image_kit.check_wrong_page_vlm(image_path)

    # A result that is not a dict cannot be inspected with .get(); treat it
    # like a result with no verdict instead of raising AttributeError.
    if not isinstance(data, dict):
        logger.warning(f"⚠️ 页面检测结果无效，按正常页面处理: {data!r}")
        return False

    # A missing "is_detail_page" key defaults to True, so an incomplete
    # answer is not treated as a wrong page.
    if data.get("is_detail_page", True):
        return False

    page_type = data.get("page_type")
    reason = data.get("reason")

    # In the detail context an "other" classification is handled as a page
    # that loaded abnormally: warn, but let the caller keep parsing.
    if context == "detail" and page_type == "other":
        logger.warning(f"⚠️ 详情页首轮检测结果为 other将视为加载异常页面继续尝试解析。原因: {reason}")
        return False

    logger.warning(f"⚠️ 确认进入错误页面: {page_type} ({reason})")
    return True
async def crawl_detail_logic(self, d, station_info):
"""

View File

@@ -1,23 +1,24 @@
import asyncio
import logging
import uuid
import os
import sys
import json
import time
from datetime import datetime
from Apps.YeLiTe.Kit import take_screenshot, clean_station_name, get_image_content_md5, detect_price_info_container_cv
from Apps.YeLiTe.ReadImageKit import ReadImageKit
from Apps.YeLiTe.Service import YiLaiTeService
import uuid
import uiautomator2 as u2
from Apps.YeLiTe.Config.Setting import (
SCROLL_DISTANCE_RATIO, WAIT_AFTER_SCROLL, MAX_STATIONS_COUNT,
WAIT_DETAIL_PAGE_LOAD, WAIT_BACK_TO_LIST, TEST_CLEAR_REDIS,
SAFE_EXCLUDE_RATIO, BOTTOM_SAFE_EXCLUDE_RATIO,
FIRST_RUN_ONLY_ONE_STATION
)
from Util.RedisKit import RedisKit
from Apps.YeLiTe.Kit import take_screenshot, clean_station_name, get_image_content_md5, detect_price_info_container_cv, \
get_image_md5
from Apps.YeLiTe.ReadImageKit import ReadImageKit
from Apps.YeLiTe.Service import YeLiTeService
from Core.BaseCrawler import BaseCrawler
import uiautomator2 as u2
from Util.RedisKit import RedisKit
# 项目根目录处理
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -25,11 +26,11 @@ if project_root not in sys.path:
sys.path.append(project_root)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger("YiLaiTeCrawler")
logger = logging.getLogger("YeLiTeCrawler")
class YiLaiTeCrawler(BaseCrawler):
def __init__(self, service=None):
super().__init__(service or YiLaiTeService())
super().__init__(service or YeLiTeService())
self.read_image_kit = ReadImageKit()
self.redis_kit = RedisKit()
@@ -341,7 +342,7 @@ class YiLaiTeCrawler(BaseCrawler):
async def main(service=None):
if service is None:
service = YiLaiTeService()
service = YeLiTeService()
await service.init_db()
crawler = YiLaiTeCrawler(service)
@@ -367,7 +368,7 @@ async def main(service=None):
finally:
await service.log_task_end(task_id, total_count, status, error_msg)
# 如果是内部初始化的 service则关闭
if service and not isinstance(service, YiLaiTeService):
if service and not isinstance(service, YeLiTeService):
await service.close_db()
async def get_image_md5_async(path):

View File

@@ -20,7 +20,7 @@ from Apps.YeLiTe.Config.Setting import PRICE_FLATTEN_TO_24H
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
class YiLaiTeService:
class YeLiTeService:
def __init__(self):
self.db = Db(db_url=DB_URL)
self.station_profile_model = StationProfile()