'commit'
This commit is contained in:
@@ -218,7 +218,7 @@ class TeLaiDianCrawler(BaseCrawler):
|
||||
logger.info(f"详情页快照已保存: {detail_check_path}")
|
||||
|
||||
# 简单验证:如果标题包含 "我的卡券"、"优惠券"、"新人福利" 等,说明点错了
|
||||
is_wrong_page = await self.check_wrong_page(d, detail_check_path)
|
||||
is_wrong_page = await self.check_wrong_page(d, detail_check_path, context="detail")
|
||||
if is_wrong_page:
|
||||
logger.warning(f"检测到进入了错误页面,尝试返回列表...")
|
||||
# 尝试点击左上角的返回箭头,如果没有,则执行系统 back
|
||||
@@ -268,15 +268,24 @@ class TeLaiDianCrawler(BaseCrawler):
|
||||
self.pending_price_tasks.clear()
|
||||
logger.info("[收尾] 后台价格任务已全部完成")
|
||||
|
||||
async def check_wrong_page(self, d, image_path):
|
||||
async def check_wrong_page(self, d, image_path, context="list"):
|
||||
"""
|
||||
检查是否误触进入了错误的页面(如:我的卡券、活动页等)
|
||||
"""
|
||||
data = await self.read_image_kit.check_wrong_page_vlm(image_path)
|
||||
is_detail = data.get("is_detail_page", True)
|
||||
if not is_detail:
|
||||
logger.warning(f"⚠️ 确认进入错误页面: {data.get('page_type')} ({data.get('reason')})")
|
||||
return not is_detail
|
||||
page_type = data.get("page_type")
|
||||
reason = data.get("reason")
|
||||
|
||||
if is_detail:
|
||||
return False
|
||||
|
||||
if context == "detail" and page_type == "other":
|
||||
logger.warning(f"⚠️ 详情页首轮检测结果为 other,将视为加载异常页面,继续尝试解析。原因: {reason}")
|
||||
return False
|
||||
|
||||
logger.warning(f"⚠️ 确认进入错误页面: {page_type} ({reason})")
|
||||
return True
|
||||
|
||||
async def crawl_detail_logic(self, d, station_info):
|
||||
"""
|
||||
|
||||
Binary file not shown.
@@ -1,23 +1,24 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import uuid
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime
|
||||
from Apps.YeLiTe.Kit import take_screenshot, clean_station_name, get_image_content_md5, detect_price_info_container_cv
|
||||
from Apps.YeLiTe.ReadImageKit import ReadImageKit
|
||||
from Apps.YeLiTe.Service import YiLaiTeService
|
||||
import uuid
|
||||
|
||||
import uiautomator2 as u2
|
||||
|
||||
from Apps.YeLiTe.Config.Setting import (
|
||||
SCROLL_DISTANCE_RATIO, WAIT_AFTER_SCROLL, MAX_STATIONS_COUNT,
|
||||
WAIT_DETAIL_PAGE_LOAD, WAIT_BACK_TO_LIST, TEST_CLEAR_REDIS,
|
||||
SAFE_EXCLUDE_RATIO, BOTTOM_SAFE_EXCLUDE_RATIO,
|
||||
FIRST_RUN_ONLY_ONE_STATION
|
||||
)
|
||||
from Util.RedisKit import RedisKit
|
||||
from Apps.YeLiTe.Kit import take_screenshot, clean_station_name, get_image_content_md5, detect_price_info_container_cv, \
|
||||
get_image_md5
|
||||
from Apps.YeLiTe.ReadImageKit import ReadImageKit
|
||||
from Apps.YeLiTe.Service import YeLiTeService
|
||||
from Core.BaseCrawler import BaseCrawler
|
||||
import uiautomator2 as u2
|
||||
from Util.RedisKit import RedisKit
|
||||
|
||||
# 项目根目录处理
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
@@ -25,11 +26,11 @@ if project_root not in sys.path:
|
||||
sys.path.append(project_root)
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger("YiLaiTeCrawler")
|
||||
logger = logging.getLogger("YeLiTeCrawler")
|
||||
|
||||
class YiLaiTeCrawler(BaseCrawler):
|
||||
def __init__(self, service=None):
|
||||
super().__init__(service or YiLaiTeService())
|
||||
super().__init__(service or YeLiTeService())
|
||||
self.read_image_kit = ReadImageKit()
|
||||
self.redis_kit = RedisKit()
|
||||
|
||||
@@ -341,7 +342,7 @@ class YiLaiTeCrawler(BaseCrawler):
|
||||
|
||||
async def main(service=None):
|
||||
if service is None:
|
||||
service = YiLaiTeService()
|
||||
service = YeLiTeService()
|
||||
await service.init_db()
|
||||
|
||||
crawler = YiLaiTeCrawler(service)
|
||||
@@ -367,7 +368,7 @@ async def main(service=None):
|
||||
finally:
|
||||
await service.log_task_end(task_id, total_count, status, error_msg)
|
||||
# 如果是内部初始化的 service,则关闭
|
||||
if service and not isinstance(service, YiLaiTeService):
|
||||
if service and not isinstance(service, YeLiTeService):
|
||||
await service.close_db()
|
||||
|
||||
async def get_image_md5_async(path):
|
||||
|
||||
@@ -20,7 +20,7 @@ from Apps.YeLiTe.Config.Setting import PRICE_FLATTEN_TO_24H
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class YiLaiTeService:
|
||||
class YeLiTeService:
|
||||
def __init__(self):
|
||||
self.db = Db(db_url=DB_URL)
|
||||
self.station_profile_model = StationProfile()
|
||||
|
||||
@@ -10,7 +10,7 @@ if project_root not in sys.path:
|
||||
sys.path.append(project_root)
|
||||
|
||||
try:
|
||||
from Apps.YeLiTe.Service import YiLaiTeService
|
||||
from Apps.YeLiTe.Service import YeLiTeService
|
||||
from Apps.YeLiTe import Opener, Crawler, Kit
|
||||
# 配置日志
|
||||
logger = Kit.setup_logger("T3_YeLiTe", clear_old_log=True)
|
||||
@@ -26,7 +26,7 @@ async def run_process():
|
||||
|
||||
# 步骤 0: 初始化基础服务
|
||||
logger.info(">>> 步骤 0: 初始化基础服务 (数据库连接)...")
|
||||
service = YiLaiTeService()
|
||||
service = YeLiTeService()
|
||||
await service.init_db()
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user