This commit is contained in:
HuangHai
2026-01-15 21:29:05 +08:00
parent f1bb71b856
commit b740add57e
12 changed files with 34 additions and 42 deletions

View File

@@ -1,7 +1,7 @@
# Collection (crawl) settings
# NOTE(review): SCROLL_DISTANCE_RATIO and MAX_STATIONS_COUNT are each assigned
# twice below. Executed as Python, the later assignment wins (0.3 and 20).
# The first pair looks like the pre-change side of a diff hunk — confirm and
# drop it if so.
SCROLL_DISTANCE_RATIO = 0.5
MAX_STATIONS_COUNT = 10
SCROLL_DISTANCE_RATIO = 0.3
# Appears to be the default for get_station_list's max_stations_count
# parameter — verify against the crawler module's imports.
MAX_STATIONS_COUNT =20
FIRST_RUN_ONLY_ONE_STATION = False
# NOTE(review): unit is not visible here (presumably seconds) — confirm.
REDIS_STATION_EXPIRE = 120
DATA_RETENTION_DAYS = 365

View File

@@ -15,7 +15,7 @@ if project_root not in sys.path:
import uiautomator2 as u2
from Apps.AiTeJiYiChong import Kit
from Apps.AiTeJiYiChong.Kit import take_screenshot
from Apps.AiTeJiYiChong.Kit import take_screenshot, setup_logger
from Apps.AiTeJiYiChong.ReadImageKit import ReadImageKit
from Util.RedisKit import RedisKit
from Apps.AiTeJiYiChong.Service import AiTeJiYiChongService
@@ -30,7 +30,7 @@ from Apps.AiTeJiYiChong.Config.Setting import (
FIRST_RUN_ONLY_ONE_STATION
)
logger = logging.getLogger("AiTeJiYiChongCrawler")
logger = setup_logger("AiTeJiYiChongCrawler")
async def clean_redis_data(redis_kit):
"""
@@ -124,7 +124,7 @@ async def get_station_list(d, service, max_stations_count=MAX_STATIONS_COUNT):
stations = await service.process_station_list_vl(img_to_process, json_metadata, device_info=device_info, max_count=max_stations_count - total_encountered_count)
logger.info(f"本页识别到 {len(stations)} 个有效场站")
ad_top_y_norm = 0.78 # 默认点击边界
ad_top_y_norm = 1.0 - BOTTOM_SAFE_EXCLUDE_RATIO
# 5. 遍历处理本页所有场站
new_stations_processed_in_page = 0
@@ -234,52 +234,44 @@ async def get_station_list(d, service, max_stations_count=MAX_STATIONS_COUNT):
if before_click_md5 != after_click_md5:
entered_price_page = True
# 抓取三级页面图片
price_screenshots = []
price_screenshots.append(after_click_path)
# 1. 向下滚动到底 (根据用户反馈只有不断向下滚动才能看到00点的)
logger.info("正在向下滚动价格列表到底部...")
max_scroll_down = 6
for i in range(max_scroll_down):
before_scroll_path = take_screenshot(d, f"scroll_dn_{i}", save_dir=TEMP_IMAGE_DIR)
logger.info("正在向上滚动价格列表到顶部...")
max_scroll_up_to_top = 8
for i in range(max_scroll_up_to_top):
before_scroll_path = take_screenshot(d, f"aite_price_up_{i}", save_dir=TEMP_IMAGE_DIR)
before_scroll_md5 = Kit.get_image_content_md5(before_scroll_path)
d.swipe_ext("up", scale=0.8) # 向下滚动 = 手势向上
await asyncio.sleep(1.2)
d.swipe_ext("down", scale=0.85)
await asyncio.sleep(1.5)
after_scroll_path = take_screenshot(d, f"scroll_dn_after_{i}", save_dir=TEMP_IMAGE_DIR)
after_scroll_path = take_screenshot(d, f"aite_price_up_after_{i}", save_dir=TEMP_IMAGE_DIR)
after_scroll_md5 = Kit.get_image_content_md5(after_scroll_path)
# 清理临时截图
if os.path.exists(before_scroll_path): os.remove(before_scroll_path)
if os.path.exists(after_scroll_path): os.remove(after_scroll_path)
if before_scroll_md5 == after_scroll_md5:
logger.info(f"价格列表已到达部 (滚动次数: {i})")
logger.info(f"价格列表已到达部 (滚动次数: {i})")
break
# 2. 向上滚动并逐页截图 (从底向上抓取)
logger.info("正在向上滚动价格列表并逐页截图...")
max_scroll_up = 8
for p_idx in range(1, max_scroll_up + 1):
# 截图当前页
logger.info("正在从顶部开始向下逐页截图...")
max_scroll_down_pages = 8
for p_idx in range(1, max_scroll_down_pages + 1):
p_shot = take_screenshot(d, f"price_scroll_{p_idx}_{station_name}", save_dir=TEMP_IMAGE_DIR)
# 检查是否还能向上滚动
before_up_md5 = Kit.get_image_content_md5(p_shot)
d.swipe_ext("down", scale=0.85) # 向上滚动 = 手势向下
await asyncio.sleep(1.5)
before_dn_md5 = Kit.get_image_content_md5(p_shot)
d.swipe_ext("up", scale=0.8)
await asyncio.sleep(1.2)
# 检查是否还有新内容
check_up_path = take_screenshot(d, f"check_up_{p_idx}", save_dir=TEMP_IMAGE_DIR)
after_up_md5 = Kit.get_image_content_md5(check_up_path)
if os.path.exists(check_up_path): os.remove(check_up_path)
check_dn_path = take_screenshot(d, f"check_dn_{p_idx}", save_dir=TEMP_IMAGE_DIR)
after_dn_md5 = Kit.get_image_content_md5(check_dn_path)
if os.path.exists(check_dn_path): os.remove(check_dn_path)
price_screenshots.append(p_shot)
if before_up_md5 == after_up_md5:
logger.info(f"价格列表已到达部 (共抓取页数: {p_idx})")
if before_dn_md5 == after_dn_md5:
logger.info(f"价格列表已到达部 (共抓取页数: {p_idx})")
break
# 后台处理价格图片

View File

@@ -215,6 +215,10 @@ def setup_logger(name, log_file=None, clear_old_log=False):
return logging.getLogger(f"{supplier_code}.{name}")
# 默认使用供应商级别的日志配置
logger = setup_logger("Kit")
def find_template_coords(img_path, template_path, threshold=0.8):
"""
在图片中查找模板并返回中心坐标

View File

@@ -5,15 +5,11 @@ import os
import time
import uiautomator2 as u2
import uuid
from Apps.AiTeJiYiChong.Kit import click_image_template, take_screenshot
from Apps.AiTeJiYiChong.Kit import click_image_template, take_screenshot, setup_logger
from Apps.AiTeJiYiChong.ReadImageKit import ReadImageKit
from Config.Config import TEMP_IMAGE_DIR
# Configure the root logger: INFO level, "time - name - level - message" format.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# NOTE(review): `logger` is bound twice; the setup_logger(...) result replaces
# the plain logging.getLogger(...) one. The first binding (and the basicConfig
# call above) look like the pre-change side of a diff hunk — confirm.
logger = logging.getLogger("OpenAiTeJiYiChong")
logger = setup_logger("OpenAiTeJiYiChong")
# Absolute path of the directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

View File

@@ -19,10 +19,10 @@ from Apps.AiTeJiYiChong.Config.Setting import (
)
from Apps.AiTeJiYiChong import Kit
from Apps.AiTeJiYiChong.Kit import (
read_image, save_image
read_image, save_image, setup_logger
)
logger = logging.getLogger(__name__)
logger = setup_logger("ReadImageKit")
# 场站过滤黑名单
STATION_EXCLUDED_TITLES = {

View File

@@ -17,9 +17,9 @@ from Model.StationProfile import StationProfile
from Model.StationStatus import StationStatus
from Model.StationPriceSchedule import StationPriceSchedule
from Apps.AiTeJiYiChong.Config.Setting import PRICE_FLATTEN_TO_24H
from Apps.AiTeJiYiChong.Kit import setup_logger
# Configure the root logger: INFO level, "time - name - level - message" format.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
# NOTE(review): `logger` is bound twice; the setup_logger(...) result replaces
# the logging.getLogger(__name__) one. The first binding (and the basicConfig
# call above) look like the pre-change side of a diff hunk — confirm.
logger = logging.getLogger(__name__)
logger = setup_logger("AiTeJiYiChongService")
class AiTeJiYiChongService:
def __init__(self):