582 lines
26 KiB
Python
582 lines
26 KiB
Python
# coding=utf-8
|
||
import asyncio
|
||
import logging
|
||
import uuid
|
||
import os
|
||
import sys
|
||
import json
|
||
import time
|
||
import hashlib
|
||
from PIL import Image
|
||
|
||
# 将项目根目录添加到 sys.path
|
||
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||
if project_root not in sys.path:
|
||
sys.path.append(project_root)
|
||
|
||
import uiautomator2 as u2
|
||
from Core.BaseCrawler import BaseCrawler
|
||
from Util import Kit
|
||
from Util.Kit import take_screenshot
|
||
from Util.ObsUtil import ObsUploader
|
||
from Util.RedisKit import RedisKit
|
||
from Util.PaddleOCRKit import get_ocr_kit
|
||
import cv2
|
||
from Apps.XinDianTu.Service import XinDianTuService
|
||
from Config.Config import (
|
||
OBS_TMP_PREFIX, CDN_DOMAIN, SCROLL_DISTANCE_RATIO,
|
||
MAX_SCROLLS, REDIS_STATION_EXPIRE,
|
||
WAIT_DETAIL_PAGE_LOAD, WAIT_BACK_TO_LIST, WAIT_AFTER_SCROLL,
|
||
MAX_CRAWL_DISTANCE, TEMP_IMAGE_DIR
|
||
)
|
||
|
||
# --- 用户配置区域 ---
|
||
# 是否保留截图文件(True=保留备查,False=随用随删)
|
||
KEEP_SCREENSHOTS = True
|
||
|
||
# [Testing] 是否在启动时清除 Redis 中的场站处理记录
|
||
# True: 每次运行前清除记录,方便反复测试同一个场站
|
||
# False: 生产模式,保留记录以避免重复爬取
|
||
TEST_CLEAR_REDIS = True
|
||
|
||
# 配置说明:
|
||
# SCROLL_DISTANCE_RATIO 控制翻页时的滑动距离(在 Config.py 中修改)。
|
||
# 对于华为 Mate 20x 等分辨率较低或屏幕较小的手机,如果发现翻页时跳过了某些场站,
|
||
# 请尝试减小 SCROLL_DISTANCE_RATIO(例如设置为 0.4 或 0.3)。
|
||
# 这样每次滑动的距离变短,可以确保所有场站都能被完整显示并识别。
|
||
|
||
# 配置日志输出
|
||
logging.basicConfig(
|
||
level=logging.INFO,
|
||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||
handlers=[
|
||
logging.StreamHandler(sys.stdout)
|
||
]
|
||
)
|
||
logger = logging.getLogger("StationList")
|
||
|
||
# 强制设置所有相关模块的日志级别为 INFO,防止被第三方库干扰
|
||
logging.getLogger("PaddleOCRKit").setLevel(logging.INFO)
|
||
logging.getLogger("Util.Kit").setLevel(logging.INFO)
|
||
logging.getLogger("OpenXinDianTu").setLevel(logging.INFO)
|
||
logging.getLogger("FullProcess").setLevel(logging.INFO)
|
||
|
||
class XinDianTuCrawler(BaseCrawler):
|
||
"""
|
||
新电途小程序爬虫实现
|
||
"""
|
||
def __init__(self, service=None):
|
||
super().__init__(service)
|
||
# 初始化配置参数
|
||
self.max_scrolls = MAX_SCROLLS
|
||
self.uploader = ObsUploader()
|
||
self.redis_kit = RedisKit()
|
||
|
||
async def start(self):
|
||
"""
|
||
实现 BaseCrawler 的启动入口
|
||
"""
|
||
# 兼容旧逻辑,直接调用 main
|
||
await main(self.service)
|
||
|
||
async def open_app(self):
|
||
# 实际逻辑在 Opener.py,此处可作为封装层
|
||
pass
|
||
|
||
async def crawl_list(self):
|
||
# 实际逻辑在 get_station_list,此处可作为封装层
|
||
pass
|
||
|
||
async def crawl_detail(self, station_info):
|
||
pass
|
||
|
||
async def get_station_list(d, service, uploader, max_scrolls=MAX_SCROLLS):
|
||
"""
|
||
获取场站列表并处理翻页
|
||
"""
|
||
all_stations = []
|
||
seen_names = set()
|
||
redis_kit = RedisKit()
|
||
|
||
# 确保在“找桩”或者首页列表界面
|
||
window_size = d.window_size()
|
||
w, h = window_size[0], window_size[1]
|
||
|
||
# 获取设备信息以用于坐标计算
|
||
device_info = d.info
|
||
device_info['width'] = w
|
||
device_info['height'] = h
|
||
|
||
logger.info(f"设备信息: {device_info.get('productName')} | 窗口分辨率: {w}x{h}")
|
||
|
||
max_distance_reached = False
|
||
stop_reason = "max_scrolls_reached" # 默认停止原因:达到最大翻页次数
|
||
|
||
# 存储后台异步任务
|
||
background_tasks = []
|
||
|
||
for i in range(max_scrolls + 1):
|
||
if max_distance_reached:
|
||
logger.info("已达到最大抓取距离,停止翻页。")
|
||
stop_reason = "max_distance_reached"
|
||
break
|
||
logger.info(f"正在处理第 {i + 1} 页...")
|
||
|
||
# 1. 拍摄截图
|
||
logger.info(f"Step [1/6] 正在拍摄列表页截图...")
|
||
t_shot = time.time()
|
||
image_uuid = str(uuid.uuid4())
|
||
# 使用相对路径: 基于当前脚本目录下的 Images 文件夹
|
||
base_dir = os.path.dirname(os.path.abspath(__file__))
|
||
save_dir = TEMP_IMAGE_DIR
|
||
screenshot_path = take_screenshot(d, image_uuid, save_dir=save_dir)
|
||
logger.info(f"Step [1/6] 列表页截图已完成: {screenshot_path} (耗时: {time.time() - t_shot:.2f}s)")
|
||
|
||
# 校验截图实际尺寸
|
||
img_w, img_h = 0, 0
|
||
try:
|
||
with Image.open(screenshot_path) as img:
|
||
img_w, img_h = img.size
|
||
logger.info(f"截图实际尺寸: {img_w}x{img_h}")
|
||
# 如果截图尺寸与窗口尺寸不符,更新 device_info 以适配坐标换算
|
||
if img_w != w or img_h != h:
|
||
logger.warning(f"检测到截图尺寸 ({img_w}x{img_h}) 与窗口尺寸 ({w}x{h}) 不一致,将以截图尺寸为准进行坐标换算。")
|
||
device_info['width'] = img_w
|
||
device_info['height'] = img_h
|
||
except Exception as e:
|
||
logger.warning(f"无法获取截图尺寸信息: {e}")
|
||
|
||
# 2. 使用 Kit.crop_cards_from_image 进行切片和生成 _vl.jpg
|
||
logger.info("Step [2/6] 正在执行本地图形学切片分析,识别场站卡片并生成绿色方框图...")
|
||
t_crop = time.time()
|
||
# 这会生成 _vl.jpg 和 .json
|
||
Kit.crop_cards_from_image(screenshot_path)
|
||
logger.info(f"Step [2/6] 图形学切片分析完成 (耗时: {time.time() - t_crop:.2f}s)")
|
||
|
||
# 读取生成的元数据
|
||
json_path = screenshot_path.replace(".jpg", ".json")
|
||
vl_img_path = screenshot_path.replace(".jpg", "_vl.jpg")
|
||
|
||
if not os.path.exists(json_path) or not os.path.exists(vl_img_path):
|
||
logger.error(f"❌ Step [2/6] 失败: 未生成有效的 JSON 或 VL 图片 (JSON: {os.path.exists(json_path)}, VL: {os.path.exists(vl_img_path)})")
|
||
logger.warning("可能未识别到任何卡片,跳过当前页")
|
||
continue
|
||
|
||
logger.info(f"Step [3/6] 绿色方框图已生成: {vl_img_path}")
|
||
|
||
with open(json_path, 'r', encoding='utf-8') as f:
|
||
json_metadata = json.load(f)
|
||
|
||
# --- New Logic: Local OCR via PaddleOCRKit ---
|
||
logger.info("Step [4/6] 正在开始本地 OCR 识别流程,将识别出的卡片逐一交给 OCR...")
|
||
t_ocr_total = time.time()
|
||
|
||
stations = []
|
||
try:
|
||
ocr_kit = get_ocr_kit()
|
||
|
||
# 读取原始截图进行裁剪 (比读取 _vl.jpg 更干净,没有绿框干扰)
|
||
original_img = cv2.imread(screenshot_path)
|
||
|
||
if "cards" in json_metadata and original_img is not None:
|
||
h_img, w_img = original_img.shape[:2]
|
||
num_cards = len(json_metadata['cards'])
|
||
logger.info(f"检测到 {num_cards} 个场站卡片,准备开始逐一 OCR 识别...")
|
||
|
||
for idx, card in enumerate(json_metadata["cards"]):
|
||
# card: {"id": 1, "rect": [x1, y1, x2, y2], "click_point": [cx, cy]}
|
||
rect = card.get("rect")
|
||
if not rect: continue
|
||
x1, y1, x2, y2 = rect
|
||
|
||
# 边界检查
|
||
x1 = max(0, min(x1, w_img))
|
||
x2 = max(0, min(x2, w_img))
|
||
y1 = max(0, min(y1, h_img))
|
||
y2 = max(0, min(y2, h_img))
|
||
|
||
if x2 <= x1 or y2 <= y1: continue
|
||
|
||
# 裁剪卡片
|
||
cropped_card = original_img[y1:y2, x1:x2]
|
||
|
||
# 识别
|
||
logger.info(f"正在识别第 {idx+1}/{num_cards} 个卡片 (区域: {rect})...")
|
||
t_card_start = time.time()
|
||
parsed_data = ocr_kit.recognize(cropped_card)
|
||
t_card_end = time.time()
|
||
|
||
# Log detailed OCR result
|
||
logger.info(f"卡片 {idx+1} OCR 识别耗时: {t_card_end - t_card_start:.2f}s")
|
||
|
||
if parsed_data and parsed_data.get("station_name"):
|
||
# 格式化数据以匹配原有结构
|
||
piles_list = parsed_data.get("piles", [])
|
||
piles_str_parts = []
|
||
for p in piles_list:
|
||
p_type = p.get("type", "")
|
||
p_free = p.get("free", 0)
|
||
p_total = p.get("total", 0)
|
||
piles_str_parts.append(f"{p_type}:{p_free}/{p_total}")
|
||
|
||
piles_str = " ".join(piles_str_parts)
|
||
|
||
station_info = {
|
||
"station_name": parsed_data.get("station_name"),
|
||
"price": str(parsed_data.get("price")) if parsed_data.get("price") is not None else "",
|
||
"pro_price": "", # 暂未解析
|
||
"piles": piles_str,
|
||
"distance": parsed_data.get("distance", ""),
|
||
"uia_center_x": card["click_point"][0],
|
||
"uia_center_y": card["click_point"][1],
|
||
"tags": parsed_data.get("tags", []),
|
||
"parking_info": parsed_data.get("parking", "")
|
||
}
|
||
logger.info(f"✅ 成功解析场站: {station_info['station_name']} | 距离: {station_info['distance']}")
|
||
|
||
# 立即记录场站基础状态信息 (电桩、价格等)
|
||
try:
|
||
await service.record_station_status(station_info)
|
||
except Exception as e:
|
||
logger.error(f"❌ 记录场站状态失败: {e}")
|
||
|
||
stations.append(station_info)
|
||
else:
|
||
logger.warning(f"⚠️ 卡片 {idx+1} 未能识别出有效场站名称")
|
||
|
||
except Exception as e:
|
||
logger.error(f"❌ Local OCR processing failed: {e}", exc_info=True)
|
||
|
||
logger.info(f"Step [5/6] 本地 OCR 解析完成 (总耗时: {time.time() - t_ocr_total:.2f}s) | 成功识别: {len(stations)}/{len(json_metadata.get('cards', []))}")
|
||
|
||
if not stations:
|
||
logger.warning("当前页面未检测到任何有效的场站信息")
|
||
else:
|
||
logger.info(f"Step [6/6] 准备开始逐一点击场站进入详情页 (共 {len(stations)} 个)...")
|
||
new_stations_found = False
|
||
for s_idx, s in enumerate(stations):
|
||
name = s.get("station_name")
|
||
|
||
# 过滤掉明显的非场站名称
|
||
if not name:
|
||
continue
|
||
|
||
# --- 短期去重:检查 Redis 缓存 ---
|
||
# 2分钟内如果处理过该场站,则跳过
|
||
redis_key = f"processed_station:{name}"
|
||
if await redis_kit.exists(redis_key):
|
||
logger.info(f"场站 {name} 在 2 分钟内已处理过,跳过")
|
||
continue
|
||
|
||
if name in seen_names:
|
||
continue
|
||
|
||
# 使用从 JSON 元数据中恢复的点击坐标
|
||
x = s.get("uia_center_x")
|
||
y = s.get("uia_center_y")
|
||
|
||
if x is None or y is None:
|
||
logger.warning(f"场站 {name} 缺少坐标信息,无法进入详情页")
|
||
continue
|
||
|
||
seen_names.add(name)
|
||
all_stations.append(s)
|
||
new_stations_found = True
|
||
|
||
# 打印基本信息
|
||
price = s.get("price")
|
||
pro_price = s.get("pro_price")
|
||
piles_str = s.get("piles", "")
|
||
distance_str = s.get("distance", "")
|
||
|
||
# 检查距离
|
||
dist_log_part = ""
|
||
if distance_str:
|
||
try:
|
||
import re
|
||
dist_match = re.search(r"(\d+(\.\d+)?)", distance_str)
|
||
if dist_match:
|
||
dist_val = float(dist_match.group(1))
|
||
is_km = "km" in distance_str.lower()
|
||
dist_km = dist_val if is_km else dist_val / 1000.0
|
||
|
||
# 将距离解析结果和限制检查合并到日志中
|
||
dist_log_part = f" (解析: {dist_km:.2f}km / 限: {MAX_CRAWL_DISTANCE}km)"
|
||
|
||
if dist_km >= MAX_CRAWL_DISTANCE:
|
||
logger.info(f"当前场站 {name} 距离 {dist_km}km (原始: {distance_str}) 已达到或超过限制 {MAX_CRAWL_DISTANCE}km。")
|
||
max_distance_reached = True
|
||
except Exception as e:
|
||
logger.warning(f"解析距离出错: {distance_str}, {e}")
|
||
|
||
pro_info = f" | PRO会员价: {pro_price}" if pro_price else ""
|
||
# 确保距离始终显示,如果为空则显示 "未知"
|
||
display_distance = distance_str if distance_str else "未知"
|
||
logger.info(f"正在处理第 {s_idx+1} 个场站: {name} | 价格: {price}{pro_info} | 枪: {piles_str} | 距离: {display_distance}{dist_log_part}")
|
||
|
||
if max_distance_reached:
|
||
break
|
||
|
||
# --- 点击进入详情页记录地址 ---
|
||
# 使用相对坐标点击,以适应不同分辨率
|
||
if img_w > 0 and img_h > 0:
|
||
rel_x = max(0.0, min(1.0, x / img_w))
|
||
rel_y = max(0.0, min(1.0, y / img_h))
|
||
logger.info(f"👉 正在点击进入详情页: {name} (相对坐标: {rel_x:.3f}, {rel_y:.3f})")
|
||
d.click(rel_x, rel_y)
|
||
else:
|
||
logger.info(f"👉 正在点击进入详情页: {name} (绝对坐标: {x}, {y})")
|
||
d.click(x, y)
|
||
|
||
await asyncio.sleep(WAIT_DETAIL_PAGE_LOAD) # 等待详情页加载
|
||
|
||
# 尝试获取并保存地址
|
||
address_recorded = False
|
||
for retry in range(2):
|
||
# 1. 拍摄详情页截图
|
||
t_d_shot = time.time()
|
||
detail_uuid = str(uuid.uuid4())
|
||
detail_screenshot_path = take_screenshot(d, detail_uuid, save_dir=save_dir)
|
||
logger.info(f"Step [详情页截图] 耗时: {time.time() - t_d_shot:.4f}s")
|
||
|
||
# 2. 上传至 OBS
|
||
t_d_up = time.time()
|
||
detail_object_key = f"{OBS_TMP_PREFIX}/{detail_uuid}.jpg"
|
||
up_success, _ = uploader.upload_file(detail_object_key, detail_screenshot_path)
|
||
logger.info(f"Step [上传详情页截图] 耗时: {time.time() - t_d_up:.4f}s")
|
||
|
||
if up_success:
|
||
detail_url = f"https://{CDN_DOMAIN}/{detail_object_key}"
|
||
# 3. 解析并保存地址 (异步后台处理)
|
||
# 定义异步任务函数
|
||
async def _process_address_task(task_name, task_url, task_dev_info, task_redis_key):
|
||
t_addr_task = time.time()
|
||
try:
|
||
addr_res = await service.process_station_address(task_name, task_url, device_info=task_dev_info)
|
||
logger.info(f"Step [解析并保存地址-后台] 耗时: {time.time() - t_addr_task:.4f}s")
|
||
if addr_res.get("address"):
|
||
logger.info(f"成功记录地址: {addr_res.get('address')}")
|
||
# 记录到 Redis,设置过期时间
|
||
await redis_kit.set_data(task_redis_key, "1", expire=REDIS_STATION_EXPIRE)
|
||
except Exception as e:
|
||
logger.error(f"后台处理地址失败 ({task_name}): {e}")
|
||
|
||
# 创建并启动任务
|
||
addr_task = asyncio.create_task(_process_address_task(name, detail_url, device_info, redis_key))
|
||
background_tasks.append(addr_task)
|
||
logger.info(f"已提交地址解析任务到后台: {name}")
|
||
|
||
# 标记为已记录以跳出 retry 循环
|
||
address_recorded = True
|
||
|
||
# 清理详情页临时截图
|
||
if not KEEP_SCREENSHOTS and os.path.exists(detail_screenshot_path):
|
||
os.remove(detail_screenshot_path)
|
||
|
||
break # 直接跳出 retry 循环
|
||
|
||
# --- 抓取价格时段信息 (New Feature) ---
|
||
# 仅在测试模式下或特定需求下启用
|
||
try:
|
||
logger.info(f"开始抓取价格时段信息: {name}")
|
||
|
||
# 使用几何特征识别 "全部时段" 按钮
|
||
# 临时截图
|
||
temp_uuid = "temp_find_expand"
|
||
screenshot_path = take_screenshot(d, temp_uuid, save_dir=TEMP_IMAGE_DIR)
|
||
|
||
# 尝试识别,将调试图片保存到 Images 目录
|
||
t_find = time.time()
|
||
pos = Kit.find_expand_button_position(screenshot_path, debug_dir=save_dir, debug_filename_prefix=name)
|
||
logger.info(f"Step [识别展开按钮] 耗时: {time.time() - t_find:.4f}s")
|
||
|
||
# 清理截图
|
||
try:
|
||
os.remove(screenshot_path)
|
||
except:
|
||
pass
|
||
|
||
if pos:
|
||
x, y = pos
|
||
logger.info(f"通过几何特征找到 '全部时段' 按钮: ({x}, {y})")
|
||
d.click(x, y)
|
||
await asyncio.sleep(1.5)
|
||
|
||
# 抓取3屏截图
|
||
price_image_urls = []
|
||
for p_idx in range(1, 4):
|
||
# 使用场站名称的MD5值作为文件名前缀,避免中文和特殊字符导致的URL问题
|
||
t_p_loop = time.time()
|
||
name_md5 = hashlib.md5(name.encode('utf-8')).hexdigest()
|
||
p_uuid = f"{name_md5}_price_{p_idx}"
|
||
p_path = take_screenshot(d, p_uuid, save_dir=save_dir)
|
||
logger.info(f"已保存价格时段截图 {p_idx} (Station: {name}, MD5: {name_md5}): {p_path}")
|
||
|
||
# 上传到 OBS
|
||
p_object_key = f"{OBS_TMP_PREFIX}/{p_uuid}.jpg"
|
||
success, _ = uploader.upload_file(p_object_key, p_path)
|
||
if success:
|
||
p_url = f"https://{CDN_DOMAIN}/{p_object_key}"
|
||
price_image_urls.append(p_url)
|
||
logger.info(f"截图上传成功: {p_url}")
|
||
else:
|
||
logger.warning(f"截图上传失败: {p_path}")
|
||
|
||
logger.info(f"Step [价格截图与上传-{p_idx}] 耗时: {time.time() - t_p_loop:.4f}s")
|
||
|
||
if p_idx < 3:
|
||
d.swipe_ext("up", scale=0.7)
|
||
await asyncio.sleep(1.0)
|
||
|
||
# 调用服务解析价格时段 (异步后台处理)
|
||
if price_image_urls:
|
||
logger.info(f"正在调用接口解析电费时段数据 ({len(price_image_urls)} 张图片) - 转入后台...")
|
||
|
||
async def _process_price_task(task_name, task_urls, task_dev_info):
|
||
t_price_task = time.time()
|
||
try:
|
||
await service.process_price_schedule(task_name, task_urls, device_info=task_dev_info)
|
||
logger.info(f"Step [解析电费时段数据-后台] 耗时: {time.time() - t_price_task:.4f}s")
|
||
logger.info(f"电费时段数据解析完成: {task_name}")
|
||
except Exception as e:
|
||
logger.error(f"后台处理价格时段失败 ({task_name}): {e}")
|
||
|
||
price_task = asyncio.create_task(_process_price_task(name, price_image_urls, device_info))
|
||
background_tasks.append(price_task)
|
||
else:
|
||
logger.warning("未能获取到有效的价格时段截图,跳过解析")
|
||
else:
|
||
logger.warning(f"未能通过几何特征识别 '全部时段' 按钮")
|
||
|
||
except Exception as e:
|
||
logger.warning(f"抓取价格时段信息时发生异常: {e}")
|
||
|
||
# 测试模式:仅处理第一个场站
|
||
# logger.info("测试模式:仅处理第一个场站,即将退出 (Y3)")
|
||
# return all_stations
|
||
|
||
# 返回列表页
|
||
d.press("back")
|
||
await asyncio.sleep(WAIT_BACK_TO_LIST) # 等待列表页重新稳定
|
||
|
||
if not new_stations_found and i > 0:
|
||
logger.info("未发现更多新场站,停止下拉。")
|
||
# 如果需要强行爬取所有,可以注释掉 break
|
||
# break
|
||
|
||
# 5. 如果还没到最后一页,执行下拉
|
||
if i < max_scrolls and not max_distance_reached:
|
||
logger.info(f"执行下拉翻页 (距离比例: {SCROLL_DISTANCE_RATIO})...")
|
||
# 根据配置的比例计算滑动起始和终点
|
||
# 保证滑动在屏幕中心区域进行
|
||
start_y = 0.5 + (SCROLL_DISTANCE_RATIO / 2)
|
||
end_y = 0.5 - (SCROLL_DISTANCE_RATIO / 2)
|
||
d.swipe(w * 0.5, h * start_y, w * 0.5, h * end_y, duration=0.5)
|
||
await asyncio.sleep(WAIT_AFTER_SCROLL) # 等待页面加载和列表稳定
|
||
|
||
# 清理列表页截图
|
||
if not KEEP_SCREENSHOTS:
|
||
if os.path.exists(screenshot_path):
|
||
os.remove(screenshot_path)
|
||
if os.path.exists(vl_img_path):
|
||
os.remove(vl_img_path)
|
||
if os.path.exists(json_path):
|
||
os.remove(json_path)
|
||
|
||
logger.info(f"爬取任务结束,正在等待 {len(background_tasks)} 个后台处理任务完成...")
|
||
if background_tasks:
|
||
await asyncio.gather(*background_tasks)
|
||
logger.info("所有后台任务已完成。")
|
||
|
||
logger.info(f"任务结束,共采集到 {len(all_stations)} 个场站。停止原因: {stop_reason}")
|
||
return all_stations
|
||
|
||
def clean_images_dir():
|
||
"""清理 Images 目录下除保留文件外的所有文件"""
|
||
base_dir = os.path.dirname(os.path.abspath(__file__))
|
||
images_dir = os.path.join(base_dir, "Images")
|
||
|
||
if not os.path.exists(images_dir):
|
||
return
|
||
|
||
keep_files = {'1.jpg', '2.jpg', '3.jpg', '4.jpg'}
|
||
logger.info(f"正在清理 Images 目录 (保留: {keep_files})...")
|
||
|
||
deleted_count = 0
|
||
for filename in os.listdir(images_dir):
|
||
if filename in keep_files:
|
||
continue
|
||
|
||
file_path = os.path.join(images_dir, filename)
|
||
try:
|
||
if os.path.isfile(file_path):
|
||
os.remove(file_path)
|
||
deleted_count += 1
|
||
except Exception as e:
|
||
logger.warning(f"删除文件失败 {filename}: {e}")
|
||
|
||
logger.info(f"清理完成,共删除 {deleted_count} 个文件。")
|
||
|
||
async def clean_redis_data(redis_kit):
|
||
"""清除 Redis 中的场站处理记录"""
|
||
if TEST_CLEAR_REDIS:
|
||
logger.info("测试模式开启:正在清除 Redis 中的场站处理记录 (processed_station:*)...")
|
||
keys = await redis_kit.keys("processed_station:*")
|
||
if keys:
|
||
count = await redis_kit.delete(*keys)
|
||
logger.info(f"已清除 {count} 条场站处理记录")
|
||
else:
|
||
logger.info("未发现旧的场站处理记录")
|
||
|
||
async def main(service=None, do_cleanup=True):
|
||
# 连接设备
|
||
d = u2.connect()
|
||
|
||
# 初始化服务
|
||
should_close_service = False
|
||
if service is None:
|
||
service = XinDianTuService()
|
||
should_close_service = True
|
||
# 初始化数据库连接(XinDianTuService 需要)
|
||
await service.init_db()
|
||
|
||
uploader = ObsUploader()
|
||
redis_kit = RedisKit()
|
||
|
||
try:
|
||
if do_cleanup:
|
||
# 清理图片目录
|
||
clean_images_dir()
|
||
|
||
# [Testing] 如果配置为测试模式,启动时清除 Redis 记录
|
||
await clean_redis_data(redis_kit)
|
||
|
||
# 清理过期数据
|
||
# await service.cleanup_old_data()
|
||
|
||
# 获取场站列表
|
||
stations = await get_station_list(d, service, uploader, max_scrolls=MAX_SCROLLS)
|
||
|
||
if stations:
|
||
logger.info("场站列表采集完成。")
|
||
else:
|
||
logger.warning("未采集到任何场站信息。")
|
||
|
||
return True
|
||
|
||
except Exception as e:
|
||
logger.exception(f"运行过程中出现异常: {e}")
|
||
return False
|
||
finally:
|
||
# 关闭数据库连接
|
||
if should_close_service:
|
||
await service.close_db()
|
||
|
||
if __name__ == "__main__":
|
||
try:
|
||
asyncio.run(main())
|
||
except KeyboardInterrupt:
|
||
logger.info("程序被用户中断.")
|
||
except Exception as e:
|
||
logger.exception(f"程序崩溃: {e}")
|