This commit is contained in:
HuangHai
2026-01-15 07:40:53 +08:00
parent 3e9fc3737e
commit a61e18bf26
6 changed files with 43 additions and 266 deletions

104
Tools/T5_ClearHistory.py Normal file
View File

@@ -0,0 +1,104 @@
import asyncio
import os
import sys
import logging
# Put the project root on sys.path (inserted at the front when it is missing)
# so that project packages win over external modules with the same name.
# NOTE(review): three dirname() hops from this file resolve to two directories
# above the folder that contains the script. The commit lists this file as
# Tools/T5_ClearHistory.py, so confirm this is the project root and not its parent.
_script_path = os.path.abspath(__file__)
project_root = os.path.dirname(os.path.dirname(os.path.dirname(_script_path)))
if project_root not in sys.path:
    sys.path.insert(0, project_root)
from Util import Win32Patch
from DbKit.Db import Db
from Util.RedisKit import RedisKit
from sqlalchemy.sql import text
# Prefer the regular package import. If the Config package cannot be resolved,
# fall back to executing Config/Config.py directly from its file location
# (three directory levels above this file) and read DB_URL from it.
try:
    from Config.Config import DB_URL
except ModuleNotFoundError as _cfg_import_error:
    import importlib.util

    _root_dir_cfg = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    _config_path_cfg = os.path.join(_root_dir_cfg, "Config", "Config.py")
    _spec_cfg = importlib.util.spec_from_file_location("project_config_fallback", _config_path_cfg)
    # Raise explicitly instead of asserting: assert statements are stripped
    # under `python -O`, and a missing spec/loader should surface as an import
    # failure that still points at the original ModuleNotFoundError.
    if _spec_cfg is None or _spec_cfg.loader is None:
        raise ImportError(
            f"Cannot load fallback configuration from {_config_path_cfg}"
        ) from _cfg_import_error
    _cfg_mod = importlib.util.module_from_spec(_spec_cfg)
    _spec_cfg.loader.exec_module(_cfg_mod)
    DB_URL = _cfg_mod.DB_URL
# Logging setup: INFO level; each record shows timestamp, logger name, level and message.
# NOTE(review): the logger name mentions "AiTeJiYiChong" although main() clears
# the operator '特来电' -- confirm whether the name is intentional.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger("ClearAiTeJiYiChong")
async def _clear_database(operator: str) -> None:
    """Delete every station row belonging to *operator* from the database.

    Rows are removed from the two tables keyed by ``station_hash`` first and
    from ``t_station_profile_scd`` last, because the child deletes select the
    affected hashes from the profile table. All three deletes run inside one
    ``session.begin()`` block (presumably a SQLAlchemy transaction that commits
    on clean exit and rolls back on error -- confirm against ``Db``).

    Failures during the deletes are logged with a traceback and swallowed so
    the caller can still run the Redis cleanup; the connection is closed in
    every case.
    """
    # Table names are hard-coded constants (never user input), so formatting
    # them into the SQL text is safe; the operator value stays a bound parameter.
    child_tables = ("t_station_status_scd", "t_station_price_schedule_scd")
    params = {"operator": operator}

    logger.info(f"开始清理数据库中 operator='{operator}' 的数据...")
    db = Db(db_url=DB_URL)
    await db.init_db()
    try:
        async with db.AsyncSessionLocal() as session:
            async with session.begin():
                # Child tables first, linked to the profile table via station_hash.
                for table in child_tables:
                    sql_child = f"""
                        DELETE FROM {table}
                        WHERE station_hash IN (
                            SELECT station_hash FROM t_station_profile_scd WHERE operator = :operator
                        )
                    """
                    logger.info(f"正在清理 {table}...")
                    result_child = await session.execute(text(sql_child), params)
                    logger.info(f"{table} 已删除 {result_child.rowcount} 行记录。")

                # Finally the master table itself.
                sql_profile = "DELETE FROM t_station_profile_scd WHERE operator = :operator"
                logger.info("正在清理 t_station_profile_scd...")
                result_profile = await session.execute(text(sql_profile), params)
                logger.info(f"t_station_profile_scd 已删除 {result_profile.rowcount} 行记录。")
        logger.info("数据库记录清理完成。")
    except Exception as e:
        # Best-effort cleanup: keep going, but record the full traceback.
        logger.exception(f"数据库清理失败: {e}")
    finally:
        logger.info("正在关闭数据库连接...")
        await db.close()


async def _clear_redis(pattern: str, batch_size: int = 1000) -> None:
    """Delete all Redis keys matching *pattern*, at most *batch_size* per call.

    Failures are logged with a traceback and swallowed (best-effort cleanup).
    """
    logger.info("开始清理 Redis 中的缓存数据...")
    redis_kit = RedisKit()
    try:
        keys = await redis_kit.keys(pattern)
        if not keys:
            logger.info(f"未匹配到模式为 '{pattern}' 的键。")
            return

        key_list = list(keys)
        logger.info(f"匹配到 {len(key_list)} 个键,正在删除...")
        # Delete in bounded batches so a large key set is not sent as one
        # oversized delete call.
        for start in range(0, len(key_list), batch_size):
            await redis_kit.delete(*key_list[start:start + batch_size])
        logger.info("Redis 缓存清理完成。")
    except Exception as e:
        logger.exception(f"Redis 清理失败: {e}")
    # Per the original author's note, RedisKit is currently a singleton that
    # uses a thread pool internally and normally needs no explicit close; if it
    # moves to an async driver, cleanup logic will have to be added here.


async def main(operator: str = '特来电', redis_pattern: str = "crawled:tld:*") -> None:
    """Clear the stored history for one charging-station operator.

    1. Delete all database records whose ``operator`` column equals *operator*.
    2. Delete the Redis keys matching *redis_pattern*. According to the
       original comment, ``crawled:tld:*`` is the de-duplication key pattern
       used by the 特来电 crawler.

    Both steps are best-effort: an error in one is logged and does not prevent
    the other from running.

    Args:
        operator: Operator name to purge from the station tables.
        redis_pattern: Glob pattern of the Redis keys to remove.
    """
    await _clear_database(operator)
    await _clear_redis(redis_pattern)
if __name__ == "__main__":
    # Script entry point: apply the project's Win32Patch first, then drive the
    # async cleanup to completion on a fresh event loop.
    # NOTE(review): Win32Patch.patch() is a project helper not visible here;
    # presumably a Windows compatibility shim -- confirm.
    Win32Patch.patch()
    asyncio.run(main())