Files
aiData/DataX/run_jobs_full.sh
HuangHai 4592b0da6f 'commit'
2026-02-05 15:25:57 +08:00

142 lines
4.1 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# 环境变量补丁
export PATH=$PATH:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin
[ -f /etc/profile ] && . /etc/profile
[ -f ~/.bash_profile ] && . ~/.bash_profile
[ -f ~/.profile ] && . ~/.profile
# Argument check: the first argument selects the sync target and must be
# exactly "doris" or "mysql"; anything else prints the usage line and exits 1.
TARGET="$1"
case "$TARGET" in
  doris | mysql)
    ;;
  *)
    echo "使用方法: $0 [doris|mysql]"
    exit 1
    ;;
esac
# Configuration locations: resolve the absolute directory of this script,
# then derive the job-definition directory and the database config path.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd
)"
CONF_DIR="${SCRIPT_DIR}/json"
CONFIG_FILE="${SCRIPT_DIR}/db_config.sh"
# Load the database configuration; abort with exit status 1 when the config
# file does not exist.
if [ ! -f "$CONFIG_FILE" ]; then
  echo "错误: 找不到配置文件 $CONFIG_FILE"
  exit 1
fi
source "$CONFIG_FILE"
# DataX installation layout.
DATAX_HOME="/usr/local/datax"
BIN_DIR="${DATAX_HOME}/bin"
DATAX_PY="${BIN_DIR}/datax.py"
# Explicit list of the 15 job files to run. Each job file is named
# "<table>.json", so the list is kept as table names and the suffix is
# appended to every element when JOBS is built.
JOB_TABLES=(
  t_equipment_charge_order
  t_equipment_charge_order_detail
  t_account_recharge
  t_account_water
  t_car
  t_company
  t_company_info_value
  t_connector
  t_equipment
  t_station
  t_ext_hurry_quit
  t_time_day
  t_user
  t_user_account
  t_user_upload_fault
)
JOBS=("${JOB_TABLES[@]/%/.json}")
# Logging: create the logs directory next to the script when it is missing,
# and name this run's log file after the target and the current timestamp.
LOG_DIR="${SCRIPT_DIR}/logs"
[ -d "$LOG_DIR" ] || mkdir -p "$LOG_DIR"
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
LOG_FILE="${LOG_DIR}/run_full_${TARGET}_${TIMESTAMP}.log"
echo "日志将同时记录到: $LOG_FILE"
# Choose the destination parameters and the banner title for the target.
# Any target other than "doris" takes the MySQL settings.
case "$TARGET" in
  doris)
    DEST_PARAMS="$DORIS_DEST_PARAMS"
    TITLE="MySQL -> Doris Full Sync"
    ;;
  *)
    DEST_PARAMS="$MYSQL_DEST_PARAMS"
    TITLE="MySQL -> MySQL Full Sync"
    ;;
esac
# Record the run's start time, both human-readable and as epoch seconds,
# then print the banner.
SCRIPT_START_TIME="$(date '+%Y-%m-%d %H:%M:%S')"
SCRIPT_START_TIMESTAMP="$(date '+%s')"
printf '%s\n' "DataX全量同步脚本 ($TITLE)" "=========================="
# Change into the job-definition directory so job files can be referenced by
# bare file name; abort when the directory cannot be entered.
if ! cd "$CONF_DIR"; then
  printf '%s\n' "错误: 无法进入配置文件目录 $CONF_DIR"
  exit 1
fi
# Run every job in JOBS, in order, as a full sync (truncate the destination
# table, then copy all rows). Outcome counters and the per-job TASK_* arrays
# are filled in for the summary that follows the loop.
SUCCESS_COUNT=0
FAIL_COUNT=0
TOTAL=${#JOBS[@]}
declare -a TASK_RESULTS=()
declare -a TASK_DURATIONS=()
declare -a TASK_STATUSES=()

#######################################
# Print one progress line and append the same line to the run log, so the
# log file shows where each job's DataX output starts and how the job ended.
# Globals:   LOG_FILE (read)
# Arguments: $* - text of the line
# Outputs:   the line on stdout; the same line appended to LOG_FILE
#######################################
log_progress() {
  printf '%s\n' "$*" | tee -a "$LOG_FILE"
}

#######################################
# Build the parameter string handed to datax.py via -p for one table.
# Globals:   SRC_BASE_PARAMS (read), DEST_PARAMS (read)
# Arguments: $1 - table name (job file name without the .json suffix)
# Outputs:   the parameter string on stdout, without a trailing newline
#######################################
build_job_params() {
  local table_name=$1
  # Full-sync query: SELECT * FROM <table>.
  local query_sql="SELECT * FROM $table_name"
  # Pre-statement that empties the destination: TRUNCATE TABLE <table>.
  local pre_sql="TRUNCATE TABLE $table_name"
  # Optimization: the table has just been emptied, so force insert mode for
  # speed, overriding a replace mode configured in db_config.sh.
  local dest_params="${DEST_PARAMS/dest_write_mode=replace/dest_write_mode=insert}"
  # The escaped double quotes are part of the value: they keep each SQL
  # statement (which contains spaces) together inside the single -p string.
  printf '%s' "$SRC_BASE_PARAMS $dest_params -Dsrc_query_sql=\"$query_sql\" -Ddest_pre_sql=\"$pre_sql\""
}

for JOB in "${JOBS[@]}"; do
  TABLE_NAME="${JOB%.json}"
  log_progress "任务 [$((SUCCESS_COUNT + FAIL_COUNT + 1))/$TOTAL] $JOB"

  PARAMS="$(build_job_params "$TABLE_NAME")"
  # Debug aid, left disabled (PARAMS may carry credentials - strip them
  # before enabling):
  # echo "command: python $DATAX_PY -p \"$PARAMS\" $JOB"

  START_TIME=$(date +%s)
  # PARAMS is passed as ONE quoted argument; no eval is used, and the string
  # is left for datax.py to parse.
  /usr/bin/python "$DATAX_PY" -p "$PARAMS" "$JOB" 2>&1 | tee -a "$LOG_FILE"
  # Status of the python stage, not of tee.
  EXIT_CODE=${PIPESTATUS[0]}
  END_TIME=$(date +%s)
  DURATION=$((END_TIME - START_TIME))

  TASK_RESULTS+=("$JOB")
  TASK_DURATIONS+=("$DURATION")
  if [ "$EXIT_CODE" -eq 0 ]; then
    log_progress "✓ 成功: $JOB (耗时: ${DURATION}秒)"
    SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
    TASK_STATUSES+=("成功")
  else
    log_progress "✗ 失败: $JOB (耗时: ${DURATION}秒)"
    FAIL_COUNT=$((FAIL_COUNT + 1))
    TASK_STATUSES+=("失败")
  fi
done
#######################################
# Print the end-of-run report: banner, start time and total elapsed seconds
# (when a start timestamp was recorded), one line per executed job with its
# status and duration, then the overall counters.
# Globals:   TARGET, TOTAL, SUCCESS_COUNT, FAIL_COUNT, TASK_RESULTS,
#            TASK_STATUSES, TASK_DURATIONS, SCRIPT_START_TIME,
#            SCRIPT_START_TIMESTAMP (all read)
# Arguments: none
# Outputs:   the report on stdout
#######################################
print_summary() {
  local idx elapsed
  echo "=========================="
  echo "执行结果汇总 (Full Sync to $TARGET)"
  echo "=========================="
  if [ -n "$SCRIPT_START_TIMESTAMP" ]; then
    elapsed=$(( $(date +%s) - SCRIPT_START_TIMESTAMP ))
    echo "开始时间: $SCRIPT_START_TIME"
    echo "总耗时: ${elapsed}秒"
  fi
  if [ "${#TASK_RESULTS[@]}" -gt 0 ]; then
    echo "任务明细:"
    for idx in "${!TASK_RESULTS[@]}"; do
      echo "  [${TASK_STATUSES[$idx]}] ${TASK_RESULTS[$idx]} (耗时: ${TASK_DURATIONS[$idx]}秒)"
    done
  fi
  echo "总任务数: $TOTAL"
  echo "成功: $SUCCESS_COUNT"
  echo "失败: $FAIL_COUNT"
  echo "=========================="
}

# Show the report on the console and, when a log file is configured, append
# it to that file as well so the log is self-contained.
if [ -n "$LOG_FILE" ]; then
  print_summary | tee -a "$LOG_FILE"
else
  print_summary
fi

# Report failure to the caller (cron, a wrapper script, a scheduler) when at
# least one job failed; otherwise the script ends with status 0 as before.
if [ "${FAIL_COUNT:-0}" -gt 0 ]; then
  exit 1
fi