#!/bin/bash
#
# Incremental DataX sync driver.
#
# Path configuration: the DataX install location, this script's directory,
# and the json_inc directory (next to the script) holding the job JSON files.

# DataX installation root.
DATAX_HOME="/usr/local/datax"

# Absolute directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"

# Job JSON files are read from json_inc beside the script.
CONF_DIR="$SCRIPT_DIR/json_inc"

BIN_DIR="$DATAX_HOME/bin"
DATAX_PY="$BIN_DIR/datax.py"
# Job files to run for the incremental sync (15 files), in execution order.
# Bash array elements are separated by whitespace only: a trailing comma
# would become part of the file name, and that job would then be reported
# as a missing file.
JOBS=(
  "t_equipment_charge_order.json"
  "t_equipment_charge_order_detail.json"
  "t_account_recharge.json"
  "t_account_water.json"
  "t_car.json"
  "t_company.json"
  "t_company_info_value.json"
  "t_connector.json"
  "t_equipment.json"
  "t_station.json"
  "t_ext_hurry_quit.json"
  "t_time_day.json"
  "t_user.json"
  "t_user_account.json"
  "t_user_upload_fault.json"
)
# Global parameters handed to datax.py through -p as one string of -D
# properties.
#
# NOTE(review): the credentials below are hardcoded in the script and end up
# on the datax.py command line. Each value can be supplied through the
# matching DATAX_* environment variable instead; the literals are only the
# fallback defaults, so with no DATAX_* variables set the resulting string
# is the same as before.
PARAMS="-Dsrc_user=${DATAX_SRC_USER:-ylt}"
PARAMS+=" -Dsrc_pwd=${DATAX_SRC_PWD:-Ycharge666}"
PARAMS+=" -Dsrc_jdbc=${DATAX_SRC_JDBC:-jdbc:mysql://rm-bp1ux6tuk49er80t9xo.mysql.rds.aliyuncs.com:3306/yltcharge}"
PARAMS+=" -Ddest_user=${DATAX_DEST_USER:-root}"
PARAMS+=" -Ddest_pwd=${DATAX_DEST_PWD:-DsideaL147258369}"
PARAMS+=" -Ddest_load_url=${DATAX_DEST_LOAD_URL:-10.10.14.204:8030}"
PARAMS+=" -Ddest_jdbc=${DATAX_DEST_JDBC:-jdbc:mysql://10.10.14.204:9030/yltcharge}"
# Record the start time up front: a human-readable form for the banner and
# epoch seconds for computing the total run time at the end.
SCRIPT_START_TIME=$(date '+%Y-%m-%d %H:%M:%S')
SCRIPT_START_TIMESTAMP=$(date +%s)

# Startup banner, one line per argument.
printf '%s\n' \
  "DataX增量同步脚本 (Incremental Sync)" \
  "==========================" \
  "DataX路径: $DATAX_PY" \
  "配置文件目录: $CONF_DIR" \
  "任务数量: ${#JOBS[@]}" \
  "脚本开始时间: $SCRIPT_START_TIME" \
  "注意: 增量模式下不执行 Truncate,请确保目标表为 Unique Key 模型以实现 Upsert 效果" \
  "策略: 每次同步最近10000条数据(按ID倒序),利用Doris主键机制自动覆盖更新" \
  "=========================="
# Enter the job configuration directory so the job files can be referenced
# by bare name; abort with status 1 if the directory cannot be entered.
if ! cd "$CONF_DIR"; then
  echo "错误: 无法进入配置文件目录 $CONF_DIR"
  exit 1
fi

echo "当前目录: $(pwd)"
echo
# Run counters.
SUCCESS_COUNT=0
FAIL_COUNT=0
TOTAL=${#JOBS[@]}
CURRENT=0

# Per-job results, kept as parallel arrays: entry i of each array describes
# the i-th processed job.
declare -a TASK_RESULTS=()    # job file name
declare -a TASK_DURATIONS=()  # elapsed seconds
declare -a TASK_STATUSES=()   # status text
#######################################
# Run one DataX job and record its outcome.
# Globals:
#   TOTAL, DATAX_PY, PARAMS (read)
#   CURRENT, SUCCESS_COUNT, FAIL_COUNT,
#   TASK_RESULTS, TASK_STATUSES, TASK_DURATIONS (written)
# Arguments:
#   $1 - job file name, resolved against the current directory
# Outputs:
#   Progress and result lines on stdout.
#######################################
run_job() {
  local job=$1
  local started finished elapsed rc

  CURRENT=$((CURRENT + 1))

  echo "任务 [$CURRENT/$TOTAL] $job"
  echo "任务开始时间: $(date '+%H:%M:%S')"
  echo "----------------------------------------"

  # A missing job file is counted as a failure with zero duration.
  if [ ! -f "$job" ]; then
    echo "✗ 错误: 文件不存在 - $job"
    FAIL_COUNT=$((FAIL_COUNT + 1))
    TASK_RESULTS+=("$job")
    TASK_STATUSES+=("文件不存在")
    TASK_DURATIONS+=("0")
    echo "----------------------------------------"
    return
  fi

  # Epoch seconds before and after the run give the elapsed time.
  started=$(date +%s)

  # -p hands the -D properties in PARAMS to datax.py as a single argument.
  /usr/bin/python "$DATAX_PY" -p "$PARAMS" "$job"
  rc=$?

  finished=$(date +%s)
  elapsed=$((finished - started))

  TASK_RESULTS+=("$job")
  TASK_DURATIONS+=("$elapsed")

  if [ "$rc" -eq 0 ]; then
    echo "✓ 成功: $job (耗时: ${elapsed}秒)"
    SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
    TASK_STATUSES+=("成功")
  else
    echo "✗ 失败: $job (耗时: ${elapsed}秒)"
    echo "错误代码: $rc"
    FAIL_COUNT=$((FAIL_COUNT + 1))
    TASK_STATUSES+=("失败")
    # Incremental runs are usually unattended: a failure does not pause the
    # script, the next job simply runs.
  fi

  echo "任务结束时间: $(date '+%H:%M:%S')"
  echo "----------------------------------------"
  echo
}

for JOB in "${JOBS[@]}"; do
  run_job "$JOB"

  # Optional pause between jobs (seconds).
  # sleep 1
done
# Record the end time and derive the total run time in seconds.
SCRIPT_END_TIME=$(date '+%Y-%m-%d %H:%M:%S')
SCRIPT_END_TIMESTAMP=$(date +%s)
TOTAL_DURATION=$((SCRIPT_END_TIMESTAMP - SCRIPT_START_TIMESTAMP))

#######################################
# Print one line per processed job: name, icon, status text, duration.
# Globals:
#   TASK_RESULTS, TASK_STATUSES, TASK_DURATIONS (read)
# Outputs:
#   One formatted line per job on stdout.
#######################################
print_task_details() {
  local idx icon

  for idx in "${!TASK_RESULTS[@]}"; do
    # Both the failed and the missing-file statuses get the failure icon.
    case "${TASK_STATUSES[$idx]}" in
      "失败" | "文件不存在") icon="✗" ;;
      *) icon="✓" ;;
    esac

    printf "%-40s %-2s %-10s %-8s秒\n" \
      "${TASK_RESULTS[$idx]}" \
      "$icon" \
      "${TASK_STATUSES[$idx]}" \
      "${TASK_DURATIONS[$idx]}"
  done
}

#######################################
# Print totals, the success rate and the average job duration.
# Globals:
#   TOTAL, SUCCESS_COUNT, FAIL_COUNT, TASK_DURATIONS (read)
# Outputs:
#   Statistics lines on stdout.
#######################################
print_statistics() {
  local secs sum=0 counted=0

  echo "汇总统计:"
  echo "总任务数: $TOTAL"
  echo "成功: $SUCCESS_COUNT"
  echo "失败: $FAIL_COUNT"

  # Integer percentage; skipped when there are no jobs to avoid dividing
  # by zero.
  if ((TOTAL > 0)); then
    echo "成功率: $((SUCCESS_COUNT * 100 / TOTAL))%"
  fi

  # Average over jobs with a non-zero duration only, so missing-file
  # entries (recorded with duration 0) do not lower the average.
  for secs in "${TASK_DURATIONS[@]}"; do
    if ((secs > 0)); then
      sum=$((sum + secs))
      counted=$((counted + 1))
    fi
  done

  if ((counted > 0)); then
    echo "平均任务耗时: $((sum / counted))秒"
  fi
}

echo "=========================="
echo "执行结果汇总 (Incremental Sync)"
echo "=========================="
echo "脚本开始时间: $SCRIPT_START_TIME"
echo "脚本结束时间: $SCRIPT_END_TIME"
echo "脚本总耗时: ${TOTAL_DURATION}秒"
echo "=========================="
echo "任务执行详情:"
echo "----------------------------------------"

print_task_details

echo "----------------------------------------"
print_statistics

# NOTE(review): the script ends on this echo, so its exit status does not
# reflect FAIL_COUNT; a scheduler cannot detect failed jobs from the exit
# code alone.
echo "=========================="