Files
aiData/DataX/run_jobs_inc.sh
HuangHai f7d55a64dc 'commit'
2026-01-20 13:44:45 +08:00

179 lines
5.2 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Configuration for the DataX incremental-sync runner: installation paths,
# the list of job files to execute, and the global job parameters.

# DataX installation root.
DATAX_HOME="/usr/local/datax"
# Absolute path of the directory containing this script.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# Job JSON files live in the json_inc directory next to this script.
CONF_DIR="$SCRIPT_DIR/json_inc"
BIN_DIR="$DATAX_HOME/bin"
DATAX_PY="$BIN_DIR/datax.py"

# Explicit list of the job files to run for the incremental sync.
# Bash array elements are separated by whitespace only; a trailing comma
# would become part of the element (e.g. "t_equipment.json,"), which makes
# the later file-existence check fail for that job. Do not add commas here.
JOBS=(
  "t_equipment_charge_order.json"
  "t_equipment_charge_order_detail.json"
  "t_account_recharge.json"
  "t_account_water.json"
  "t_car.json"
  "t_company.json"
  "t_company_info_value.json"
  "t_connector.json"
  "t_equipment.json"
  "t_station.json"
  "t_ext_hurry_quit.json"
  "t_time_day.json"
  "t_user.json"
  "t_user_account.json"
  "t_user_upload_fault.json"
)

# Global -D parameters handed to datax.py through its -p option.
# NOTE(security): source and destination credentials are hard-coded here and
# end up on the process command line. Consider loading them from a protected
# file or the environment instead of committing them to the repository.
PARAMS="-Dsrc_user=ylt -Dsrc_pwd=Ycharge666 -Dsrc_jdbc=jdbc:mysql://rm-bp1ux6tuk49er80t9xo.mysql.rds.aliyuncs.com:3306/yltcharge -Ddest_user=root -Ddest_pwd=DsideaL147258369 -Ddest_load_url=10.10.14.204:8030 -Ddest_jdbc=jdbc:mysql://10.10.14.204:9030/yltcharge"
# Record the start time first: once in human-readable form for the report,
# once as epoch seconds for computing the total run time later.
SCRIPT_START_TIME=$(date '+%Y-%m-%d %H:%M:%S')
SCRIPT_START_TIMESTAMP=$(date +%s)

# Print the start-up banner, one line per argument.
printf '%s\n' \
  "DataX增量同步脚本 (Incremental Sync)" \
  "==========================" \
  "DataX路径: $DATAX_PY" \
  "配置文件目录: $CONF_DIR" \
  "任务数量: ${#JOBS[@]}" \
  "脚本开始时间: $SCRIPT_START_TIME" \
  "注意: 增量模式下不执行 Truncate请确保目标表为 Unique Key 模型以实现 Upsert 效果" \
  "策略: 每次同步最近10000条数据(按ID倒序)利用Doris主键机制自动覆盖更新" \
  "=========================="

# Job files are referenced by bare name, so run from the config directory;
# abort if it cannot be entered.
if ! cd "$CONF_DIR"; then
  printf '%s\n' "错误: 无法进入配置文件目录 $CONF_DIR"
  exit 1
fi
printf '当前目录: %s\n' "$(pwd)"
printf '\n'
# Run counters.
SUCCESS_COUNT=0
FAIL_COUNT=0
TOTAL=${#JOBS[@]}
CURRENT=0

# Parallel arrays holding, per processed job: its file name, its elapsed
# seconds, and its final status text. Index i in each array refers to the
# same job.
TASK_RESULTS=()
TASK_DURATIONS=()
TASK_STATUSES=()

for JOB in "${JOBS[@]}"; do
  CURRENT=$(( CURRENT + 1 ))
  printf '%s\n' "任务 [$CURRENT/$TOTAL] $JOB"
  printf '任务开始时间: %s\n' "$(date '+%H:%M:%S')"
  printf '%s\n' "----------------------------------------"

  if [[ -f "$JOB" ]]; then
    # Time the DataX run in whole seconds.
    START_TIME=$(date +%s)
    # Global parameters are handed to datax.py through -p.
    /usr/bin/python "$DATAX_PY" -p "$PARAMS" "$JOB"
    EXIT_CODE=$?
    END_TIME=$(date +%s)
    DURATION=$(( END_TIME - START_TIME ))

    TASK_RESULTS+=("$JOB")
    TASK_DURATIONS+=("$DURATION")

    case "$EXIT_CODE" in
      0)
        printf '%s\n' "✓ 成功: $JOB (耗时: ${DURATION}秒)"
        SUCCESS_COUNT=$(( SUCCESS_COUNT + 1 ))
        TASK_STATUSES+=("成功")
        ;;
      *)
        # A failed job is recorded and the loop moves on to the next one.
        printf '%s\n' "✗ 失败: $JOB (耗时: ${DURATION}秒)"
        printf '%s\n' "错误代码: $EXIT_CODE"
        FAIL_COUNT=$(( FAIL_COUNT + 1 ))
        TASK_STATUSES+=("失败")
        ;;
    esac

    printf '任务结束时间: %s\n' "$(date '+%H:%M:%S')"
    printf '%s\n' "----------------------------------------"
    printf '\n'
    # Optional pause between jobs (seconds):
    # sleep 1
  else
    # Missing job file: count it as a failure with a zero duration.
    printf '%s\n' "✗ 错误: 文件不存在 - $JOB"
    FAIL_COUNT=$(( FAIL_COUNT + 1 ))
    TASK_RESULTS+=("$JOB")
    TASK_STATUSES+=("文件不存在")
    TASK_DURATIONS+=("0")
    printf '%s\n' "----------------------------------------"
  fi
done
# Capture the end time and derive the total wall-clock run time in seconds.
SCRIPT_END_TIME=$(date '+%Y-%m-%d %H:%M:%S')
SCRIPT_END_TIMESTAMP=$(date +%s)
TOTAL_DURATION=$(( SCRIPT_END_TIMESTAMP - SCRIPT_START_TIMESTAMP ))

# Report header with overall timing.
printf '%s\n' \
  "==========================" \
  "执行结果汇总 (Incremental Sync)" \
  "==========================" \
  "脚本开始时间: $SCRIPT_START_TIME" \
  "脚本结束时间: $SCRIPT_END_TIME" \
  "脚本总耗时: ${TOTAL_DURATION}" \
  "==========================" \
  "任务执行详情:" \
  "----------------------------------------"

# One row per recorded job: name, status icon, status text, elapsed seconds.
for idx in "${!TASK_RESULTS[@]}"; do
  case "${TASK_STATUSES[$idx]}" in
    "失败" | "文件不存在") STATUS_ICON="✗" ;;
    *) STATUS_ICON="✓" ;;
  esac
  printf "%-40s %-2s %-10s %-8s秒\n" \
    "${TASK_RESULTS[$idx]}" \
    "$STATUS_ICON" \
    "${TASK_STATUSES[$idx]}" \
    "${TASK_DURATIONS[$idx]}"
done

printf '%s\n' \
  "----------------------------------------" \
  "汇总统计:" \
  "总任务数: $TOTAL" \
  "成功: $SUCCESS_COUNT" \
  "失败: $FAIL_COUNT"

# Integer success percentage; skipped when there are no jobs.
if [[ "$TOTAL" -gt 0 ]]; then
  SUCCESS_RATE=$(( SUCCESS_COUNT * 100 / TOTAL ))
  printf '%s\n' "成功率: ${SUCCESS_RATE}%"
fi

# Average duration over jobs whose recorded duration is greater than zero
# (missing-file entries are stored with a duration of 0 and so are excluded).
if (( ${#TASK_DURATIONS[@]} > 0 )); then
  TOTAL_TASK_TIME=0
  VALID_TASKS=0
  for elapsed in "${TASK_DURATIONS[@]}"; do
    [[ "$elapsed" -gt 0 ]] || continue
    TOTAL_TASK_TIME=$(( TOTAL_TASK_TIME + elapsed ))
    VALID_TASKS=$(( VALID_TASKS + 1 ))
  done
  if (( VALID_TASKS > 0 )); then
    AVG_DURATION=$(( TOTAL_TASK_TIME / VALID_TASKS ))
    printf '%s\n' "平均任务耗时: ${AVG_DURATION}"
  fi
fi
printf '%s\n' "=========================="