#!/bin/bash
#
# DataX incremental sync driver.
#
# Runs a fixed list of DataX job files from the json_inc directory that sits
# next to this script, one after another, passing the shared connection
# parameters to each job through `datax.py -p`. A missing or failed job is
# recorded and the run continues; a per-job report and summary statistics
# are printed at the end.
#
# Exit status: 1 if the job directory cannot be entered, otherwise 0
# (individual job failures are only reported in the summary).

# Installation path of DataX.
DATAX_HOME="/usr/local/datax"

# Directory containing this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

# Job JSON files live in json_inc next to the script.
CONF_DIR="$SCRIPT_DIR/json_inc"
BIN_DIR="$DATAX_HOME/bin"
DATAX_PY="$BIN_DIR/datax.py"

# Explicit list of job files to run (incremental sync).
# Bash array elements are separated by whitespace only: a comma after an
# element would become part of the file name and the job would never be found.
JOBS=(
  "t_equipment_charge_order.json"
  "t_equipment_charge_order_detail.json"
  "t_account_recharge.json"
  "t_account_water.json"
  "t_car.json"
  "t_company.json"
  "t_company_info_value.json"
  "t_connector.json"
  "t_equipment.json"
  "t_station.json"
  "t_ext_hurry_quit.json"
  "t_time_day.json"
  "t_user.json"
  "t_user_account.json"
  "t_user_upload_fault.json"
)

# Parameters shared by every job, handed to datax.py as one -p string.
# NOTE(review): credentials are hard-coded here and end up on the python
# command line; consider loading them from a protected file or environment.
PARAMS="-Dsrc_user=ylt -Dsrc_pwd=Ycharge666 -Dsrc_jdbc=jdbc:mysql://rm-bp1ux6tuk49er80t9xo.mysql.rds.aliyuncs.com:3306/yltcharge -Ddest_user=root -Ddest_pwd=DsideaL147258369 -Ddest_load_url=10.10.14.204:8030 -Ddest_jdbc=jdbc:mysql://10.10.14.204:9030/yltcharge"

#######################################
# Runs every job in JOBS and prints the execution report.
# Globals:   DATAX_PY, CONF_DIR, JOBS, PARAMS (read)
# Arguments: none
# Outputs:   progress, per-job results and summary on stdout
# Returns:   1 if CONF_DIR cannot be entered, otherwise 0
#######################################
main() {
  local -r rule_heavy="=========================="
  local -r rule_light="----------------------------------------"

  # Record the start of the whole run.
  local script_start_time script_start_timestamp
  script_start_time=$(date '+%Y-%m-%d %H:%M:%S')
  script_start_timestamp=$(date +%s)

  echo "DataX增量同步脚本 (Incremental Sync)"
  echo "$rule_heavy"
  echo "DataX路径: $DATAX_PY"
  echo "配置文件目录: $CONF_DIR"
  echo "任务数量: ${#JOBS[@]}"
  echo "脚本开始时间: $script_start_time"
  echo "注意: 增量模式下不执行 Truncate,请确保目标表为 Unique Key 模型以实现 Upsert 效果"
  echo "策略: 每次同步最近10000条数据(按ID倒序),利用Doris主键机制自动覆盖更新"
  echo "$rule_heavy"

  # Job files are referenced by bare name, so work from the job directory.
  if ! cd "$CONF_DIR"; then
    echo "错误: 无法进入配置文件目录 $CONF_DIR"
    return 1
  fi

  echo "当前目录: $(pwd)"
  echo

  # Counters.
  local success_count=0
  local fail_count=0
  local -r total=${#JOBS[@]}
  local current=0

  # Parallel arrays holding the outcome of each job, in execution order.
  local -a task_results=()
  local -a task_durations=()
  local -a task_statuses=()

  local job start_time end_time duration exit_code
  for job in "${JOBS[@]}"; do
    current=$((current + 1))
    echo "任务 [$current/$total] $job"
    echo "任务开始时间: $(date '+%H:%M:%S')"
    echo "$rule_light"

    # A missing job file counts as a failure with zero duration.
    if [[ ! -f "$job" ]]; then
      echo "✗ 错误: 文件不存在 - $job"
      fail_count=$((fail_count + 1))
      task_results+=("$job")
      task_statuses+=("文件不存在")
      task_durations+=("0")
      echo "$rule_light"
      continue
    fi

    # Run the job; -p carries the -D variable definitions for the job JSON.
    start_time=$(date +%s)
    /usr/bin/python "$DATAX_PY" -p "$PARAMS" "$job"
    exit_code=$?
    end_time=$(date +%s)
    duration=$((end_time - start_time))

    task_results+=("$job")
    task_durations+=("$duration")

    if ((exit_code == 0)); then
      echo "✓ 成功: $job (耗时: ${duration}秒)"
      success_count=$((success_count + 1))
      task_statuses+=("成功")
    else
      # Incremental runs are normally unattended: record the failure and
      # carry on with the next job instead of stopping.
      echo "✗ 失败: $job (耗时: ${duration}秒)"
      echo "错误代码: $exit_code"
      fail_count=$((fail_count + 1))
      task_statuses+=("失败")
    fi

    echo "任务结束时间: $(date '+%H:%M:%S')"
    echo "$rule_light"
    echo
  done

  # Record the end of the whole run.
  local script_end_time script_end_timestamp total_duration
  script_end_time=$(date '+%Y-%m-%d %H:%M:%S')
  script_end_timestamp=$(date +%s)
  total_duration=$((script_end_timestamp - script_start_timestamp))

  echo "$rule_heavy"
  echo "执行结果汇总 (Incremental Sync)"
  echo "$rule_heavy"
  echo "脚本开始时间: $script_start_time"
  echo "脚本结束时间: $script_end_time"
  echo "脚本总耗时: ${total_duration}秒"
  echo "$rule_heavy"
  echo "任务执行详情:"
  echo "$rule_light"

  # One report line per job: name, icon, status, duration.
  local i status_icon
  for i in "${!task_results[@]}"; do
    status_icon="✓"
    if [[ "${task_statuses[$i]}" == "失败" || "${task_statuses[$i]}" == "文件不存在" ]]; then
      status_icon="✗"
    fi
    printf "%-40s %-2s %-10s %-8s秒\n" \
      "${task_results[$i]}" \
      "$status_icon" \
      "${task_statuses[$i]}" \
      "${task_durations[$i]}"
  done

  echo "$rule_light"
  echo "汇总统计:"
  echo "总任务数: $total"
  echo "成功: $success_count"
  echo "失败: $fail_count"

  if ((total > 0)); then
    echo "成功率: $((success_count * 100 / total))%"
  fi

  # Average duration over jobs that took at least one second; zero-duration
  # entries (including missing files) are left out of the average.
  if ((${#task_durations[@]} > 0)); then
    local total_task_time=0
    local valid_tasks=0
    local d
    for d in "${task_durations[@]}"; do
      if ((d > 0)); then
        total_task_time=$((total_task_time + d))
        valid_tasks=$((valid_tasks + 1))
      fi
    done
    if ((valid_tasks > 0)); then
      echo "平均任务耗时: $((total_task_time / valid_tasks))秒"
    fi
  fi

  echo "$rule_heavy"
  return 0
}

# Must stay the last command: its return value becomes the script's exit status.
main "$@"