Add influxdb export/import + add new influxdb instance for prod

This commit is contained in:
2025-10-28 12:56:42 +07:00
parent ffe1bed051
commit 5cef270d64
5 changed files with 993 additions and 2 deletions

View File

@@ -0,0 +1,378 @@
#!/bin/bash
# InfluxDB CSV Data Import Script
#
# Interactively imports a previously exported annotated-CSV file into an
# InfluxDB instance whose connection settings are read from the matching
# appsettings.<Environment>.json file.
#
# Usage: ./import-csv-data.sh
set -euo pipefail  # Exit on errors, unset variables, and mid-pipeline failures

# Colors for output (never reassigned)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Resolve the script's own location so it works from any working directory.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
PROJECT_ROOT="$(dirname "$(dirname "$SCRIPT_DIR")")"
SRC_DIR="$PROJECT_ROOT/src"
EXPORTS_BASE_DIR="$SCRIPT_DIR/exports"
readonly SCRIPT_DIR PROJECT_ROOT SRC_DIR EXPORTS_BASE_DIR
# Logging functions
log() {
    # Success/progress message: green, prefixed with a timestamp.
    local stamp
    stamp=$(date +'%Y-%m-%d %H:%M:%S')
    echo -e "${GREEN}[${stamp}] $1${NC}"
}
warn() {
    # Warning message: yellow, timestamped.
    # Sent to stderr so warnings are still visible (and not silently
    # captured) when the caller's stdout is inside $( ... ).
    echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] WARNING: $1${NC}" >&2
}
error() {
    # Fatal error: red, timestamped, then abort the whole script.
    # Printed to stderr so the message is not swallowed when error is
    # invoked inside a command substitution.
    echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $1${NC}" >&2
    exit 1
}
info() {
    # Informational message: blue, prefixed with a timestamp.
    local stamp
    stamp=$(date +'%Y-%m-%d %H:%M:%S')
    echo -e "${BLUE}[${stamp}] INFO: $1${NC}"
}
# Verify required CLI tooling before doing any work.
if ! command -v influx >/dev/null 2>&1; then
    error "InfluxDB CLI is not installed. Please install it first: brew install influxdb-cli"
fi

# jq is needed to read the InfluxDB settings out of the appsettings JSON.
if ! command -v jq >/dev/null 2>&1; then
    warn "jq is not installed. Installing it for JSON parsing..."
    if ! command -v brew >/dev/null 2>&1; then
        error "jq is not installed and brew is not available. Please install jq manually."
    fi
    brew install jq || error "Failed to install jq. Please install it manually: brew install jq"
fi
# Banner
cat <<'BANNER'

============================================
 InfluxDB CSV Data Import
============================================

BANNER

# Nothing to import when no export has ever been produced.
[ -d "$EXPORTS_BASE_DIR" ] || error "Exports directory not found: $EXPORTS_BASE_DIR"
# ---- Select the source environment (a sub-directory of exports/) ----
echo "Available export source environments:"
# Build the list with a glob instead of parsing `ls` output, which breaks
# on names containing whitespace.
ENVIRONMENTS=()
for env_dir in "$EXPORTS_BASE_DIR"/*/; do
    [ -d "$env_dir" ] && ENVIRONMENTS+=("$(basename "$env_dir")")
done
if [ ${#ENVIRONMENTS[@]} -eq 0 ]; then
    error "No export environments found in: $EXPORTS_BASE_DIR"
fi
for i in "${!ENVIRONMENTS[@]}"; do
    echo "$((i+1))) ${ENVIRONMENTS[$i]}"
done
echo ""
read -p "Select source environment (1-${#ENVIRONMENTS[@]}): " ENV_CHOICE
# Reject non-numeric input before using it in arithmetic comparisons;
# otherwise `[ ... -lt ... ]` itself errors and kills the script under set -e
# without ever printing "Invalid choice".
if ! [[ "$ENV_CHOICE" =~ ^[0-9]+$ ]] || [ "$ENV_CHOICE" -lt 1 ] || [ "$ENV_CHOICE" -gt ${#ENVIRONMENTS[@]} ]; then
    error "Invalid choice"
fi
SOURCE_ENV="${ENVIRONMENTS[$((ENV_CHOICE-1))]}"
ENV_EXPORT_DIR="$EXPORTS_BASE_DIR/$SOURCE_ENV"
log "Selected source environment: $SOURCE_ENV"
# ---- Select which export (timestamped sub-directory) to import ----
echo ""
echo "Available exports for $SOURCE_ENV:"
# Glob + sort -r (newest timestamp first) instead of parsing `ls` output.
EXPORTS=()
while IFS= read -r export_name; do
    [ -n "$export_name" ] && EXPORTS+=("$export_name")
done < <(
    for export_dir in "$ENV_EXPORT_DIR"/*/; do
        [ -d "$export_dir" ] && basename "$export_dir"
    done | sort -r
)
if [ ${#EXPORTS[@]} -eq 0 ]; then
    error "No exports found for environment: $SOURCE_ENV"
fi
for i in "${!EXPORTS[@]}"; do
    EXPORT_PATH="$ENV_EXPORT_DIR/${EXPORTS[$i]}"
    METADATA_FILE="$EXPORT_PATH/export-metadata.txt"
    if [ -f "$METADATA_FILE" ]; then
        # Pull the summary fields out of the metadata file for the menu line.
        # `|| true` keeps a missing field non-fatal (empty string) even when
        # the shell runs with pipefail enabled.
        EXPORT_SIZE=$(grep "Export Size:" "$METADATA_FILE" | cut -d: -f2 | xargs || true)
        DATA_POINTS=$(grep "Data Points:" "$METADATA_FILE" | cut -d: -f2 | xargs || true)
        EXPORT_DATE=$(grep "Date:" "$METADATA_FILE" | cut -d: -f2- | xargs || true)
        echo "$((i+1))) ${EXPORTS[$i]} - $EXPORT_DATE ($EXPORT_SIZE, $DATA_POINTS points)"
    else
        echo "$((i+1))) ${EXPORTS[$i]}"
    fi
done
echo ""
read -p "Select export to import (1-${#EXPORTS[@]}): " EXPORT_CHOICE
# Validate numeric input before arithmetic tests (see environment selection).
if ! [[ "$EXPORT_CHOICE" =~ ^[0-9]+$ ]] || [ "$EXPORT_CHOICE" -lt 1 ] || [ "$EXPORT_CHOICE" -gt ${#EXPORTS[@]} ]; then
    error "Invalid choice"
fi
SELECTED_EXPORT="${EXPORTS[$((EXPORT_CHOICE-1))]}"
IMPORT_FROM_DIR="$ENV_EXPORT_DIR/$SELECTED_EXPORT"
log "Selected export: $SELECTED_EXPORT"
info "Export location: $IMPORT_FROM_DIR"
# Locate the exported CSV inside the chosen export directory (first match wins).
CSV_FILE=$(find "$IMPORT_FROM_DIR" -name "*.csv" | head -1)
[ -f "$CSV_FILE" ] || error "No CSV file found in: $IMPORT_FROM_DIR"
CSV_SIZE=$(du -sh "$CSV_FILE" | cut -f1)
info "CSV file: $(basename "$CSV_FILE") ($CSV_SIZE)"
# ---- Select the TARGET environment the data will be written into ----
echo ""
echo "Select TARGET environment for import:"
echo "1) SandboxLocal"
echo "2) ProductionLocal"
echo ""
read -p "Enter your choice (1 or 2): " TARGET_ENV_CHOICE
case "$TARGET_ENV_CHOICE" in
    1) TARGET_ENVIRONMENT="SandboxLocal" ;;
    2) TARGET_ENVIRONMENT="ProductionLocal" ;;
    *) error "Invalid choice. Please run the script again and select 1 or 2." ;;
esac
# Connection settings live in the per-environment appsettings file.
APPSETTINGS_FILE="$SRC_DIR/Managing.Api/appsettings.$TARGET_ENVIRONMENT.json"
log "Target environment: $TARGET_ENVIRONMENT"
[ -f "$APPSETTINGS_FILE" ] || error "Configuration file not found: $APPSETTINGS_FILE"
log "Reading configuration from: $APPSETTINGS_FILE"
# ---- Read InfluxDB connection settings from the appsettings JSON ----
# read_setting <jq-path>: print the value of a required setting, or abort
# with the same error message the old per-setting checks produced.
# The error call is redirected to stderr at the call site so the message is
# visible even though this function runs inside a command substitution.
read_setting() {
    local path="$1" value
    value=$(jq -r "$path" "$APPSETTINGS_FILE")
    if [ "$value" = "null" ] || [ -z "$value" ]; then
        error "Failed to parse ${path#.} from configuration file" >&2
    fi
    printf '%s\n' "$value"
}

INFLUX_URL=$(read_setting '.InfluxDb.Url')
INFLUX_ORG=$(read_setting '.InfluxDb.Organization')
INFLUX_TOKEN=$(read_setting '.InfluxDb.Token')

info "Target InfluxDB URL: $INFLUX_URL"
info "Organization: $INFLUX_ORG"
# All price data is written into this fixed bucket.
BUCKET_NAME="prices-bucket"
# ---- Ensure the target bucket exists (offer to create it) ----
info "Checking if bucket '$BUCKET_NAME' exists..."
if ! influx bucket list --host "$INFLUX_URL" --org "$INFLUX_ORG" --token "$INFLUX_TOKEN" --name "$BUCKET_NAME" &>/dev/null; then
    warn "Bucket '$BUCKET_NAME' does not exist!"
    read -p "Create the bucket now? (yes/no): " CREATE_BUCKET
    if [ "$CREATE_BUCKET" != "yes" ]; then
        error "Cannot proceed without target bucket"
    fi
    # --retention 0 means "keep data forever".
    influx bucket create \
        --name "$BUCKET_NAME" \
        --retention 0 \
        --host "$INFLUX_URL" \
        --org "$INFLUX_ORG" \
        --token "$INFLUX_TOKEN" || error "Failed to create bucket"
    log "✅ Bucket created successfully"
else
    log "✅ Bucket '$BUCKET_NAME' exists"
fi
# ---- Final confirmation before any data is written ----
echo ""
warn "⚠️ IMPORTANT INFORMATION:"
echo " Source: $SOURCE_ENV/$SELECTED_EXPORT"
echo " Target: $TARGET_ENVIRONMENT ($INFLUX_URL)"
echo " Bucket: $BUCKET_NAME"
echo " Data Size: $CSV_SIZE"
warn " This will ADD data to the bucket (existing data will be preserved)"
warn " Duplicate timestamps may cause overwrites"
echo ""
read -p "Are you sure you want to continue? (yes/no): " CONFIRM
# Anything other than an exact "yes" aborts cleanly.
if [ "$CONFIRM" != "yes" ]; then
    log "Import cancelled by user"
    exit 0
fi
# ---- Perform import ----
echo ""
log "🚀 Starting import operation..."
log "This may take several minutes for large files..."
echo ""
# Scratch file for the CSV -> line-protocol conversion.
TEMP_LP_FILE=$(mktemp)
# Single quotes: expand the variable when the trap fires (not at definition
# time), and keep it double-quoted so a path with spaces cannot word-split.
trap 'rm -f "$TEMP_LP_FILE"' EXIT
info "Converting CSV to line protocol format..."
# Convert annotated CSV to line protocol using awk.
# BUG FIX: annotated CSV from influx begins with #group/#datatype/#default
# rows. Those rows are skipped, but awk's NR still counts them, so the old
# `NR==1` rule never matched the real header row and the field[] map stayed
# empty ($field["..."] degenerated to $0). A header_seen flag treats the
# first surviving (non-annotation, non-empty) row as the header, which is
# also equivalent for plain CSV without annotations.
awk -F',' '
BEGIN {OFS=","}
# Skip annotation lines
/^#/ {next}
# Skip empty lines
/^[[:space:]]*$/ {next}
# First remaining row is the header: remember each column name -> position.
!header_seen {
    for (i=1; i<=NF; i++) {
        field[$i] = i
    }
    header_seen = 1
    next
}
# Process data rows
{
    # Extract the tag and timestamp columns by name.
    time = $field["_time"]
    measurement = $field["_measurement"]
    exchange = $field["exchange"]
    ticker = $field["ticker"]
    timeframe = $field["timeframe"]
    # Skip if essential fields are missing
    if (time == "" || measurement == "" || exchange == "" || ticker == "" || timeframe == "") next
    # Build line protocol
    # Format: measurement,tag1=value1,tag2=value2 field1=value1,field2=value2 timestamp
    printf "%s,exchange=%s,ticker=%s,timeframe=%s ", measurement, exchange, ticker, timeframe
    # Emit every remaining column as a field, skipping Flux bookkeeping
    # columns (result, table, _start, _stop) and the tag columns above.
    first = 1
    for (fname in field) {
        if (fname != "_time" && fname != "_start" && fname != "_stop" && fname != "_measurement" &&
            fname != "exchange" && fname != "ticker" && fname != "timeframe" &&
            fname != "result" && fname != "table" && fname != "") {
            val = $field[fname]
            if (val != "" && val != "NaN") {
                if (!first) printf ","
                # Integer-looking values get the line-protocol "i" suffix;
                # everything else is written verbatim (floats stay floats).
                if (val ~ /^[0-9]+$/) {
                    printf "%s=%si", fname, val
                } else {
                    printf "%s=%s", fname, val
                }
                first = 0
            }
        }
    }
    # Append the timestamp column as-is.
    # NOTE(review): _time in annotated CSV is RFC3339 text, while the batch
    # import below writes with --precision s (numeric epoch expected) —
    # confirm the server accepts these timestamps or convert them here.
    printf " %s\n", time
}
' "$CSV_FILE" > "$TEMP_LP_FILE" 2>/dev/null || {
    warn "CSV parsing method 1 failed, trying direct import..."
    # Fallback: let the influx CLI ingest the annotated CSV directly.
    info "Attempting direct CSV import..."
    if influx write \
        --host "$INFLUX_URL" \
        --org "$INFLUX_ORG" \
        --token "$INFLUX_TOKEN" \
        --bucket "$BUCKET_NAME" \
        --format csv \
        --file "$CSV_FILE" 2>&1; then
        log "✅ Import completed successfully using direct CSV method!"
        echo ""
        log "📊 Import Summary"
        echo "============================================"
        info "Source: $SOURCE_ENV/$SELECTED_EXPORT"
        info "Target: $TARGET_ENVIRONMENT"
        info "Bucket: $BUCKET_NAME"
        log "Status: Success"
        echo "============================================"
        echo ""
        exit 0
    else
        error "Both import methods failed. Please check the error messages above."
    fi
}
# ---- Import the generated line protocol, if any was produced ----
if [ ! -s "$TEMP_LP_FILE" ]; then
    error "Failed to generate line protocol data"
fi
LP_LINES=$(wc -l < "$TEMP_LP_FILE" | xargs)
info "Generated $LP_LINES lines of line protocol"
# Write in fixed-size batches so one huge request cannot time out.
BATCH_SIZE=5000
TOTAL_LINES=$LP_LINES
CURRENT_LINE=0
info "Importing in batches of $BATCH_SIZE lines..."
while [ "$CURRENT_LINE" -lt "$TOTAL_LINES" ]; do
    END_LINE=$((CURRENT_LINE + BATCH_SIZE))
    BATCH_NUM=$((CURRENT_LINE / BATCH_SIZE + 1))
    PROGRESS=$((CURRENT_LINE * 100 / TOTAL_LINES))
    info "Processing batch $BATCH_NUM (Progress: ${PROGRESS}%)..."
    # Slice the next batch out of the temp file and stream it to influx.
    # A failed batch only warns; the loop keeps going with the rest.
    if ! sed -n "$((CURRENT_LINE + 1)),${END_LINE}p" "$TEMP_LP_FILE" | \
        influx write \
            --host "$INFLUX_URL" \
            --org "$INFLUX_ORG" \
            --token "$INFLUX_TOKEN" \
            --bucket "$BUCKET_NAME" \
            --precision s 2>&1; then
        warn "Batch $BATCH_NUM had errors, continuing..."
    fi
    CURRENT_LINE=$END_LINE
done
log "✅ Import completed successfully!"
# ---- Final summary ----
printf '%s\n' ""
printf '%s\n' "============================================"
log "📊 Import Summary"
printf '%s\n' "============================================"
info "Source: $SOURCE_ENV/$SELECTED_EXPORT"
info "Target: $TARGET_ENVIRONMENT"
info "Bucket: $BUCKET_NAME"
info "File: $(basename "$CSV_FILE")"
info "Size: $CSV_SIZE"
log "Status: Complete"
printf '%s\n' "============================================"
printf '%s\n' ""
log "🎉 Data successfully imported to $TARGET_ENVIRONMENT!"
printf '%s\n' ""
# Show a ready-to-paste verification query for the target instance.
info "Verify the import with:"
printf '%s\n' " influx query 'from(bucket:\"$BUCKET_NAME\") |> range(start:-1d) |> limit(n:10)' \\"
printf '%s\n' " --host \"$INFLUX_URL\" --org \"$INFLUX_ORG\" --token \"$INFLUX_TOKEN\""
printf '%s\n' ""