Add influxdb export/import + add new influxdb instance for prod

This commit is contained in:
2025-10-28 12:56:42 +07:00
parent ffe1bed051
commit 5cef270d64
5 changed files with 993 additions and 2 deletions

View File

@@ -0,0 +1,378 @@
#!/bin/bash
# InfluxDB CSV Data Import Script
#
# Interactively imports a previously exported annotated-CSV file into an
# InfluxDB instance whose connection settings are read from the matching
# appsettings.<Environment>.json file.
#
# Usage: ./import-csv-data.sh
set -euo pipefail  # Exit on errors, unset variables, and mid-pipeline failures

# Colors for output (never reassigned)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Resolve the script's own location so it works from any working directory.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
PROJECT_ROOT="$(dirname "$(dirname "$SCRIPT_DIR")")"
SRC_DIR="$PROJECT_ROOT/src"
EXPORTS_BASE_DIR="$SCRIPT_DIR/exports"
readonly SCRIPT_DIR PROJECT_ROOT SRC_DIR EXPORTS_BASE_DIR
# Logging functions
log() {
    # Success/progress message: green, prefixed with a timestamp.
    local stamp
    stamp=$(date +'%Y-%m-%d %H:%M:%S')
    echo -e "${GREEN}[${stamp}] $1${NC}"
}
warn() {
    # Warning message: yellow, timestamped.
    # Sent to stderr so warnings are still visible (and not silently
    # captured) when the caller's stdout is inside $( ... ).
    echo -e "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] WARNING: $1${NC}" >&2
}
error() {
    # Fatal error: red, timestamped, then abort the whole script.
    # Printed to stderr so the message is not swallowed when error is
    # invoked inside a command substitution.
    echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: $1${NC}" >&2
    exit 1
}
info() {
    # Informational message: blue, prefixed with a timestamp.
    local stamp
    stamp=$(date +'%Y-%m-%d %H:%M:%S')
    echo -e "${BLUE}[${stamp}] INFO: $1${NC}"
}
# Verify required CLI tooling before doing any work.
if ! command -v influx >/dev/null 2>&1; then
    error "InfluxDB CLI is not installed. Please install it first: brew install influxdb-cli"
fi

# jq is needed to read the InfluxDB settings out of the appsettings JSON.
if ! command -v jq >/dev/null 2>&1; then
    warn "jq is not installed. Installing it for JSON parsing..."
    if ! command -v brew >/dev/null 2>&1; then
        error "jq is not installed and brew is not available. Please install jq manually."
    fi
    brew install jq || error "Failed to install jq. Please install it manually: brew install jq"
fi
# Banner
cat <<'BANNER'

============================================
 InfluxDB CSV Data Import
============================================

BANNER

# Nothing to import when no export has ever been produced.
[ -d "$EXPORTS_BASE_DIR" ] || error "Exports directory not found: $EXPORTS_BASE_DIR"
# ---- Select the source environment (a sub-directory of exports/) ----
echo "Available export source environments:"
# Build the list with a glob instead of parsing `ls` output, which breaks
# on names containing whitespace.
ENVIRONMENTS=()
for env_dir in "$EXPORTS_BASE_DIR"/*/; do
    [ -d "$env_dir" ] && ENVIRONMENTS+=("$(basename "$env_dir")")
done
if [ ${#ENVIRONMENTS[@]} -eq 0 ]; then
    error "No export environments found in: $EXPORTS_BASE_DIR"
fi
for i in "${!ENVIRONMENTS[@]}"; do
    echo "$((i+1))) ${ENVIRONMENTS[$i]}"
done
echo ""
read -p "Select source environment (1-${#ENVIRONMENTS[@]}): " ENV_CHOICE
# Reject non-numeric input before using it in arithmetic comparisons;
# otherwise `[ ... -lt ... ]` itself errors and kills the script under set -e
# without ever printing "Invalid choice".
if ! [[ "$ENV_CHOICE" =~ ^[0-9]+$ ]] || [ "$ENV_CHOICE" -lt 1 ] || [ "$ENV_CHOICE" -gt ${#ENVIRONMENTS[@]} ]; then
    error "Invalid choice"
fi
SOURCE_ENV="${ENVIRONMENTS[$((ENV_CHOICE-1))]}"
ENV_EXPORT_DIR="$EXPORTS_BASE_DIR/$SOURCE_ENV"
log "Selected source environment: $SOURCE_ENV"
# ---- Select which export (timestamped sub-directory) to import ----
echo ""
echo "Available exports for $SOURCE_ENV:"
# Glob + sort -r (newest timestamp first) instead of parsing `ls` output.
EXPORTS=()
while IFS= read -r export_name; do
    [ -n "$export_name" ] && EXPORTS+=("$export_name")
done < <(
    for export_dir in "$ENV_EXPORT_DIR"/*/; do
        [ -d "$export_dir" ] && basename "$export_dir"
    done | sort -r
)
if [ ${#EXPORTS[@]} -eq 0 ]; then
    error "No exports found for environment: $SOURCE_ENV"
fi
for i in "${!EXPORTS[@]}"; do
    EXPORT_PATH="$ENV_EXPORT_DIR/${EXPORTS[$i]}"
    METADATA_FILE="$EXPORT_PATH/export-metadata.txt"
    if [ -f "$METADATA_FILE" ]; then
        # Pull the summary fields out of the metadata file for the menu line.
        # `|| true` keeps a missing field non-fatal (empty string) even when
        # the shell runs with pipefail enabled.
        EXPORT_SIZE=$(grep "Export Size:" "$METADATA_FILE" | cut -d: -f2 | xargs || true)
        DATA_POINTS=$(grep "Data Points:" "$METADATA_FILE" | cut -d: -f2 | xargs || true)
        EXPORT_DATE=$(grep "Date:" "$METADATA_FILE" | cut -d: -f2- | xargs || true)
        echo "$((i+1))) ${EXPORTS[$i]} - $EXPORT_DATE ($EXPORT_SIZE, $DATA_POINTS points)"
    else
        echo "$((i+1))) ${EXPORTS[$i]}"
    fi
done
echo ""
read -p "Select export to import (1-${#EXPORTS[@]}): " EXPORT_CHOICE
# Validate numeric input before arithmetic tests (see environment selection).
if ! [[ "$EXPORT_CHOICE" =~ ^[0-9]+$ ]] || [ "$EXPORT_CHOICE" -lt 1 ] || [ "$EXPORT_CHOICE" -gt ${#EXPORTS[@]} ]; then
    error "Invalid choice"
fi
SELECTED_EXPORT="${EXPORTS[$((EXPORT_CHOICE-1))]}"
IMPORT_FROM_DIR="$ENV_EXPORT_DIR/$SELECTED_EXPORT"
log "Selected export: $SELECTED_EXPORT"
info "Export location: $IMPORT_FROM_DIR"
# Locate the exported CSV inside the chosen export directory (first match wins).
CSV_FILE=$(find "$IMPORT_FROM_DIR" -name "*.csv" | head -1)
[ -f "$CSV_FILE" ] || error "No CSV file found in: $IMPORT_FROM_DIR"
CSV_SIZE=$(du -sh "$CSV_FILE" | cut -f1)
info "CSV file: $(basename "$CSV_FILE") ($CSV_SIZE)"
# ---- Select the TARGET environment the data will be written into ----
echo ""
echo "Select TARGET environment for import:"
echo "1) SandboxLocal"
echo "2) ProductionLocal"
echo ""
read -p "Enter your choice (1 or 2): " TARGET_ENV_CHOICE
case "$TARGET_ENV_CHOICE" in
    1) TARGET_ENVIRONMENT="SandboxLocal" ;;
    2) TARGET_ENVIRONMENT="ProductionLocal" ;;
    *) error "Invalid choice. Please run the script again and select 1 or 2." ;;
esac
# Connection settings live in the per-environment appsettings file.
APPSETTINGS_FILE="$SRC_DIR/Managing.Api/appsettings.$TARGET_ENVIRONMENT.json"
log "Target environment: $TARGET_ENVIRONMENT"
[ -f "$APPSETTINGS_FILE" ] || error "Configuration file not found: $APPSETTINGS_FILE"
log "Reading configuration from: $APPSETTINGS_FILE"
# ---- Read InfluxDB connection settings from the appsettings JSON ----
# read_setting <jq-path>: print the value of a required setting, or abort
# with the same error message the old per-setting checks produced.
# The error call is redirected to stderr at the call site so the message is
# visible even though this function runs inside a command substitution.
read_setting() {
    local path="$1" value
    value=$(jq -r "$path" "$APPSETTINGS_FILE")
    if [ "$value" = "null" ] || [ -z "$value" ]; then
        error "Failed to parse ${path#.} from configuration file" >&2
    fi
    printf '%s\n' "$value"
}

INFLUX_URL=$(read_setting '.InfluxDb.Url')
INFLUX_ORG=$(read_setting '.InfluxDb.Organization')
INFLUX_TOKEN=$(read_setting '.InfluxDb.Token')

info "Target InfluxDB URL: $INFLUX_URL"
info "Organization: $INFLUX_ORG"
# All price data is written into this fixed bucket.
BUCKET_NAME="prices-bucket"
# ---- Ensure the target bucket exists (offer to create it) ----
info "Checking if bucket '$BUCKET_NAME' exists..."
if ! influx bucket list --host "$INFLUX_URL" --org "$INFLUX_ORG" --token "$INFLUX_TOKEN" --name "$BUCKET_NAME" &>/dev/null; then
    warn "Bucket '$BUCKET_NAME' does not exist!"
    read -p "Create the bucket now? (yes/no): " CREATE_BUCKET
    if [ "$CREATE_BUCKET" != "yes" ]; then
        error "Cannot proceed without target bucket"
    fi
    # --retention 0 means "keep data forever".
    influx bucket create \
        --name "$BUCKET_NAME" \
        --retention 0 \
        --host "$INFLUX_URL" \
        --org "$INFLUX_ORG" \
        --token "$INFLUX_TOKEN" || error "Failed to create bucket"
    log "✅ Bucket created successfully"
else
    log "✅ Bucket '$BUCKET_NAME' exists"
fi
# ---- Final confirmation before any data is written ----
echo ""
warn "⚠️ IMPORTANT INFORMATION:"
echo " Source: $SOURCE_ENV/$SELECTED_EXPORT"
echo " Target: $TARGET_ENVIRONMENT ($INFLUX_URL)"
echo " Bucket: $BUCKET_NAME"
echo " Data Size: $CSV_SIZE"
warn " This will ADD data to the bucket (existing data will be preserved)"
warn " Duplicate timestamps may cause overwrites"
echo ""
read -p "Are you sure you want to continue? (yes/no): " CONFIRM
# Anything other than an exact "yes" aborts cleanly.
if [ "$CONFIRM" != "yes" ]; then
    log "Import cancelled by user"
    exit 0
fi
# ---- Perform import ----
echo ""
log "🚀 Starting import operation..."
log "This may take several minutes for large files..."
echo ""
# Scratch file for the CSV -> line-protocol conversion.
TEMP_LP_FILE=$(mktemp)
# Single quotes: expand the variable when the trap fires (not at definition
# time), and keep it double-quoted so a path with spaces cannot word-split.
trap 'rm -f "$TEMP_LP_FILE"' EXIT
info "Converting CSV to line protocol format..."
# Convert annotated CSV to line protocol using awk.
# BUG FIX: annotated CSV from influx begins with #group/#datatype/#default
# rows. Those rows are skipped, but awk's NR still counts them, so the old
# `NR==1` rule never matched the real header row and the field[] map stayed
# empty ($field["..."] degenerated to $0). A header_seen flag treats the
# first surviving (non-annotation, non-empty) row as the header, which is
# also equivalent for plain CSV without annotations.
awk -F',' '
BEGIN {OFS=","}
# Skip annotation lines
/^#/ {next}
# Skip empty lines
/^[[:space:]]*$/ {next}
# First remaining row is the header: remember each column name -> position.
!header_seen {
    for (i=1; i<=NF; i++) {
        field[$i] = i
    }
    header_seen = 1
    next
}
# Process data rows
{
    # Extract the tag and timestamp columns by name.
    time = $field["_time"]
    measurement = $field["_measurement"]
    exchange = $field["exchange"]
    ticker = $field["ticker"]
    timeframe = $field["timeframe"]
    # Skip if essential fields are missing
    if (time == "" || measurement == "" || exchange == "" || ticker == "" || timeframe == "") next
    # Build line protocol
    # Format: measurement,tag1=value1,tag2=value2 field1=value1,field2=value2 timestamp
    printf "%s,exchange=%s,ticker=%s,timeframe=%s ", measurement, exchange, ticker, timeframe
    # Emit every remaining column as a field, skipping Flux bookkeeping
    # columns (result, table, _start, _stop) and the tag columns above.
    first = 1
    for (fname in field) {
        if (fname != "_time" && fname != "_start" && fname != "_stop" && fname != "_measurement" &&
            fname != "exchange" && fname != "ticker" && fname != "timeframe" &&
            fname != "result" && fname != "table" && fname != "") {
            val = $field[fname]
            if (val != "" && val != "NaN") {
                if (!first) printf ","
                # Integer-looking values get the line-protocol "i" suffix;
                # everything else is written verbatim (floats stay floats).
                if (val ~ /^[0-9]+$/) {
                    printf "%s=%si", fname, val
                } else {
                    printf "%s=%s", fname, val
                }
                first = 0
            }
        }
    }
    # Append the timestamp column as-is.
    # NOTE(review): _time in annotated CSV is RFC3339 text, while the batch
    # import below writes with --precision s (numeric epoch expected) —
    # confirm the server accepts these timestamps or convert them here.
    printf " %s\n", time
}
' "$CSV_FILE" > "$TEMP_LP_FILE" 2>/dev/null || {
    warn "CSV parsing method 1 failed, trying direct import..."
    # Fallback: let the influx CLI ingest the annotated CSV directly.
    info "Attempting direct CSV import..."
    if influx write \
        --host "$INFLUX_URL" \
        --org "$INFLUX_ORG" \
        --token "$INFLUX_TOKEN" \
        --bucket "$BUCKET_NAME" \
        --format csv \
        --file "$CSV_FILE" 2>&1; then
        log "✅ Import completed successfully using direct CSV method!"
        echo ""
        log "📊 Import Summary"
        echo "============================================"
        info "Source: $SOURCE_ENV/$SELECTED_EXPORT"
        info "Target: $TARGET_ENVIRONMENT"
        info "Bucket: $BUCKET_NAME"
        log "Status: Success"
        echo "============================================"
        echo ""
        exit 0
    else
        error "Both import methods failed. Please check the error messages above."
    fi
}
# ---- Import the generated line protocol, if any was produced ----
if [ ! -s "$TEMP_LP_FILE" ]; then
    error "Failed to generate line protocol data"
fi
LP_LINES=$(wc -l < "$TEMP_LP_FILE" | xargs)
info "Generated $LP_LINES lines of line protocol"
# Write in fixed-size batches so one huge request cannot time out.
BATCH_SIZE=5000
TOTAL_LINES=$LP_LINES
CURRENT_LINE=0
info "Importing in batches of $BATCH_SIZE lines..."
while [ "$CURRENT_LINE" -lt "$TOTAL_LINES" ]; do
    END_LINE=$((CURRENT_LINE + BATCH_SIZE))
    BATCH_NUM=$((CURRENT_LINE / BATCH_SIZE + 1))
    PROGRESS=$((CURRENT_LINE * 100 / TOTAL_LINES))
    info "Processing batch $BATCH_NUM (Progress: ${PROGRESS}%)..."
    # Slice the next batch out of the temp file and stream it to influx.
    # A failed batch only warns; the loop keeps going with the rest.
    if ! sed -n "$((CURRENT_LINE + 1)),${END_LINE}p" "$TEMP_LP_FILE" | \
        influx write \
            --host "$INFLUX_URL" \
            --org "$INFLUX_ORG" \
            --token "$INFLUX_TOKEN" \
            --bucket "$BUCKET_NAME" \
            --precision s 2>&1; then
        warn "Batch $BATCH_NUM had errors, continuing..."
    fi
    CURRENT_LINE=$END_LINE
done
log "✅ Import completed successfully!"
# ---- Final summary ----
printf '%s\n' ""
printf '%s\n' "============================================"
log "📊 Import Summary"
printf '%s\n' "============================================"
info "Source: $SOURCE_ENV/$SELECTED_EXPORT"
info "Target: $TARGET_ENVIRONMENT"
info "Bucket: $BUCKET_NAME"
info "File: $(basename "$CSV_FILE")"
info "Size: $CSV_SIZE"
log "Status: Complete"
printf '%s\n' "============================================"
printf '%s\n' ""
log "🎉 Data successfully imported to $TARGET_ENVIRONMENT!"
printf '%s\n' ""
# Show a ready-to-paste verification query for the target instance.
info "Verify the import with:"
printf '%s\n' " influx query 'from(bucket:\"$BUCKET_NAME\") |> range(start:-1d) |> limit(n:10)' \\"
printf '%s\n' " --host \"$INFLUX_URL\" --org \"$INFLUX_ORG\" --token \"$INFLUX_TOKEN\""
printf '%s\n' ""