From 77849391915623a2f01dc8467ca8175755ac77a1 Mon Sep 17 00:00:00 2001
From: Nir Ben-Or <nir@swirldslabs.com>
Date: Tue, 17 Sep 2024 15:26:12 -0500
Subject: [PATCH 1/9] Initial commit, adding the bootstrap from export guide,
 bootstrap.sh script, and linking the bootstrap guide to the main DB README.md
 doc

Signed-off-by: Nir Ben-Or <nir@swirldslabs.com>
---
 docs/database/README.md                       |   4 +
 docs/database/bootstrap.md                    | 404 ++++++++++++++++++
 .../main/resources/db/scripts/bootstrap.sh    | 257 +++++++++++
 3 files changed, 665 insertions(+)
 create mode 100644 docs/database/bootstrap.md
 create mode 100644 hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh

diff --git a/docs/database/README.md b/docs/database/README.md
index d23e4a324d5..492e697fa4f 100644
--- a/docs/database/README.md
+++ b/docs/database/README.md
@@ -304,3 +304,7 @@ is expected to migrate full mainnet data in 10 days.
 ## Citus Backup and Restore
 
 Please refer to this [document](/docs/database/citus.md) for the steps.
+
+## Bootstrap a DB from exported data
+
+Please refer to this [document](/docs/database/bootstrap.md) for instructions.
\ No newline at end of file
diff --git a/docs/database/bootstrap.md b/docs/database/bootstrap.md
new file mode 100644
index 00000000000..b2ca7c41282
--- /dev/null
+++ b/docs/database/bootstrap.md
@@ -0,0 +1,404 @@
+# Mirror Node Database Bootstrap Guide
+
+This guide provides step-by-step instructions for setting up a fresh PostgreSQL 14 database and importing Mirror Node data into it. The process involves initializing the database, configuring environment variables, and running the import script. The data import is a long-running process, so it's recommended to run it within a `screen` or `tmux` session.
+
+---
+
+## Table of Contents
+
+- [Mirror Node Database Bootstrap Guide](#mirror-node-database-bootstrap-guide)
+  - [Table of Contents](#table-of-contents)
+  - [Prerequisites](#prerequisites)
+  - [Database Initialization](#database-initialization)
+    - [1. Configure Environment Variables](#1-configure-environment-variables)
+    - [2. Important Note for Google Cloud SQL Users](#2-important-note-for-google-cloud-sql-users)
+    - [3. Run the Initialization Script](#3-run-the-initialization-script)
+    - [4. Import the Database Schema](#4-import-the-database-schema)
+  - [Data Import Process](#data-import-process)
+    - [1. Download the Database Export Data](#1-download-the-database-export-data)
+    - [2. Download the Import Script](#2-download-the-import-script)
+    - [3. Run the Import Script](#3-run-the-import-script)
+  - [Mirror Node Version Compatibility](#mirror-node-version-compatibility)
+  - [Handling Failed Imports](#handling-failed-imports)
+    - [Steps to Handle Failed Imports:](#steps-to-handle-failed-imports)
+  - [Additional Notes](#additional-notes)
+  - [Troubleshooting](#troubleshooting)
+
+---
+
+## Prerequisites
+
+- **PostgreSQL 14** installed and running.
+- Access to a machine where you can run the initialization and import scripts and connect to the PostgreSQL database.
+- A Google Cloud Platform (GCP) account with a valid billing account attached (required for downloading data from a Requester Pays bucket).
+
+---
+
+## Database Initialization
+
+### 1. Configure Environment Variables
+
+Set the following environment variables on the machine from which you will run the initialization and import scripts. These variables allow for database connectivity and authentication.
+
+**Database Connection Variables:**
+
+```bash
+export PGUSER="postgres"
+export PGPASSWORD="YOUR_POSTGRES_PASSWORD"
+export PGDATABASE="postgres"
+export PGHOST="DB_IP_ADDRESS"
+```
+
+- `PGUSER`: The PostgreSQL superuser with administrative privileges (typically `postgres`).
+- `PGPASSWORD`: Password for the PostgreSQL superuser.
+- `PGDATABASE`: The default database to connect to (`postgres` by default).
+- `PGHOST`: The IP address or hostname of your PostgreSQL database server.
+
+**Database User Password Variables:**
+
+Set the following environment variables to define passwords for the various database users that will be created during initialization.
+
+```bash
+export GRAPHQL_PASSWORD="SET_PASSWORD"
+export GRPC_PASSWORD="SET_PASSWORD"
+export IMPORTER_PASSWORD="SET_PASSWORD"
+export OWNER_PASSWORD="SET_PASSWORD"
+export REST_PASSWORD="SET_PASSWORD"
+export REST_JAVA_PASSWORD="SET_PASSWORD"
+export ROSETTA_PASSWORD="SET_PASSWORD"
+export WEB3_PASSWORD="SET_PASSWORD"
+```
+
+- Replace `SET_PASSWORD` with strong, unique passwords for each respective user.
+
+### 2. Important Note for Google Cloud SQL Users
+
+If you are using **Google Cloud SQL** for your PostgreSQL database, an additional step is required before running the `init.sh` script to ensure proper initialization.
+
+**Add the Following Line to the Initialization Script:**
+
+Before running the `init.sh` script, you need to grant the `mirror_node` role to the `postgres` user. This is necessary because Google Cloud SQL restricts certain permissions for the `postgres` user.
+
+Add the following line **before** running the `init.sh` script:
+
+```sql
+GRANT mirror_node TO postgres;
+```
+
+**Revised Section of `init.sh`:**
+
+```sql
+-- Create database & owner
+CREATE USER :ownerUsername WITH LOGIN PASSWORD :'ownerPassword';
+GRANT mirror_node TO postgres;
+CREATE DATABASE :dbName WITH OWNER :ownerUsername;
+```
+
+- This adjustment ensures that the `postgres` user has the necessary permissions to execute the initialization script correctly on Google Cloud SQL.
+
+### 3. Run the Initialization Script
+
+Download the initialization script `init.sh` from the repository:
+
+```bash
+curl -O https://raw.githubusercontent.com/hashgraph/hedera-mirror-node/main/hedera-mirror-importer/src/main/resources/db/scripts/init.sh
+chmod +x init.sh
+```
+
+Run the initialization script:
+
+```bash
+./init.sh
+echo "EXIT STATUS: $?"
+```
+
+- The exit status `0` indicates the script executed successfully.
+- The script will create the `mirror_node` database, along with all necessary roles, users, and permissions within your PostgreSQL database, using the passwords specified in the environment variables.
+
+### 4. Import the Database Schema
+
+After the initialization script completes successfully, update the environment variables to connect using the `mirror_node` user and database:
+
+```bash
+export PGUSER="mirror_node"
+export PGPASSWORD="$OWNER_PASSWORD"  # Use the password set for OWNER_PASSWORD
+export PGDATABASE="mirror_node"
+```
+
+Import the database schema:
+
+```bash
+psql -f schema.sql
+echo "EXIT STATUS: $?"
+```
+
+- Ensure the exit status is `0` to confirm the schema was imported successfully.
+
+---
+
+## Data Import Process
+
+### 1. Download the Database Export Data
+
+The Mirror Node database export data is available in a Google Cloud Storage (GCS) bucket:
+
+- **Bucket URL:** [mirrornode-db-export](https://console.cloud.google.com/storage/browser/mirrornode-db-export)
+
+**Important Notes:**
+
+- The bucket is **read-only** to the public.
+- It is configured as **Requester Pays**, meaning you need a GCP account with a valid billing account attached to download the data.
+- You will be billed for the data transfer fees incurred during the download.
+
+**Download Instructions:**
+
+1. **Authenticate with GCP:**
+
+   Ensure you have the [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) installed and authenticated:
+
+   ```bash
+   gcloud auth login
+   gcloud config set billing/disable_usage_reporting false
+   ```
+
+2. **Set the Default Project:**
+
+   ```bash
+   gcloud config set project YOUR_GCP_PROJECT_ID
+   ```
+
+3. **Download the Data:**
+
+   Create an empty directory to store the data and download all files and subdirectories:
+
+   ```bash
+   mkdir -p /path/to/db_export
+   gsutil -u YOUR_GCP_PROJECT_ID -m cp -r gs://mirrornode-db-export/* /path/to/db_export/
+   ```
+
+   - Replace `/path/to/db_export` with your desired directory path.
+   - Ensure all files and subdirectories are downloaded into this single parent directory.
+   - **Note:** The `-m` flag enables parallel downloads to speed up the process.
+
+### 2. Download the Import Script
+
+Download the import script `bootstrap.sh` from the repository:
+
+```bash
+curl -O https://raw.githubusercontent.com/hashgraph/hedera-mirror-node/main/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
+chmod +x bootstrap.sh
+```
+
+### 3. Run the Import Script
+
+The import script is designed to efficiently import the Mirror Node data into your PostgreSQL database. It handles compressed CSV files and uses parallel processing to speed up the import.
+
+**Script Summary:**
+
+- **Name:** `bootstrap.sh`
+- **Functionality:** Imports data from compressed CSV files into the PostgreSQL database using parallel processing. It processes multiple tables concurrently based on the number of CPU cores specified.
+- **Requirements:** Ensure that the environment variables for database connectivity are set (`PGUSER`, `PGPASSWORD`, `PGDATABASE`, `PGHOST`).
+
+**Instructions:**
+
+1. **Ensure Environment Variables are Set:**
+
+   The environment variables should still be set from the previous steps. Verify them:
+
+   ```bash
+   echo $PGUSER     # Should output 'mirror_node'
+   echo $PGPASSWORD # Should output the password you set for OWNER_PASSWORD
+   echo $PGDATABASE # Should output 'mirror_node'
+   echo $PGHOST     # Should be set to your DB IP address
+   ```
+
+2. **Run the Import Script within a `screen` or `tmux` Session:**
+
+   It's recommended to run the import script within a `screen` or `tmux` session, as the import process may take several hours to complete.
+
+   **Using `screen`:**
+
+   ```bash
+   screen -S db_import
+   ```
+
+   **Run the Import Script:**
+
+   ```bash
+   ./bootstrap.sh 8 /path/to/db_export/
+   ```
+
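+   - `8` is the number of CPU cores on the database instance (`DB_CPU_CORES`). The script runs at most one fewer parallel import jobs than this, and never more than the local machine's core count minus one, so adjust the number to your database server's resources.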
+   - `/path/to/db_export/` is the directory where you downloaded the database export data.
+
+   **Detach from the `screen` Session:**
+
+   Press `Ctrl+A` then `D`.
+
+   - This allows the import process to continue running in the background.
+
+   **Reattach to the `screen` Session Later:**
+
+   ```bash
+   screen -r db_import
+   ```
+
+3. **Monitor the Import Process:**
+
+   - The script will output logs indicating the progress of the import.
+   - Check the `import.log` file for detailed logs and any error messages.
+
+4. **Check the Exit Status:**
+
+   After the script completes, check the exit status:
+
+   ```bash
+   echo "EXIT STATUS: $?"
+   ```
+
+   - An exit status of `0` indicates the import completed successfully.
+   - If the exit status is not `0`, refer to the `import.log` file and `import_tracking.txt` for troubleshooting.
+
+---
+
+## Mirror Node Version Compatibility
+
+Before initializing your Mirror Node with the imported database, it's crucial to ensure version compatibility.
+
+**MIRRORNODE_VERSION File:**
+
+- In the database export data, there is a file named `MIRRORNODE_VERSION`.
+- This file contains the version of the Mirror Node at the time of the database export.
+
+**Importance:**
+
+- Your Mirror Node instance must be initialized with the **same version** as specified in the `MIRRORNODE_VERSION` file.
+- Using a different version may lead to compatibility issues and/or schema mismatches.
+
+**Action Required:**
+
+1. **Check the Mirror Node Version:**
+
+   - Open the `MIRRORNODE_VERSION` file:
+
+     ```bash
+     cat /path/to/db_export/MIRRORNODE_VERSION
+     ```
+
+   - Note the version number specified.
+
+---
+
+## Handling Failed Imports
+
+During the import process, the script generates a file named `import_tracking.txt`, which logs the status of each file import. Each line in this file contains the path and name of a file, followed by its import status: `NOT_STARTED`, `IN_PROGRESS`, `IMPORTED`, or `FAILED_TO_IMPORT`.
+
+**Statuses:**
+
+- `NOT_STARTED`: The file has not yet been processed.
+- `IN_PROGRESS`: The file is currently being imported.
+- `IMPORTED`: The file was successfully imported.
+- `FAILED_TO_IMPORT`: The file failed to import.
+
+**Example of `import_tracking.txt`:**
+
+```
+/path/to/db_export/record_file.csv.gz IMPORTED
+/path/to/db_export/transaction/transaction_part_1.csv.gz IMPORTED
+/path/to/db_export/transaction/transaction_part_2.csv.gz FAILED_TO_IMPORT
+/path/to/db_export/account.csv.gz NOT_STARTED
+```
+
+### Steps to Handle Failed Imports:
+
+1. **Identify Files to Re-import:**
+
+   - Open the `import_tracking.txt` file.
+   - Look for files with the status `FAILED_TO_IMPORT`, `IN_PROGRESS`, or `NOT_STARTED`.
+   - These files either failed to import, were interrupted mid-import, or were never started.
+
+2. **Re-run the Import Script:**
+
+   - You can re-run the import script; it will skip files marked as `IMPORTED` and attempt to import files with statuses `NOT_STARTED`, `IN_PROGRESS`, or `FAILED_TO_IMPORT`.
+
+     ```bash
+     ./bootstrap.sh 8 /path/to/db_export/
+     ```
+
+   - The script will resume importing where it left off.
+
+3. **Alternatively, Collect Specific Files to Re-import:**
+
+   - Create a new directory to hold the files to be re-imported:
+
+     ```bash
+     mkdir -p /path/to/reimport_files
+     ```
+
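+   - Copy the failed, interrupted, and not started files to the new directory, keeping their sub-directory layout (for files in a sub-directory, the script takes the table name from the parent directory name):
+
+     ```bash
+     grep -E "FAILED_TO_IMPORT|IN_PROGRESS|NOT_STARTED" import_tracking.txt | awk '{print $1}' | while read -r f; do rel="${f#/path/to/db_export/}"; mkdir -p "/path/to/reimport_files/$(dirname "$rel")" && cp "$f" "/path/to/reimport_files/$rel"; done
+     ```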
+
+   - Run the import script, pointing it to the new directory:
+
+     ```bash
+     ./bootstrap.sh 8 /path/to/reimport_files/
+     ```
+
+4. **Verify the Imports:**
+
+   - Check the `import_tracking.txt` and `import.log` files to ensure that all files have been imported successfully.
+   - If files continue to fail, review the error messages in `import.log` for troubleshooting.
+
+**Note on Data Consistency:**
+
+- When a file import fails, the database transaction ensures that **no partial data** is committed.
+- This means that when you re-run the import script, you can safely re-import failed files without worrying about duplicates or inconsistencies.
+- The database tables remain in the same state as before the failed import attempt.
+
+---
+
+## Additional Notes
+
+- **Data Integrity:** The import script ensures data integrity by using transactions. If an error occurs during the import of a file, that file's data will not be committed to the database.
+- **System Resources:** Adjust the number of CPU cores used (`8` in the example) based on your system's capabilities to prevent overloading the server.
+- **Security:** Ensure that the passwords set in the environment variables are kept secure and not exposed in logs or command history.
+- **Concurrent Write Safety:** The script uses file locking (`flock`) to safely handle concurrent writes to `import_tracking.txt`. This prevents race conditions and ensures the tracking file remains consistent.
+- **Resuming Imports:** The script maintains the status of all files in `import_tracking.txt`, allowing you to resume imports after an interruption without re-importing already imported files.
+- **Required Tools:** Ensure that all required tools (`psql`, `gunzip`, `realpath`, `flock`) are installed on your system.
+
+---
+
+## Troubleshooting
+
+- **Connection Errors:**
+
+  - Confirm that `PGHOST` is correctly set to the IP address or hostname of your database server.
+  - Ensure that the database server allows connections from your client machine.
+
+- **Import Failures:**
+
+  - Check the `import.log` file generated by the import script for detailed error messages.
+  - Review the `import_tracking.txt` file to identify which files failed to import.
+
+- **Interruption Handling:**
+
+  - If the import process is interrupted (e.g., due to a network issue or manual cancellation), the script updates the statuses in `import_tracking.txt` accordingly.
+    - Files that were in progress will be marked as `IN_PROGRESS` or remain as `NOT_STARTED` if they had not begun.
+  - Upon restarting the script, it will:
+    - Skip files marked as `IMPORTED`.
+    - Attempt to import files with statuses `NOT_STARTED`, `IN_PROGRESS`, or `FAILED_TO_IMPORT`.
+
+- **Bash Version Compatibility:**
+
+  - The import script requires Bash version 4.3 or higher. Check your Bash version with:
+
+    ```bash
+    bash --version
+    ```
+
+  - If using an older version of Bash, consider updating to the minimum required version.
+
+---
diff --git a/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh b/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
new file mode 100644
index 00000000000..64a5ad65723
--- /dev/null
+++ b/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
@@ -0,0 +1,257 @@
+#!/bin/bash
+
+# Enable job control
+set -m
+
+show_help() {
+  echo "Usage: $0 [OPTIONS] DB_CPU_CORES IMPORT_DIR"
+  echo
+  echo "Imports data into a PostgreSQL database from compressed CSV files."
+  echo
+  echo "Options:"
+  echo "  -h, --help, -H     Show this help message and exit."
+  echo
+  echo "Arguments:"
+  echo "  DB_CPU_CORES       Number of CPU cores on the DB instance to thread the import jobs."
+  echo "  IMPORT_DIR         Path to the directory containing the compressed CSV files."
+  echo
+  echo "Example:"
+  echo "  $0 8 /path/to/db_export"
+  echo
+}
+
+# Parse options
+if [[ $# -eq 0 ]]; then
+  echo "No arguments provided. Use --help or -h for usage information."
+  exit 1
+fi
+
+while [[ "$#" -gt 0 ]]; do
+  case $1 in
+    -h|--help|-H)
+      show_help
+      exit 0
+      ;;
+    *)
+      break
+      ;;
+  esac
+done
+
+# Check if required arguments are supplied
+if [[ -z "$1" || -z "$2" ]]; then
+  echo "Error: Both DB_CPU_CORES and IMPORT_DIR must be provided."
+  echo "Use --help or -h for usage information."
+  exit 1
+fi
+
+DB_CPU_CORES="$1"
+IMPORT_DIR="$2"
+
+# Convert IMPORT_DIR to an absolute path
+IMPORT_DIR="$(realpath "$IMPORT_DIR")"
+
+# Check if IMPORT_DIR exists and is a directory
+if [[ ! -d "$IMPORT_DIR" ]]; then
+  echo "Error: IMPORT_DIR '$IMPORT_DIR' does not exist or is not a directory."
+  exit 1
+fi
+
+AVAILABLE_CORES=$(( $(nproc) - 1 ))  # Leave one core free for the local system
+DB_AVAILABLE_CORES=$((DB_CPU_CORES - 1))  # Leave one core free for the DB instance
+
+if [[ $AVAILABLE_CORES -lt $DB_AVAILABLE_CORES ]]; then
+  DB_AVAILABLE_CORES=$AVAILABLE_CORES
+fi
+# Never drop below one job: a limit of 0 makes the scheduling loop below spin forever on `wait -n`.
+max_jobs=$(( DB_AVAILABLE_CORES > 0 ? DB_AVAILABLE_CORES : 1 ))
+
+# Set PostgreSQL environment variables
+export PGUSER=${PGUSER:-"DB_OWNER"}
+export PGPASSWORD=${PGPASSWORD:-"DB_PASSWORD"}
+export PGHOST=${PGHOST:-"DB_ADDRESS"}
+export PGDATABASE=${PGDATABASE:-"DB_NAME"}
+
+LOG_FILE="import.log"
+TRACKING_FILE="import_tracking.txt"
+LOCK_FILE="import_tracking.lock"
+
+# Check if required tools are installed
+REQUIRED_TOOLS=("psql" "gunzip" "realpath" "flock")
+for tool in "${REQUIRED_TOOLS[@]}"; do
+  if ! command -v "$tool" &> /dev/null; then
+    echo "Error: $tool is not installed. Please install it to continue."
+    exit 1
+  fi
+done
+
+# Log using UTC times
+log() {
+  local msg="$1"
+  local level="${2:-INFO}"
+  local timestamp
+  timestamp=$(date -u '+%Y-%m-%d %H:%M:%S')
+
+  echo "$timestamp - $level - $msg" | tee -a "$LOG_FILE"
+}
+
+# Function to kill a process and its descendants
+kill_descendants() {
+  local pid="$1"
+  local children
+  children=$(pgrep -P "$pid")
+  for child in $children; do
+    kill_descendants "$child"
+  done
+  kill -TERM "$pid" 2>/dev/null
+}
+
+# Function to handle script termination
+cleanup() {
+  log "Script interrupted. Terminating background jobs..." "ERROR"
+  # Ignore further signals during cleanup
+  trap '' SIGINT SIGTERM
+
+  # Kill all background jobs and their descendants
+  for pid in "${pids[@]}"; do
+    kill_descendants "$pid"
+  done
+
+  wait 2>/dev/null
+  log "All background jobs terminated."
+  exit 1
+}
+
+# Trap signals
+trap 'cleanup' SIGINT SIGTERM
+
+# Function to safely write to tracking file with lock
+write_tracking_file() {
+  local file="$1"
+  local status="$2"
+  (
+    flock -x 200
+
+    # Remove any existing entry for the file
+    grep -v "^$file " "$TRACKING_FILE" > "${TRACKING_FILE}.tmp" 2>/dev/null || true
+    mv "${TRACKING_FILE}.tmp" "$TRACKING_FILE"
+
+    # Add the new status
+    echo "$file $status" >> "$TRACKING_FILE"
+  ) 200>"$LOCK_FILE"
+}
+
+# Function to read status from tracking file
+read_tracking_status() {
+  local file="$1"
+  grep "^$file " "$TRACKING_FILE" 2>/dev/null | awk '{print $2}'
+}
+
+# Function to collect all import tasks
+collect_import_tasks() {
+  find "$IMPORT_DIR" -type f -name "*.csv.gz"
+}
+
+# Main script execution
+log "Starting DB import."
+
+# Get the list of files to import
+mapfile -t files < <(collect_import_tasks)
+
+# Initialize the tracking file with all files as NOT_STARTED
+(
+  flock -x 200
+  for file in "${files[@]}"; do
+    # Only add if not already in tracking file
+    if ! grep -q "^$file " "$TRACKING_FILE" 2>/dev/null; then
+      echo "$file NOT_STARTED" >> "$TRACKING_FILE"
+    fi
+  done
+) 200>"$LOCK_FILE"
+
+# Initialize variables
+pids=()
+overall_success=0
+
+# Export necessary functions and variables
+export -f import_file log kill_descendants write_tracking_file read_tracking_status
+export IMPORT_DIR LOG_FILE TRACKING_FILE LOCK_FILE PGUSER PGPASSWORD PGHOST PGDATABASE
+
+# Function to import a single file
+import_file() {
+  local file="$1"
+  local table
+
+  # Determine the table name
+  if [[ "$(dirname "$file")" == "$IMPORT_DIR" ]]; then
+    table=$(basename "$file" .csv.gz)
+  else
+    table=$(basename "$(dirname "$file")")
+  fi
+
+  # Update status to IN_PROGRESS
+  write_tracking_file "$file" "IN_PROGRESS"
+  log "Importing table $table from $file"
+
+  if {
+    echo "BEGIN;"
+    echo "\\copy $table FROM STDIN WITH CSV HEADER;"
+    gunzip -c "$file"
+    echo "\."
+    echo "COMMIT;"
+  } | psql -q -v ON_ERROR_STOP=1; then
+    log "Successfully imported $file into $table"
+    # Update the status to IMPORTED
+    write_tracking_file "$file" "IMPORTED"
+  else
+    log "Failed to import $file into $table" "ERROR"
+    # Update the status to FAILED_TO_IMPORT
+    write_tracking_file "$file" "FAILED_TO_IMPORT"
+    return 1
+  fi
+}
+
+# Loop through files and manage parallel execution
+for file in "${files[@]}"; do
+  # Check if the file has already been imported
+  status=$(read_tracking_status "$file")
+  if [[ "$status" == "IMPORTED" ]]; then
+    log "Skipping already imported file $file"
+    continue
+  fi
+
+  # Wait if max_jobs are already running
+  while [[ ${#pids[@]} -ge $max_jobs ]]; do
+    # Wait for any job to finish
+    if ! wait -n; then
+      overall_success=1
+    fi
+
+    # Remove completed PIDs from the array
+    new_pids=()
+    for pid in "${pids[@]}"; do
+      if kill -0 "$pid" 2>/dev/null; then
+        new_pids+=("$pid")
+      fi
+    done
+    pids=("${new_pids[@]}")
+  done
+
+  # Start import in background
+  import_file "$file" &
+  pids+=($!)
+done
+
+# Wait for all remaining jobs to finish
+for pid in "${pids[@]}"; do
+  if ! wait "$pid"; then
+    overall_success=1
+  fi
+done
+
+if [[ $overall_success -eq 0 ]]; then
+  log "DB import completed successfully."
+else
+  log "DB import completed with errors" "ERROR"
+  exit 1
+fi
\ No newline at end of file

From 563ccab619753b456487a3cfd7a6d5f349edd72a Mon Sep 17 00:00:00 2001
From: Nir Ben-Or <nir@swirldslabs.com>
Date: Tue, 17 Sep 2024 15:39:29 -0500
Subject: [PATCH 2/9] Improve comments

Signed-off-by: Nir Ben-Or <nir@swirldslabs.com>
---
 .../src/main/resources/db/scripts/bootstrap.sh       | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh b/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
index 64a5ad65723..8aecee08c84 100644
--- a/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
+++ b/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
@@ -95,7 +95,7 @@ log() {
   echo "$timestamp - $level - $msg" | tee -a "$LOG_FILE"
 }
 
-# Function to kill a process and its descendants
+# Kill a process and its descendants
 kill_descendants() {
   local pid="$1"
   local children
@@ -106,7 +106,7 @@ kill_descendants() {
   kill -TERM "$pid" 2>/dev/null
 }
 
-# Function to handle script termination
+# Handle script termination
 cleanup() {
   log "Script interrupted. Terminating background jobs..." "ERROR"
   # Ignore further signals during cleanup
@@ -125,7 +125,7 @@ cleanup() {
 # Trap signals
 trap 'cleanup' SIGINT SIGTERM
 
-# Function to safely write to tracking file with lock
+# Safely write to tracking file with lock
 write_tracking_file() {
   local file="$1"
   local status="$2"
@@ -141,13 +141,13 @@ write_tracking_file() {
   ) 200>"$LOCK_FILE"
 }
 
-# Function to read status from tracking file
+# Read status from tracking file
 read_tracking_status() {
   local file="$1"
   grep "^$file " "$TRACKING_FILE" 2>/dev/null | awk '{print $2}'
 }
 
-# Function to collect all import tasks
+# Collect all import tasks
 collect_import_tasks() {
   find "$IMPORT_DIR" -type f -name "*.csv.gz"
 }
@@ -177,7 +177,7 @@ overall_success=0
 export -f import_file log kill_descendants write_tracking_file read_tracking_status
 export IMPORT_DIR LOG_FILE TRACKING_FILE LOCK_FILE PGUSER PGPASSWORD PGHOST PGDATABASE
 
-# Function to import a single file
+# Import a single file
 import_file() {
   local file="$1"
   local table

From bba77e987ae0623e8fa37ed2307ad75841f3fd94 Mon Sep 17 00:00:00 2001
From: Nir Ben-Or <119968212+nirbosl@users.noreply.github.com>
Date: Thu, 19 Sep 2024 10:33:44 -0500
Subject: [PATCH 3/9] Update title - suggestion accepted

Co-authored-by: Steven Sheehy <17552371+steven-sheehy@users.noreply.github.com>
Signed-off-by: Nir Ben-Or <119968212+nirbosl@users.noreply.github.com>
---
 docs/database/bootstrap.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/database/bootstrap.md b/docs/database/bootstrap.md
index b2ca7c41282..b470f72633b 100644
--- a/docs/database/bootstrap.md
+++ b/docs/database/bootstrap.md
@@ -1,4 +1,4 @@
-# Mirror Node Database Bootstrap Guide
+# Database Bootstrap Guide
 
 This guide provides step-by-step instructions for setting up a fresh PostgreSQL 14 database and importing Mirror Node data into it. The process involves initializing the database, configuring environment variables, and running the import script. The data import is a long-running process, so it's recommended to run it within a `screen` or `tmux` session.
 

From 26dffb1f6a4ddae9b5b7ccd67d929a4749d020f8 Mon Sep 17 00:00:00 2001
From: Nir Ben-Or <nir@swirldslabs.com>
Date: Thu, 19 Sep 2024 11:51:02 -0500
Subject: [PATCH 4/9] Several changes and updates following Steven's review
 comments; More to come

Signed-off-by: Nir Ben-Or <nir@swirldslabs.com>
---
 docs/database/bootstrap.md                    | 38 +++++++++----------
 .../src/main/resources/db/scripts/init.sh     | 12 +++++-
 2 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/docs/database/bootstrap.md b/docs/database/bootstrap.md
index b470f72633b..c78fa192175 100644
--- a/docs/database/bootstrap.md
+++ b/docs/database/bootstrap.md
@@ -1,34 +1,32 @@
 # Database Bootstrap Guide
 
-This guide provides step-by-step instructions for setting up a fresh PostgreSQL 14 database and importing Mirror Node data into it. The process involves initializing the database, configuring environment variables, and running the import script. The data import is a long-running process, so it's recommended to run it within a `screen` or `tmux` session.
+This guide provides step-by-step instructions for setting up a fresh PostgreSQL database and importing Mirror Node data into it. The process involves initializing the database, configuring environment variables, and running the import script. The data import is a long-running process, so it's recommended to run it within a `screen` or `tmux` session.
 
 ---
 
 ## Table of Contents
 
-- [Mirror Node Database Bootstrap Guide](#mirror-node-database-bootstrap-guide)
-  - [Table of Contents](#table-of-contents)
-  - [Prerequisites](#prerequisites)
-  - [Database Initialization](#database-initialization)
-    - [1. Configure Environment Variables](#1-configure-environment-variables)
-    - [2. Important Note for Google Cloud SQL Users](#2-important-note-for-google-cloud-sql-users)
-    - [3. Run the Initialization Script](#3-run-the-initialization-script)
-    - [4. Import the Database Schema](#4-import-the-database-schema)
-  - [Data Import Process](#data-import-process)
-    - [1. Download the Database Export Data](#1-download-the-database-export-data)
-    - [2. Download the Import Script](#2-download-the-import-script)
-    - [3. Run the Import Script](#3-run-the-import-script)
-  - [Mirror Node Version Compatibility](#mirror-node-version-compatibility)
-  - [Handling Failed Imports](#handling-failed-imports)
-    - [Steps to Handle Failed Imports:](#steps-to-handle-failed-imports)
-  - [Additional Notes](#additional-notes)
-  - [Troubleshooting](#troubleshooting)
+- [Prerequisites](#prerequisites)
+- [Database Initialization](#database-initialization)
+  - [1. Configure Environment Variables](#1-configure-environment-variables)
+  - [2. Important Note for Google Cloud SQL Users](#2-important-note-for-google-cloud-sql-users)
+  - [3. Run the Initialization Script](#3-run-the-initialization-script)
+  - [4. Import the Database Schema](#4-import-the-database-schema)
+- [Data Import Process](#data-import-process)
+  - [1. Download the Database Export Data](#1-download-the-database-export-data)
+  - [2. Download the Import Script](#2-download-the-import-script)
+  - [3. Run the Import Script](#3-run-the-import-script)
+- [Mirror Node Version Compatibility](#mirror-node-version-compatibility)
+- [Handling Failed Imports](#handling-failed-imports)
+  - [Steps to Handle Failed Imports:](#steps-to-handle-failed-imports)
+- [Additional Notes](#additional-notes)
+- [Troubleshooting](#troubleshooting)
 
 ---
 
 ## Prerequisites
 
-- **PostgreSQL 14** installed and running.
+- **PostgreSQL 16** installed and running.
 - Access to a machine where you can run the initialization and import scripts and connect to the PostgreSQL database.
 - A Google Cloud Platform (GCP) account with a valid billing account attached (required for downloading data from a Requester Pays bucket).
 
@@ -47,12 +45,14 @@ export PGUSER="postgres"
 export PGPASSWORD="YOUR_POSTGRES_PASSWORD"
 export PGDATABASE="postgres"
 export PGHOST="DB_IP_ADDRESS"
+export PGPORT="DB_PORT"
 ```
 
 - `PGUSER`: The PostgreSQL superuser with administrative privileges (typically `postgres`).
 - `PGPASSWORD`: Password for the PostgreSQL superuser.
 - `PGDATABASE`: The default database to connect to (`postgres` by default).
 - `PGHOST`: The IP address or hostname of your PostgreSQL database server.
+- `PGPORT`: The database server port number (`5432` by default).
 
 **Database User Password Variables:**
 
diff --git a/hedera-mirror-importer/src/main/resources/db/scripts/init.sh b/hedera-mirror-importer/src/main/resources/db/scripts/init.sh
index ba6d7d2198e..a7fcbf7e7ff 100755
--- a/hedera-mirror-importer/src/main/resources/db/scripts/init.sh
+++ b/hedera-mirror-importer/src/main/resources/db/scripts/init.sh
@@ -4,7 +4,9 @@ set -e
 export PGCONNECT_TIMEOUT="${PGCONNECT_TIMEOUT:-3}"
 export PGDATABASE="${POSTGRES_DB:-postgres}"
 export PGHOST="${PGHOST}"
+export PGPORT="${PGPORT:-5432}"
 export PGUSER="${POSTGRES_USER:-postgres}"
+export IS_GCP_CLOUD_SQL="${IS_GCP_CLOUD_SQL:-false}"
 
 DB_SPECIFIC_EXTENSION_SQL="create extension btree_gist;
                            create extension pg_trgm;"
@@ -47,10 +49,18 @@ psql --set ON_ERROR_STOP=1 \
   --set "rosettaUsername=${ROSETTA_USERNAME:-mirror_rosetta}" \
   --set "web3Password=${WEB3_PASSWORD:-mirror_web3_pass}" \
   --set "web3Username=${WEB3_USERNAME:-mirror_web3}" \
-  --set "tempSchema=${DB_TEMPSCHEMA:-temporary}" <<__SQL__
+  --set "tempSchema=${DB_TEMPSCHEMA:-temporary}" \
+  --set "isGcpCloudSql=${IS_GCP_CLOUD_SQL}" \
+  --set "pgUser=${PGUSER}" <<__SQL__
 
 -- Create database & owner
 create user :ownerUsername with login password :'ownerPassword';
+
+-- Google Cloud SQL restricts the admin user, so make it a member of the owner role (:ownerUsername) before creating a database owned by that role
+\if :isGcpCloudSql
+  grant :ownerUsername to :pgUser;
+\endif
+
 create database :dbName with owner :ownerUsername;
 
 -- Add extensions

From e3e0b9cf73438cdb4167c529cf5302611bc78885 Mon Sep 17 00:00:00 2001
From: Nir Ben-Or <nir@swirldslabs.com>
Date: Thu, 19 Sep 2024 12:22:27 -0500
Subject: [PATCH 5/9] Continuation of the review comments resolution

Signed-off-by: Nir Ben-Or <nir@swirldslabs.com>
---
 docs/database/bootstrap.md | 27 +++++----------------------
 1 file changed, 5 insertions(+), 22 deletions(-)

diff --git a/docs/database/bootstrap.md b/docs/database/bootstrap.md
index c78fa192175..34baefae28c 100644
--- a/docs/database/bootstrap.md
+++ b/docs/database/bootstrap.md
@@ -73,32 +73,15 @@ export WEB3_PASSWORD="SET_PASSWORD"
 
 ### 2. Important Note for Google Cloud SQL Users
 
-If you are using **Google Cloud SQL** for your PostgreSQL database, an additional step is required before running the `init.sh` script to ensure proper initialization.
-
-**Add the Following Line to the Initialization Script:**
-
-Before running the `init.sh` script, you need to grant the `mirror_node` role to the `postgres` user. This is necessary because Google Cloud SQL restricts certain permissions for the `postgres` user.
-
-Add the following line **before** running the `init.sh` script:
-
-```sql
-GRANT mirror_node TO postgres;
-```
-
-**Revised Section of `init.sh`:**
-
-```sql
--- Create database & owner
-CREATE USER :ownerUsername WITH LOGIN PASSWORD :'ownerPassword';
-GRANT mirror_node TO postgres;
-CREATE DATABASE :dbName WITH OWNER :ownerUsername;
+If you are using **Google Cloud SQL** for your PostgreSQL database, you'll need to set an additional environment variable:
+```bash
+export IS_GCP_CLOUD_SQL="true"
 ```
-
-- This adjustment ensures that the `postgres` user has the necessary permissions to execute the initialization script correctly on Google Cloud SQL.
+*Note*: For non-Google Cloud SQL environments, you do not need to set this variable; it defaults to `false`.
 
 ### 3. Run the Initialization Script
 
-Download the initialization script `init.sh` from the repository:
+Download the initialization script [`init.sh`](../../hedera-mirror-importer/src/main/resources/db/scripts/init.sh) from the repository:
 
 ```bash
 curl -O https://raw.githubusercontent.com/hashgraph/hedera-mirror-node/main/hedera-mirror-importer/src/main/resources/db/scripts/init.sh

From a4528cdae5b2de1a3cf0c4cdda0be1d92ec9791a Mon Sep 17 00:00:00 2001
From: Nir Ben-Or <119968212+nirbosl@users.noreply.github.com>
Date: Thu, 19 Sep 2024 12:31:05 -0500
Subject: [PATCH 6/9] Remove redundant "mirror node" in the title

Co-authored-by: Steven Sheehy <17552371+steven-sheehy@users.noreply.github.com>
Signed-off-by: Nir Ben-Or <119968212+nirbosl@users.noreply.github.com>
---
 docs/database/bootstrap.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/database/bootstrap.md b/docs/database/bootstrap.md
index 34baefae28c..8e9bf71c66f 100644
--- a/docs/database/bootstrap.md
+++ b/docs/database/bootstrap.md
@@ -244,7 +244,7 @@ The import script is designed to efficiently import the Mirror Node data into yo
 
 ---
 
-## Mirror Node Version Compatibility
+## Version Compatibility
 
 Before initializing your Mirror Node with the imported database, it's crucial to ensure version compatibility.
 

From 1bc16ebe10ded74bf1a87791e22535d775de5ff7 Mon Sep 17 00:00:00 2001
From: Nir Ben-Or <nir@swirldslabs.com>
Date: Thu, 19 Sep 2024 14:32:54 -0500
Subject: [PATCH 7/9] Several more changes based on comments

Signed-off-by: Nir Ben-Or <nir@swirldslabs.com>
---
 docs/database/bootstrap.md                    | 135 ++++------
 .../main/resources/db/scripts/bootstrap.sh    | 235 +++++++++++-------
 2 files changed, 188 insertions(+), 182 deletions(-)

diff --git a/docs/database/bootstrap.md b/docs/database/bootstrap.md
index 8e9bf71c66f..fa59abec9e4 100644
--- a/docs/database/bootstrap.md
+++ b/docs/database/bootstrap.md
@@ -16,7 +16,6 @@ This guide provides step-by-step instructions for setting up a fresh PostgreSQL
   - [1. Download the Database Export Data](#1-download-the-database-export-data)
   - [2. Download the Import Script](#2-download-the-import-script)
   - [3. Run the Import Script](#3-run-the-import-script)
-- [Mirror Node Version Compatibility](#mirror-node-version-compatibility)
 - [Handling Failed Imports](#handling-failed-imports)
   - [Steps to Handle Failed Imports:](#steps-to-handle-failed-imports)
 - [Additional Notes](#additional-notes)
@@ -26,9 +25,35 @@ This guide provides step-by-step instructions for setting up a fresh PostgreSQL
 
 ## Prerequisites
 
-- **PostgreSQL 16** installed and running.
-- Access to a machine where you can run the initialization and import scripts and connect to the PostgreSQL database.
-- A Google Cloud Platform (GCP) account with a valid billing account attached (required for downloading data from a Requester Pays bucket).
+1. **Version Compatibility**
+
+   Before initializing your Mirror Node with the imported database, it's crucial to ensure version compatibility.
+
+   **MIRRORNODE_VERSION File:**
+
+   - In the database export data, there is a file named `MIRRORNODE_VERSION`.
+   - This file contains the version of the Mirror Node at the time of the database export.
+
+   **Importance:**
+
+   - Your Mirror Node instance must be initialized with the **same version** as specified in the `MIRRORNODE_VERSION` file.
+   - Using a different version may lead to compatibility issues and/or schema mismatches.
+
+   **Action Required:**
+
+   1. **Check the Mirror Node Version:**
+
+      - Open the `MIRRORNODE_VERSION` file:
+
+        ```bash
+        cat /path/to/db_export/MIRRORNODE_VERSION
+        ```
+
+      - Note the version number specified.
+
+2. **PostgreSQL 16** installed and running.
+3. Access to a machine where you can run the initialization and import scripts and connect to the PostgreSQL database.
+4. A Google Cloud Platform (GCP) account with a valid billing account attached (required for downloading data from a Requester Pays bucket).
 
 ---
 
@@ -54,6 +79,8 @@ export PGPORT="DB_PORT"
 - `PGHOST`: The IP address or hostname of your PostgreSQL database server.
 - `PGPORT`: The database server port number (`5432` by default).
 
+
+
 **Database User Password Variables:**
 
 Set the following environment variables to define passwords for the various database users that will be created during initialization.
@@ -71,6 +98,8 @@ export WEB3_PASSWORD="SET_PASSWORD"
 
 - Replace `SET_PASSWORD` with strong, unique passwords for each respective user.
 
+- **Security Note:** Ensure that the passwords set in the environment variables are kept secure and not exposed in logs or command history.
+
 ### 2. Important Note for Google Cloud SQL Users
 
 If you are using **Google Cloud SQL** for your PostgreSQL database, you'll need to set an additional environment variable:
@@ -244,34 +273,6 @@ The import script is designed to efficiently import the Mirror Node data into yo
 
 ---
 
-## Version Compatibility
-
-Before initializing your Mirror Node with the imported database, it's crucial to ensure version compatibility.
-
-**MIRRORNODE_VERSION File:**
-
-- In the database export data, there is a file named `MIRRORNODE_VERSION`.
-- This file contains the version of the Mirror Node at the time of the database export.
-
-**Importance:**
-
-- Your Mirror Node instance must be initialized with the **same version** as specified in the `MIRRORNODE_VERSION` file.
-- Using a different version may lead to compatibility issues and/or schema mismatches.
-
-**Action Required:**
-
-1. **Check the Mirror Node Version:**
-
-   - Open the `MIRRORNODE_VERSION` file:
-
-     ```bash
-     cat /path/to/db_export/MIRRORNODE_VERSION
-     ```
-
-   - Note the version number specified.
-
----
-
 ## Handling Failed Imports
 
 During the import process, the script generates a file named `import_tracking.txt`, which logs the status of each file import. Each line in this file contains the path and name of a file, followed by its import status: `NOT_STARTED`, `IN_PROGRESS`, `IMPORTED`, or `FAILED_TO_IMPORT`.
@@ -294,63 +295,29 @@ During the import process, the script generates a file named `import_tracking.tx
 
 ### Steps to Handle Failed Imports:
 
-1. **Identify Files to Re-import:**
-
-   - Open the `import_tracking.txt` file.
-   - Look for files with the status `FAILED_TO_IMPORT` or `NOT_STARTED`.
-   - These files either failed to import or were not processed due to interruption.
-
-2. **Re-run the Import Script:**
-
-   - You can re-run the import script; it will skip files marked as `IMPORTED` and attempt to import files with statuses `NOT_STARTED`, `IN_PROGRESS`, or `FAILED_TO_IMPORT`.
-
-     ```bash
-     ./bootstrap.sh 8 /path/to/db_export/
-     ```
-
-   - The script will resume importing where it left off.
-
-3. **Alternatively, Collect Specific Files to Re-import:**
-
-   - Create a new directory to hold the files to be re-imported:
-
-     ```bash
-     mkdir -p /path/to/reimport_files
-     ```
-
-   - Copy the failed and not started files to the new directory:
+1. **Re-run the Import Script:**
 
+   - Simply re-run the import script; it will automatically skip files marked as `IMPORTED` and attempt to import files with statuses `NOT_STARTED`, `IN_PROGRESS`, or `FAILED_TO_IMPORT`.
+   
      ```bash
-     grep -E "FAILED_TO_IMPORT|NOT_STARTED" import_tracking.txt | awk '{print $1}' | xargs -I {} cp "{}" /path/to/reimport_files/
+     ./your_import_script.sh 8 /path/to/db_export/
      ```
+   
+   - The script manages the import process, ensuring that only the necessary files are processed without manual intervention.
 
-   - Run the import script, pointing it to the new directory:
-
-     ```bash
-     ./bootstrap.sh 8 /path/to/reimport_files/
-     ```
-
-4. **Verify the Imports:**
+2. **Verify the Imports:**
 
    - Check the `import_tracking.txt` and `import.log` files to ensure that all files have been imported successfully.
+   
    - If files continue to fail, review the error messages in `import.log` for troubleshooting.
 
-**Note on Data Consistency:**
-
-- When a file import fails, the database transaction ensures that **no partial data** is committed.
-- This means that when you re-run the import script, you can safely re-import failed files without worrying about duplicates or inconsistencies.
-- The database tables remain in the same state as before the failed import attempt.
-
----
-
-## Additional Notes
-
-- **Data Integrity:** The import script ensures data integrity by using transactions. If an error occurs during the import of a file, that file's data will not be committed to the database.
+**Notes on Data Consistency:**
+  
 - **System Resources:** Adjust the number of CPU cores used (`8` in the example) based on your system's capabilities to prevent overloading the server.
-- **Security:** Ensure that the passwords set in the environment variables are kept secure and not exposed in logs or command history.
+
+- **Data Integrity:** When a file import fails, the database transaction ensures that **no partial data** is committed. This means that when you re-run the import script, you can safely re-import failed files without worrying about duplicates or inconsistencies: the database tables remain in the same state as before the failed import attempt.
+  
 - **Concurrent Write Safety:** The script uses file locking (`flock`) to safely handle concurrent writes to `import_tracking.txt`. This prevents race conditions and ensures the tracking file remains consistent.
-- **Resuming Imports:** The script maintains the status of all files in `import_tracking.txt`, allowing you to resume imports after an interruption without re-importing already imported files.
-- **Required Tools:** Ensure that all required tools (`psql`, `gunzip`, `realpath`, `flock`) are installed on your system.
 
 ---
 
@@ -374,14 +341,4 @@ During the import process, the script generates a file named `import_tracking.tx
     - Skip files marked as `IMPORTED`.
     - Attempt to import files with statuses `NOT_STARTED`, `IN_PROGRESS`, or `FAILED_TO_IMPORT`.
 
-- **Bash Version Compatibility:**
-
-  - The import script requires Bash version 4.3 or higher. Check your Bash version with:
-
-    ```bash
-    bash --version
-    ```
-
-  - If using an older version of Bash, consider updating to the minimum required version.
-
 ---
diff --git a/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh b/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
index 8aecee08c84..e7127f636ed 100644
--- a/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
+++ b/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
@@ -3,62 +3,31 @@
 # Enable job control
 set -m
 
-show_help() {
-  echo "Usage: $0 [OPTIONS] DB_CPU_CORES IMPORT_DIR"
-  echo
-  echo "Imports data into a PostgreSQL database from compressed CSV files."
-  echo
-  echo "Options:"
-  echo "  -h, --help, -H     Show this help message and exit."
-  echo
-  echo "Arguments:"
-  echo "  DB_CPU_CORES       Number of CPU cores on the DB instance to thread the import jobs."
-  echo "  IMPORT_DIR         Path to the directory containing the compressed CSV files."
-  echo
-  echo "Example:"
-  echo "  $0 8 /path/to/db_export"
-  echo
-}
-
-# Parse options
-if [[ $# -eq 0 ]]; then
-  echo "No arguments provided. Use --help or -h for usage information."
-  exit 1
-fi
+####################################
+# Variables
+####################################
 
-while [[ "$#" -gt 0 ]]; do
-  case $1 in
-    -h|--help|-H)
-      show_help
-      exit 0
-      ;;
-    *)
-      break
-      ;;
-  esac
-done
+# Define minimum required Bash version
+REQUIRED_BASH_MAJOR=4
+REQUIRED_BASH_MINOR=3
 
-# Check if required arguments are supplied
-if [[ -z "$1" || -z "$2" ]]; then
-  echo "Error: Both DB_CPU_CORES and IMPORT_DIR must be provided."
-  echo "Use --help or -h for usage information."
-  exit 1
-fi
+# PostgreSQL environment variables
+export PGUSER=${PGUSER:-"DB_OWNER"}
+export PGPASSWORD=${PGPASSWORD:-"DB_PASSWORD"}
+export PGHOST=${PGHOST:-"DB_ADDRESS"}
+export PGPORT="${PGPORT:-5432}"  # Added PGPORT with default value
+export PGDATABASE=${PGDATABASE:-"DB_NAME"}
 
+# Import script arguments
 DB_CPU_CORES="$1"
 IMPORT_DIR="$2"
 
 # Convert IMPORT_DIR to an absolute path
 IMPORT_DIR="$(realpath "$IMPORT_DIR")"
 
-# Check if IMPORT_DIR exists and is a directory
-if [[ ! -d "$IMPORT_DIR" ]]; then
-  echo "Error: IMPORT_DIR '$IMPORT_DIR' does not exist or is not a directory."
-  exit 1
-fi
-
-AVAILABLE_CORES=$(( $(nproc) - 1 ))  # Leave one core free for the local system
-DB_AVAILABLE_CORES=$((DB_CPU_CORES - 1))  # Leave one core free for the DB instance
+# Calculate available CPU cores
+AVAILABLE_CORES=$(( $(nproc) - 1 ))          # Leave one core free for the local system
+DB_AVAILABLE_CORES=$((DB_CPU_CORES - 1))     # Leave one core free for the DB instance
 
 if [[ $AVAILABLE_CORES -lt $DB_AVAILABLE_CORES ]]; then
   DB_AVAILABLE_CORES=$AVAILABLE_CORES
@@ -66,26 +35,49 @@ fi
 
 max_jobs="$DB_AVAILABLE_CORES"
 
-# Set PostgreSQL environment variables
-export PGUSER=${PGUSER:-"DB_OWNER"}
-export PGPASSWORD=${PGPASSWORD:-"DB_PASSWORD"}
-export PGHOST=${PGHOST:-"DB_ADDRESS"}
-export PGDATABASE=${PGDATABASE:-"DB_NAME"}
-
+# Logging and tracking files
 LOG_FILE="import.log"
 TRACKING_FILE="import_tracking.txt"
 LOCK_FILE="import_tracking.lock"
 
-# Check if required tools are installed
+# Required tools
 REQUIRED_TOOLS=("psql" "gunzip" "realpath" "flock")
-for tool in "${REQUIRED_TOOLS[@]}"; do
-  if ! command -v "$tool" &> /dev/null; then
-    echo "Error: $tool is not installed. Please install it to continue."
+
+####################################
+# Functions
+####################################
+
+# display help message
+show_help() {
+  echo "Usage: $0 [OPTIONS] DB_CPU_CORES IMPORT_DIR"
+  echo
+  echo "Imports data into a PostgreSQL database from compressed CSV files."
+  echo
+  echo "Options:"
+  echo "  -h, --help, -H     Show this help message and exit."
+  echo
+  echo "Arguments:"
+  echo "  DB_CPU_CORES       Number of CPU cores on the DB instance to thread the import jobs."
+  echo "  IMPORT_DIR         Path to the directory containing the compressed CSV files."
+  echo
+  echo "Example:"
+  echo "  $0 8 /path/to/db_export"
+  echo
+}
+
+# check Bash version
+check_bash_version() {
+  local current_major=${BASH_VERSINFO[0]}
+  local current_minor=${BASH_VERSINFO[1]}
+  
+  if (( current_major < REQUIRED_BASH_MAJOR )) || \
+     (( current_major == REQUIRED_BASH_MAJOR && current_minor < REQUIRED_BASH_MINOR )); then
+    echo "Error: Bash version ${REQUIRED_BASH_MAJOR}.${REQUIRED_BASH_MINOR}+ is required. Current version is ${BASH_VERSION}."
     exit 1
   fi
-done
+}
 
-# Log using UTC times
+# log messages with UTC timestamps
 log() {
   local msg="$1"
   local level="${2:-INFO}"
@@ -95,7 +87,7 @@ log() {
   echo "$timestamp - $level - $msg" | tee -a "$LOG_FILE"
 }
 
-# Kill a process and its descendants
+# kill a process and its descendants
 kill_descendants() {
   local pid="$1"
   local children
@@ -106,7 +98,7 @@ kill_descendants() {
   kill -TERM "$pid" 2>/dev/null
 }
 
-# Handle script termination
+# handle script termination
 cleanup() {
   log "Script interrupted. Terminating background jobs..." "ERROR"
   # Ignore further signals during cleanup
@@ -122,10 +114,7 @@ cleanup() {
   exit 1
 }
 
-# Trap signals
-trap 'cleanup' SIGINT SIGTERM
-
-# Safely write to tracking file with lock
+# safely write to the tracking file with a lock
 write_tracking_file() {
   local file="$1"
   local status="$2"
@@ -141,43 +130,18 @@ write_tracking_file() {
   ) 200>"$LOCK_FILE"
 }
 
-# Read status from tracking file
+# read status from the tracking file
 read_tracking_status() {
   local file="$1"
   grep "^$file " "$TRACKING_FILE" 2>/dev/null | awk '{print $2}'
 }
 
-# Collect all import tasks
+# collect all import tasks (compressed CSV files)
 collect_import_tasks() {
   find "$IMPORT_DIR" -type f -name "*.csv.gz"
 }
 
-# Main script execution
-log "Starting DB import."
-
-# Get the list of files to import
-mapfile -t files < <(collect_import_tasks)
-
-# Initialize the tracking file with all files as NOT_STARTED
-(
-  flock -x 200
-  for file in "${files[@]}"; do
-    # Only add if not already in tracking file
-    if ! grep -q "^$file " "$TRACKING_FILE" 2>/dev/null; then
-      echo "$file NOT_STARTED" >> "$TRACKING_FILE"
-    fi
-  done
-) 200>"$LOCK_FILE"
-
-# Initialize variables
-pids=()
-overall_success=0
-
-# Export necessary functions and variables
-export -f import_file log kill_descendants write_tracking_file read_tracking_status
-export IMPORT_DIR LOG_FILE TRACKING_FILE LOCK_FILE PGUSER PGPASSWORD PGHOST PGDATABASE
-
-# Import a single file
+# import a single file into the database
 import_file() {
   local file="$1"
   local table
@@ -211,6 +175,90 @@ import_file() {
   fi
 }
 
+####################################
+# Execution
+####################################
+
+# Perform the Bash version check
+check_bash_version
+
+# display help if no arguments are provided
+if [[ $# -eq 0 ]]; then
+  echo "No arguments provided. Use --help or -h for usage information."
+  exit 1
+fi
+
+# Parse options
+while [[ "$#" -gt 0 ]]; do
+  case $1 in
+    -h|--help|-H)
+      show_help
+      exit 0
+      ;;
+    *)
+      break
+      ;;
+  esac
+done
+
+# Check if required arguments are supplied
+if [[ -z "$DB_CPU_CORES" || -z "$IMPORT_DIR" ]]; then
+  echo "Error: Both DB_CPU_CORES and IMPORT_DIR must be provided."
+  echo "Use --help or -h for usage information."
+  exit 1
+fi
+
+# Check if IMPORT_DIR exists and is a directory
+if [[ ! -d "$IMPORT_DIR" ]]; then
+  echo "Error: IMPORT_DIR '$IMPORT_DIR' does not exist or is not a directory."
+  exit 1
+fi
+
+# Check if required tools are installed
+missing_tools=()
+for tool in "${REQUIRED_TOOLS[@]}"; do
+  if ! command -v "$tool" &> /dev/null; then
+    missing_tools+=("$tool")
+  fi
+done
+
+if [[ ${#missing_tools[@]} -gt 0 ]]; then
+  echo "Error: The following required tools are not installed:"
+  for tool in "${missing_tools[@]}"; do
+    echo "  - $tool"
+  done
+  echo "Please install them to continue."
+  exit 1
+fi
+
+# Trap signals for cleanup
+trap 'cleanup' SIGINT SIGTERM
+
+# Log the start of the import process
+log "Starting DB import."
+
+# Get the list of files to import
+mapfile -t files < <(collect_import_tasks)
+
+# Initialize the tracking file with all files as NOT_STARTED
+(
+  flock -x 200
+  for file in "${files[@]}"; do
+    # Only add if not already in tracking file
+    if ! grep -q "^$file " "$TRACKING_FILE" 2>/dev/null; then
+      echo "$file NOT_STARTED" >> "$TRACKING_FILE"
+    fi
+  done
+) 200>"$LOCK_FILE"
+
+# Initialize variables for background processes
+pids=()
+overall_success=0
+
+# Export necessary functions and variables for subshells
+export -f import_file log kill_descendants write_tracking_file read_tracking_status
+export IMPORT_DIR LOG_FILE TRACKING_FILE LOCK_FILE PGUSER PGPASSWORD PGHOST PGDATABASE
+
 # Loop through files and manage parallel execution
 for file in "${files[@]}"; do
   # Check if the file has already been imported
@@ -249,6 +297,7 @@ for pid in "${pids[@]}"; do
   fi
 done
 
+# Log the final status of the import process
 if [[ $overall_success -eq 0 ]]; then
   log "DB import completed successfully."
 else

From cd6fe752f73a91823b9b17e0efe3825790e02834 Mon Sep 17 00:00:00 2001
From: Nir Ben-Or <nir@swirldslabs.com>
Date: Thu, 19 Sep 2024 18:20:43 -0500
Subject: [PATCH 8/9] Almost done, two more items left - will be in the next
 commit

Signed-off-by: Nir Ben-Or <nir@swirldslabs.com>
---
 docs/database/bootstrap.md | 63 +++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 38 deletions(-)

diff --git a/docs/database/bootstrap.md b/docs/database/bootstrap.md
index fa59abec9e4..4fbabff1064 100644
--- a/docs/database/bootstrap.md
+++ b/docs/database/bootstrap.md
@@ -53,7 +53,16 @@ This guide provides step-by-step instructions for setting up a fresh PostgreSQL
 
 2. **PostgreSQL 16** installed and running.
 3. Access to a machine where you can run the initialization and import scripts and connect to the PostgreSQL database.
-4. A Google Cloud Platform (GCP) account with a valid billing account attached (required for downloading data from a Requester Pays bucket).
+4. Ensure the following tools are installed on your machine:
+    - psql
+    - gunzip
+    - realpath
+    - flock
+5. Install [Google Cloud SDK](https://cloud.google.com/sdk/docs/install), then authenticate:
+    ```bash
+    gcloud auth login
+    ```
+6. A Google Cloud Platform (GCP) account with a valid billing account attached (required for downloading data from a Requester Pays bucket).
 
 ---
 
@@ -164,28 +173,13 @@ The Mirror Node database export data is available in a Google Cloud Storage (GCS
 
 **Download Instructions:**
 
-1. **Authenticate with GCP:**
-
-   Ensure you have the [Google Cloud SDK](https://cloud.google.com/sdk/docs/install) installed and authenticated:
-
-   ```bash
-   gcloud auth login
-   gcloud config set billing/disable_usage_reporting false
-   ```
-
-2. **Set the Default Project:**
-
-   ```bash
-   gcloud config set project YOUR_GCP_PROJECT_ID
-   ```
-
-3. **Download the Data:**
+1. **Download the Data:**
 
    Create an empty directory to store the data and download all files and subdirectories:
 
    ```bash
    mkdir -p /path/to/db_export
-   gsutil -u YOUR_GCP_PROJECT_ID -m cp -r gs://mirrornode-db-export/* /path/to/db_export/
+   gsutil -u YOUR_GCP_PROJECT_ID -m cp -r gs://mirrornode-db-export/VERSION_NUMBER/* /path/to/db_export/
    ```
 
    - Replace `/path/to/db_export` with your desired directory path.
@@ -293,24 +287,6 @@ During the import process, the script generates a file named `import_tracking.tx
 /path/to/db_export/account.csv.gz NOT_STARTED
 ```
 
-### Steps to Handle Failed Imports:
-
-1. **Re-run the Import Script:**
-
-   - Simply re-run the import script; it will automatically skip files marked as `IMPORTED` and attempt to import files with statuses `NOT_STARTED`, `IN_PROGRESS`, or `FAILED_TO_IMPORT`.
-   
-     ```bash
-     ./your_import_script.sh 8 /path/to/db_export/
-     ```
-   
-   - The script manages the import process, ensuring that only the necessary files are processed without manual intervention.
-
-2. **Verify the Imports:**
-
-   - Check the `import_tracking.txt` and `import.log` files to ensure that all files have been imported successfully.
-   
-   - If files continue to fail, review the error messages in `import.log` for troubleshooting.
-
 **Notes on Data Consistency:**
   
 - **System Resources:** Adjust the number of CPU cores used (`8` in the example) based on your system's capabilities to prevent overloading the server.
@@ -330,8 +306,19 @@ During the import process, the script generates a file named `import_tracking.tx
 
 - **Import Failures:**
 
-  - Check the `import.log` file generated by the import script for detailed error messages.
-  - Review the `import_tracking.txt` file to identify which files failed to import.
+   - Simply re-run the import script; it will automatically skip files marked as `IMPORTED` and attempt to import files with statuses `NOT_STARTED`, `IN_PROGRESS`, or `FAILED_TO_IMPORT`.
+   
+     ```bash
+     ./bootstrap.sh 8 /path/to/db_export/
+     ```
+   
+   - The script manages the import process, ensuring that only the necessary files are processed without manual intervention.
+
+   - **Verify the Imports:**
+
+     - Check the `import_tracking.txt` and `import.log` files to ensure that all files have been imported successfully.
+     
+     - If files continue to fail, review the error messages in `import.log` for troubleshooting.
 
 - **Interruption Handling:**
 

From e526a4f67d8ffb33aa6af40b7d58b881c01d6bc0 Mon Sep 17 00:00:00 2001
From: Nir Ben-Or <nir@swirldslabs.com>
Date: Fri, 20 Sep 2024 14:48:28 -0500
Subject: [PATCH 9/9] Changed the import command to a one-liner, and added a
 conditional revoke of the extra grant for GCP cloud sql instances

Signed-off-by: Nir Ben-Or <nir@swirldslabs.com>
---
 .../src/main/resources/db/scripts/bootstrap.sh            | 8 +-------
 .../src/main/resources/db/scripts/init.sh                 | 5 +++++
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh b/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
index e7127f636ed..a6a65c6ff25 100644
--- a/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
+++ b/hedera-mirror-importer/src/main/resources/db/scripts/bootstrap.sh
@@ -157,13 +157,7 @@ import_file() {
   write_tracking_file "$file" "IN_PROGRESS"
   log "Importing table $table from $file"
 
-  if {
-    echo "BEGIN;"
-    echo "\\copy $table FROM STDIN WITH CSV HEADER;"
-    gunzip -c "$file"
-    echo "\."
-    echo "COMMIT;"
-  } | psql -q -v ON_ERROR_STOP=1; then
+  if gunzip -c "$file" | psql -q -v ON_ERROR_STOP=1 -c "\COPY $table FROM STDIN WITH CSV HEADER"; then
     log "Successfully imported $file into $table"
     # Update the status to IMPORTED
     write_tracking_file "$file" "IMPORTED"
diff --git a/hedera-mirror-importer/src/main/resources/db/scripts/init.sh b/hedera-mirror-importer/src/main/resources/db/scripts/init.sh
index a7fcbf7e7ff..eb9d3b5c67d 100755
--- a/hedera-mirror-importer/src/main/resources/db/scripts/init.sh
+++ b/hedera-mirror-importer/src/main/resources/db/scripts/init.sh
@@ -120,4 +120,9 @@ ${DB_SPECIFIC_EXTENSION_SQL}
 -- Alter search path
 \connect postgres postgres
 alter database :dbName set search_path = :dbSchema, public, :tempSchema;
+
+-- On Google Cloud SQL, drop the admin user's extra membership in the owner role (:ownerUsername) that this script granted earlier
+\if :isGcpCloudSql
+  revoke :ownerUsername from :pgUser;
+\endif
 __SQL__