Linux Mastery · Lesson 2 of 4

Shell Scripting Mastery: Bash Automation

Bash

Why Shell Scripting?

Bash scripts automate repetitive tasks — backups, deployments, log rotation, health checks. Every DevOps engineer and backend developer needs to write them. One 20-line script can save hours of manual work every week.

Bash
#!/bin/bash
# Your first script
echo "Hello, $(whoami) — scripting works!"
Bash
chmod +x hello.sh
./hello.sh

Script Structure and the Shebang

Bash
#!/bin/bash
# ─────────────────────────────────────────────────────
# Script: deploy.sh
# Purpose: Deploy application to production server
# Usage: ./deploy.sh [environment] [version]
# Author: Alice
# ─────────────────────────────────────────────────────

# Exit immediately on error, unset variables, pipe failures
set -euo pipefail

# IFS for safe word splitting
IFS=$'\n\t'

Always start scripts with:

  • #!/bin/bash — tells the OS which interpreter to use
  • set -euo pipefail — the safety net:
    • -e — exit when a command fails (not applied to commands tested by if/while or used in && / || chains)
    • -u — exit on unset variable
    • -o pipefail — pipeline fails if any command fails

Variables

Bash
# Assignment (NO spaces around =)
NAME="Alice"
AGE=30
TODAY=$(date +%Y-%m-%d)    # Command substitution

# Using variables
echo "$NAME is $AGE years old"
echo "Today is $TODAY"

# Readonly (like constants)
readonly MAX_RETRIES=3

# Unset
unset NAME

# String operations
FILENAME="report-2026-04-17.csv"
echo "${FILENAME%.csv}"         # → report-2026-04-17  (remove .csv suffix)
echo "${FILENAME#report-}"      # → 2026-04-17.csv     (remove prefix)
echo "${FILENAME/2026/2025}"    # → report-2025-04-17.csv (replace first match)
echo "${#FILENAME}"             # → 21  (length in characters)
echo "${FILENAME^^}"            # → REPORT-2026-04-17.CSV (uppercase, Bash 4+)
echo "${FILENAME,,}"            # → report-2026-04-17.csv (lowercase, Bash 4+)

# Default values
PORT="${PORT:-8080}"           # Use 8080 if PORT is unset or empty
DB_HOST="${DB_HOST:?DB_HOST must be set}"  # Exit with this message if unset or empty

Special Variables

Bash
$0      # Script name
$1, $2  # Positional arguments
$@      # All arguments; quote it as "$@" to keep each one a separate word
$#      # Number of arguments
$?      # Exit code of last command (0 = success)
$$      # PID of current shell
$!      # PID of last background process
$LINENO # Current line number
$FUNCNAME # Current function name
Bash
#!/bin/bash
set -euo pipefail

SCRIPT_NAME="$0"
ENVIRONMENT="${1:-dev}"     # Default to dev if not provided
VERSION="${2:-latest}"

echo "Deploying version $VERSION to $ENVIRONMENT"

Conditionals

Bash
# if / elif / else
if [[ "$ENVIRONMENT" == "prod" ]]; then
  echo "Production deploy — extra caution"
elif [[ "$ENVIRONMENT" == "staging" ]]; then
  echo "Staging deploy"
else
  echo "Dev deploy"
fi

# Numeric comparisons
MEMORY_MB=512
if (( MEMORY_MB >= 1024 )); then
  echo "High memory"
fi

# File tests
if [[ -f "/etc/nginx/nginx.conf" ]]; then
  echo "nginx config exists"
fi

if [[ -d "/var/log/myapp" ]]; then
  echo "log directory exists"
else
  mkdir -p "/var/log/myapp"
fi

# Common test operators
# -f file     regular file exists
# -d dir      directory exists
# -e path     path exists (any type)
# -r file     file is readable
# -w file     file is writable
# -x file     file is executable
# -s file     file exists and is non-empty
# -z string   string is empty
# -n string   string is non-empty
# == != <     string comparison (inside [[]])
# -eq -ne -lt -le -gt -ge  numeric comparison

Loops

Bash
# For loop  iterate over list
for server in web-1 web-2 web-3; do
  echo "Checking $server..."
  ssh "deploy@$server" "systemctl status myapp"
done

# For loop  C-style
for ((i=1; i<=5; i++)); do
  echo "Attempt $i"
done

# For loop — glob expansion (the shell expands the pattern itself; no command is run)
for file in /var/log/*.log; do
  # When nothing matches, the pattern is left as the literal string
  # "/var/log/*.log"; skip it rather than hand a bogus path to gzip.
  [[ -e "$file" ]] || continue
  echo "Processing: $file"
  gzip "$file"
done

# While loop
RETRIES=0
while ! curl -sf http://localhost:8080/health; do
  RETRIES=$((RETRIES + 1))
  if (( RETRIES >= 5 )); then
    echo "Health check failed after $RETRIES attempts"
    exit 1
  fi
  echo "Waiting... attempt $RETRIES"
  sleep 2
done

# Read file line by line
while IFS= read -r line; do
  echo "Processing: $line"
done < "/etc/hosts"

# Until loop (runs until condition is true)
until [[ -f "/tmp/ready.flag" ]]; do
  echo "Waiting for ready flag..."
  sleep 1
done

Arrays

Bash
# Indexed array
SERVERS=("web-1" "web-2" "web-3")
echo "${SERVERS[0]}"          # → web-1
echo "${SERVERS[@]}"          # → web-1 web-2 web-3 (all)
echo "${#SERVERS[@]}"         # → 3 (length)

SERVERS+=("web-4")             # Append
# Quote the subscript: unquoted, SERVERS[1] is a glob pattern, so a file named
# "SERVERS1" in the current directory would be matched and unset would then
# target the wrong name.
unset 'SERVERS[1]'             # Remove element (indices are now 0 2 3)

# Loop over array (always quote "${array[@]}" so elements stay intact)
for server in "${SERVERS[@]}"; do
  echo "Deploying to $server"
done

# Associative array (like a hash map) — requires Bash 4+
declare -A CONFIG
CONFIG[db_host]="prod-postgres.internal"
CONFIG[db_port]="5432"
CONFIG[db_name]="appdb"

echo "${CONFIG[db_host]}"
# "${!CONFIG[@]}" expands to the keys; iteration order is not guaranteed
for key in "${!CONFIG[@]}"; do
  echo "$key = ${CONFIG[$key]}"
done

Functions

Bash
# Function definition
# Print one timestamped log line to stdout and append the same line to
# /var/log/deploy.log.
# Arguments: $1 - severity label (e.g. INFO, ERROR)
#            $2 - message text
log() {
  local severity="$1" text="$2"
  local now
  # Assigned separately from 'local' so a failing date is not masked
  now=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] [%s] %s\n' "$now" "$severity" "$text" | tee -a /var/log/deploy.log
}

# Call function
log "INFO"  "Starting deployment"
log "ERROR" "Connection failed"

# Function with return value
# Probe a TCP port with nc (zero-I/O mode, 3-second timeout).
# Arguments: $1 - host name or address
#            $2 - port number
# Returns:   nc's exit status — 0 when the connection succeeded, non-zero otherwise
is_port_open() {
  local target_host="$1" target_port="$2"
  # Last command in the function, so its status becomes the function's status
  nc -z -w3 "$target_host" "$target_port" 2>/dev/null
}

if is_port_open "prod-db.internal" 5432; then
  log "INFO" "Database port is reachable"
else
  log "ERROR" "Cannot reach database"
  exit 1
fi

# Return a value (via echo, not return)
# Print the current git branch name, or "unknown" when it cannot be determined.
# Outputs: exactly one line on stdout
get_git_branch() {
  local branch
  # Capture first, then decide: git can write to stdout and still exit
  # non-zero, which with a plain `git ... || echo "unknown"` would emit
  # two lines instead of one.
  if branch=$(git rev-parse --abbrev-ref HEAD 2>/dev/null) && [[ -n "$branch" ]]; then
    echo "$branch"
  else
    echo "unknown"
  fi
}

BRANCH=$(get_git_branch)
log "INFO" "Deploying branch: $BRANCH"

Error Handling

Bash
# Trap errors — run cleanup on exit

# Record where the failure happened. Inside cleanup() $LINENO only ever refers
# to cleanup's own lines, so the failing line has to be captured by an ERR trap.
# errtrace (-E) makes functions and subshells inherit that trap.
set -o errtrace
FAILED_LINE="unknown"
trap 'FAILED_LINE=$LINENO' ERR

# EXIT handler: on a non-zero exit, log the failure and call rollback;
# always remove /tmp/deploy_*.tmp.
cleanup() {
  local exit_code=$?    # Must be the first command, before $? is overwritten
  if (( exit_code != 0 )); then
    # FAILED_LINE stays "unknown" for an explicit `exit N`, which does not fire ERR
    log "ERROR" "Script failed with exit code $exit_code on line $FAILED_LINE"
    # Rollback if needed
    rollback
  fi
  # Always clean up temp files
  rm -f /tmp/deploy_*.tmp
}
trap cleanup EXIT

# Trap specific signals
trap 'log "WARN" "Script interrupted"; exit 130' INT TERM

rollback() {
  log "WARN" "Rolling back..."
  # ... rollback logic
}

# Custom error handler
# Log an ERROR-level message through log() and terminate the script.
# Arguments: $1 - message to log
#            $2 - exit status (optional, defaults to 1)
die() {
  local reason="$1" status="${2:-1}"
  log "ERROR" "$reason"
  exit "$status"
}

[[ -f "$CONFIG_FILE" ]] || die "Config file not found: $CONFIG_FILE"

Argument Parsing

Bash
#!/bin/bash
# Parses [OPTIONS] ENVIRONMENT from the command line and validates them.
# Options and the positional ENVIRONMENT may appear in any order.
set -euo pipefail

# Print the help text to stdout (callers redirect to stderr on misuse)
usage() {
  cat <<EOF
Usage: $0 [OPTIONS] ENVIRONMENT

Deploy the application to the specified environment.

Arguments:
  ENVIRONMENT     Target environment (dev|staging|prod)

Options:
  -v, --version   VERSION   Docker image version to deploy (default: latest)
  -d, --dry-run             Show what would be done without doing it
  -f, --force               Skip confirmation prompts
  -h, --help                Show this help message

Examples:
  $0 prod -v 1.2.3
  $0 staging --dry-run
EOF
}

# Print an error to stderr and exit.
# Defined here because this script is standalone and uses die below.
# Arguments: $1 - message, $2 - exit status (optional, defaults to 1)
die() {
  echo "Error: $1" >&2
  exit "${2:-1}"
}

# Defaults
VERSION="latest"
DRY_RUN=false
FORCE=false
ENVIRONMENT=""

# Parse arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    -v|--version)
      # Without this check a trailing "-v" trips `set -u` on $2 and the user
      # only sees "unbound variable" instead of a usable message.
      [[ $# -ge 2 ]] || die "Option $1 requires a VERSION argument"
      VERSION="$2"
      shift 2
      ;;
    -d|--dry-run)
      DRY_RUN=true
      shift
      ;;
    -f|--force)
      FORCE=true
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    -*)
      echo "Unknown option: $1" >&2
      usage >&2
      exit 1
      ;;
    *)
      # Positional argument; if several are given, the last one wins
      ENVIRONMENT="$1"
      shift
      ;;
  esac
done

# Validate required argument
[[ -n "$ENVIRONMENT" ]] || { usage >&2; exit 1; }
[[ "$ENVIRONMENT" =~ ^(dev|staging|prod)$ ]] || die "Invalid environment: $ENVIRONMENT"

echo "Deploying $VERSION to $ENVIRONMENT (dry-run: $DRY_RUN)"

Text Processing: grep, sed, awk

Bash
# grep: search patterns
grep "ERROR" /var/log/app.log
grep -r "database" /etc/myapp/        # Recursive
grep -c "ERROR" /var/log/app.log      # Count matching lines (not individual matches)
grep -v "DEBUG" /var/log/app.log      # Invert (exclude DEBUG)
grep -E "ERROR|WARN" /var/log/app.log # Extended regex
grep -o '[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}' access.log  # Extract IPv4-shaped strings (octets are not range-checked)

# sed: stream editor
sed 's/localhost/prod-db.internal/g' config.template > config.yaml  # Replace
sed -n '10,20p' file.txt              # Print lines 10-20
sed '/^#/d' config.conf              # Delete lines starting with # (output to stdout; file unchanged)
sed -i.bak 's/old/new/g' file.txt    # Edit in-place (original kept as file.txt.bak)

# awk: data processing
awk '{print $1, $4}' access.log      # Print columns 1 and 4
awk -F, '{print $2}' data.csv        # CSV: print column 2 (splits on every comma, including quoted ones)
awk '$9 == "404" {print $7}' access.log  # Field 7 of lines whose field 9 is 404 (URL/status in that layout)
awk '{sum += $1} END {print sum}' numbers.txt  # Sum column
awk 'NR%100==0 {print NR, $0}' large.txt  # Every 100th line

Real-World Scripts

1. Health Check & Auto-Restart

Bash
#!/bin/bash
# Watchdog: polls HEALTH_URL every 30 seconds and restarts SERVICE after
# MAX_FAILURES consecutive failed checks.
# Optional env: SLACK_WEBHOOK — when set, a message is posted after a restart.
set -euo pipefail

SERVICE="myapp"
HEALTH_URL="http://localhost:8080/health"
MAX_FAILURES=3
FAILURES=0
# Default to empty: with `set -u` an undefined SLACK_WEBHOOK would otherwise
# kill the watchdog at the exact moment it tries to send an alert.
SLACK_WEBHOOK="${SLACK_WEBHOOK:-}"

# Returns 0 when the health endpoint answers successfully within 5 seconds
# (curl -f turns HTTP error responses into a non-zero exit status).
check_health() {
  curl -sf --max-time 5 "$HEALTH_URL" > /dev/null 2>&1
}

# Best-effort Slack alert: does nothing without a webhook, and a failed POST
# is reported on stderr instead of aborting the loop under `set -e`.
notify_restart() {
  [[ -n "$SLACK_WEBHOOK" ]] || return 0
  curl -s --max-time 10 -X POST "$SLACK_WEBHOOK" \
    -H 'Content-type: application/json' \
    -d "{\"text\":\"⚠️ $SERVICE restarted on $(hostname)\"}" \
    || echo "$(date): failed to send Slack alert" >&2
}

while true; do
  if check_health; then
    FAILURES=0
    echo "$(date): $SERVICE is healthy"
  else
    FAILURES=$((FAILURES + 1))
    echo "$(date): $SERVICE health check failed ($FAILURES/$MAX_FAILURES)"

    if (( FAILURES >= MAX_FAILURES )); then
      echo "$(date): Restarting $SERVICE..."
      # Tested in an `if` so a failed restart does not terminate the watchdog
      if systemctl restart "$SERVICE"; then
        notify_restart
      else
        echo "$(date): restart of $SERVICE failed" >&2
      fi
      # Start a fresh failure count either way, so the next restart attempt
      # happens only after another MAX_FAILURES failed checks
      FAILURES=0
    fi
  fi
  sleep 30
done

2. Database Backup Script

Bash
#!/bin/bash
# Dumps a PostgreSQL database to a gzip-compressed file, uploads it to S3 and
# deletes local backups older than RETENTION_DAYS.
# Required env: DB_HOST, DB_NAME, DB_USER, DB_PASSWORD
set -euo pipefail

DB_HOST="${DB_HOST:?required}"
DB_NAME="${DB_NAME:?required}"
DB_USER="${DB_USER:?required}"
# Fail up front with a clear message instead of an "unbound variable" error
# halfway through the script
: "${DB_PASSWORD?required}"
BACKUP_DIR="/var/backups/postgres"
RETENTION_DAYS=30
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_FILE="$BACKUP_DIR/${DB_NAME}_${TIMESTAMP}.sql.gz"
# The dump is written under a temporary name first, so an aborted pg_dump can
# never leave a truncated file that looks like a finished backup.
PARTIAL_FILE="$BACKUP_FILE.partial"

mkdir -p "$BACKUP_DIR"

# Remove the unfinished dump on every exit path (no-op after the rename below)
trap 'rm -f -- "$PARTIAL_FILE"' EXIT

echo "Starting backup of $DB_NAME..."
# pipefail (set above) makes this pipeline fail when pg_dump fails, not only gzip
PGPASSWORD="$DB_PASSWORD" pg_dump \
  -h "$DB_HOST" \
  -U "$DB_USER" \
  --no-password \
  "$DB_NAME" | gzip > "$PARTIAL_FILE"

# Only a complete dump gets the final name
mv -- "$PARTIAL_FILE" "$BACKUP_FILE"

SIZE=$(du -sh "$BACKUP_FILE" | cut -f1)
echo "Backup complete: $BACKUP_FILE ($SIZE)"

# Upload to S3
aws s3 cp "$BACKUP_FILE" "s3://myapp-backups/postgres/" --storage-class STANDARD_IA
echo "Uploaded to S3"

# Delete old local backups (runs only after a successful upload, because any
# earlier failure exits the script under `set -e`)
find "$BACKUP_DIR" -type f -name "*.sql.gz" -mtime "+$RETENTION_DAYS" -delete
echo "Cleaned up backups older than $RETENTION_DAYS days"

3. Deployment Script

Bash
#!/bin/bash
# Deploys a given image version: pre-flight checks, pull, database migrations,
# container replacement, then a health check.
# Usage: deploy.sh <environment> <version>
set -euo pipefail

ENVIRONMENT="${1:?Usage: $0 <environment> <version>}"
VERSION="${2:?Usage: $0 <environment> <version>}"
APP_DIR="/opt/myapp"
SERVICE="myapp"
# Single source of truth for the image reference used by pull, migrate and run
IMAGE="myregistry.io/myapp:$VERSION"
ENV_FILE="/etc/myapp/$ENVIRONMENT.env"

# Timestamped progress message on stdout
log() { echo "[$(date '+%H:%M:%S')] $*"; }
# Fatal error: message goes to stderr so it is not mixed into normal output
die() { log "ERROR: $1" >&2; exit 1; }

# Pre-flight checks
[[ "$ENVIRONMENT" =~ ^(dev|staging|prod)$ ]] || die "Invalid environment"
command -v docker >/dev/null || die "Docker not installed"
systemctl is-active --quiet postgresql || die "PostgreSQL not running"

log "Deploying $VERSION to $ENVIRONMENT..."

# Pull new image
log "Pulling docker image: myapp:$VERSION"
docker pull "$IMAGE"

# Record the currently running image.
# NOTE(review): the value is only logged; nothing below uses it to roll back
# when the health check fails.
CURRENT_VERSION=$(docker inspect "$SERVICE" --format '{{.Config.Image}}' 2>/dev/null || echo "none")
log "Current version: $CURRENT_VERSION"

# Run database migrations
log "Running database migrations..."
docker run --rm \
  --network host \
  --env-file "$ENV_FILE" \
  "$IMAGE" \
  python manage.py migrate

# Replace the running container. This is NOT zero-downtime: the old container
# is stopped before the new one starts, so the service is unavailable in between.
log "Swapping containers..."
# Stop/remove errors are ignored on purpose: on a first deploy there is no
# existing container.
docker stop "$SERVICE" 2>/dev/null || true
docker rm "$SERVICE" 2>/dev/null || true
docker run -d \
  --name "$SERVICE" \
  --restart always \
  --network host \
  --env-file "$ENV_FILE" \
  "$IMAGE"

# Health check: up to 15 attempts, 2 seconds apart
log "Waiting for health check..."
RETRIES=0
until curl -sf http://localhost:8080/health; do
  RETRIES=$((RETRIES + 1))
  (( RETRIES < 15 )) || die "Health check failed after 30s"
  sleep 2
done

log "Deployment complete: $VERSION is live on $ENVIRONMENT"

Cron Jobs

Bash
# Edit crontab
crontab -e

# Cron syntax: min hour dom month dow command
# ┌──────────── minute (0-59)
# │ ┌────────── hour (0-23)
# │ │ ┌──────── day of month (1-31)
# │ │ │ ┌────── month (1-12)
# │ │ │ │ ┌──── day of week (0-7, 0 or 7 = Sun)
# │ │ │ │ │
# * * * * * /path/to/command

0  2 * * *   /opt/scripts/backup.sh          # Daily at 2:00 AM
0  */4 * * * /opt/scripts/health-check.sh    # Every 4 hours
*/5 * * * *  /opt/scripts/log-rotate.sh      # Every 5 minutes
0  9 * * 1   /opt/scripts/weekly-report.sh   # Monday 9 AM

# Redirect output (stdout and stderr appended to one log file)
0 2 * * * /opt/scripts/backup.sh >> /var/log/backup.log 2>&1

# Use @reboot to run on startup
@reboot /opt/myapp/start.sh

Debugging

Bash
# Trace execution (print each command)
bash -x script.sh
# Or add to script:
set -x       # Enable trace
# ... code ...
set +x       # Disable trace

# Dry run (check syntax only)
bash -n script.sh

# Debug individual section
set -x
suspicious_command
set +x

# Print variable values
declare -p MYVAR     # Show variable name, attributes, and value

Best Practices

Bash
# 1. Quote all variables
echo "$filename"          # Safe
echo $filename            # Breaks on spaces

# 2. Use [[ ]] not [ ] for tests
[[ -f "$file" ]]          # Better: regex support, no word splitting
[ -f "$file" ]            # Older, more limited

# 3. Use $() not backticks
TODAY=$(date +%Y-%m-%d)   # Clear nesting
TODAY=`date`              # Hard to nest, deprecated style

# 4. Print errors to stderr
echo "Error: something failed" >&2

# 5. Use mktemp for temp files
TMPFILE=$(mktemp /tmp/script.XXXXXX)
trap 'rm -f "$TMPFILE"' EXIT

# 6. Check command exists before using it (error goes to stderr, per rule 4)
command -v docker >/dev/null || { echo "docker not found" >&2; exit 1; }

# 7. Use local in functions
myfunc() {
  local result="$1"   # Won't pollute global scope
}

Summary

| Concept | Example |
|---------|---------|
| Safety header | set -euo pipefail |
| Variables | NAME="value", ${NAME:-default} |
| Conditions | [[ -f file ]], (( num > 5 )) |
| Loops | for item in list; do ... done |
| Functions | myfunc() { local x="$1"; ... } |
| Error trap | trap cleanup EXIT |
| Arg parsing | while [[ $# -gt 0 ]]; do case "$1"... |
| Text | grep, sed 's/old/new/g', awk '{print $2}' |
| Cron | 0 2 * * * /opt/scripts/backup.sh |
| Debug | bash -x script.sh |

Shell scripting is the glue of infrastructure. One well-written script automates away hours of manual work — for you and your entire team.