minecraft-server/scripts/backup.sh
s8n 4c16cebb2b backup: phase 1 + phase 2 scripts; daily script repaired and deployed
Repairs the orphaned synapse-signing-key block at scripts/backup.sh
lines 119-122 that was exiting the script under set -e before the
Minecraft block could run, leaving 5 of the last 7 days without a
world backup and zero usable snapshots after 7-day retention.

Phase 1 (deployed today to /opt/docker/backup.sh on nullstone):
- Repaired script — orphan block removed, MC arm wrapped so failures
  in one tar don't kill the run
- tar exit code 1 ("file changed as we read it") now treated as
  success on the live MC world; spark profiler tmp file noise
  silenced via --ignore-failed-read --warning=no-file-changed
- Plugin DBs (homestead, AuthMe, CoreProtect, LuckPerms) and configs
  now backed up alongside the world
- Sentinel /opt/backups/.last-success stamped only when the world
  arm succeeds — gives outside monitors a single mtime to alert on
- Manually verified end-to-end: 12G world tarball, 492M plugins,
  279M dbs, 14 config files, sentinel updated. Pre-fix script saved
  at /opt/docker/backup.sh.bak-20260507-pre-phase1.

Phase 2 (scripts in repo, deployment pending operator sudo):
- scripts/restic-backup-playerdata.sh — Class A 5-min restic snapshots
  of playerdata/, stats/, advancements/, plugin DBs, LuckPerms;
  rcon save-all flush before snapshot; tag-scoped retention
- scripts/restic-init.sh — one-time bootstrap (root-only) for
  /etc/mc-backup.{env,pw} + repo init at /home/user/restic/
- scripts/systemd/mc-backup-playerdata.{service,timer} — 5-min timer
  with hardening (ProtectSystem=strict, ReadOnlyPaths, etc)
- docs/RUNBOOK-BACKUP-RESTORE.md updated with both phases'
  deployment steps and the operator-action checklist

Off-host mirror to onyx (Phase 4) and class B/C/D world snapshots
(Phase 3) are still TODO — see BACKUP-STRATEGY.md §11 phase plan.
2026-05-07 18:29:30 +01:00

268 lines
12 KiB
Bash
Executable file

#!/usr/bin/env bash
# /opt/docker/backup.sh
#
# Daily backup of all Docker service databases, named volumes, and the
# Minecraft world to /opt/backups/. Runs as root via cron at 02:00 with
# 7-day retention.
#
# Phase 1 of BACKUP-STRATEGY.md ("stop the bleeding") — repairs the
# orphaned synapse-signing-key block that was killing the script under
# `set -e` before the Minecraft section ran. Also adds structured
# logging and a sentinel `.last-success` file so silent failures are
# detectable from outside the script.
#
# A separate Phase 2 (restic playerdata snapshots every 5 min) is
# delivered by scripts/restic-backup-playerdata.sh + the systemd unit
# pair under scripts/systemd/. This file remains the safety net.
set -euo pipefail
umask 077
# Immutable run parameters — marked readonly so nothing later in the
# run (or a future edit) can clobber them mid-backup.
readonly BACKUP_DIR="/opt/backups"
# Declaration split from assignment so a failing $(date) isn't masked.
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
readonly TIMESTAMP
readonly BACKUP_PATH="${BACKUP_DIR}/${TIMESTAMP}"
readonly LOG="${BACKUP_DIR}/backup.log"
readonly SENTINEL="${BACKUP_DIR}/.last-success"
readonly KEEP_DAYS=7
# Track whether each backup arm succeeded so we can honour the
# sentinel contract: only stamp .last-success if the *world* (the
# critical T1 case) was captured. Other arms can fail without
# blocking the sentinel — they have their own logged FAILED lines.
# Deliberately NOT readonly: the Minecraft arm flips this to 1.
MC_WORLD_OK=0
log() {
  # Emit a timestamped line to stdout and append the same line to $LOG.
  local stamp line
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  line="[${stamp}] $*"
  printf '%s\n' "$line" | tee -a "$LOG"
}
# Per-run destination directory (/opt/backups/<timestamp>); with the
# umask 077 above it is created 0700, root-only.
mkdir -p "$BACKUP_PATH"
log "=== Backup started: ${TIMESTAMP} ==="
# ── Matrix PostgreSQL ──────────────────────────────────────────────
# Stream pg_dump of the synapse database through gzip into the run dir.
log "Dumping Matrix PostgreSQL..."
PG_DUMP_OUT="${BACKUP_PATH}/matrix-postgres-${TIMESTAMP}.sql.gz"
if docker ps --format '{{.Names}}' | grep -q '^matrix-postgres$'; then
  if docker exec matrix-postgres pg_dump -U synapse synapse | gzip > "$PG_DUMP_OUT"; then
    log " Matrix Postgres: OK ($(du -sh "$PG_DUMP_OUT" | cut -f1))"
  else
    log " Matrix Postgres: FAILED"
  fi
else
  log " matrix-postgres not running — skipping"
fi
# ── Rocket.Chat MongoDB ────────────────────────────────────────────
log "Dumping Rocket.Chat MongoDB..."
if docker ps --format '{{.Names}}' | grep -q '^mongodb$'; then
# Credential comes from the environment when provided; the original
# literal placeholder is kept as the fallback so behaviour is
# unchanged until the operator exports MONGO_ADMIN_PASSWORD.
# NOTE(review): the password still rides on argv and is visible in
# `ps` for the dump's duration — consider mongodump --config instead.
if docker exec mongodb mongodump \
-u admin -p "${MONGO_ADMIN_PASSWORD:-CHANGE_ME_MONGO_ADMIN_PASSWORD}" \
--authenticationDatabase admin \
--db rocketchat \
--archive \
| gzip > "${BACKUP_PATH}/rocketchat-mongo-${TIMESTAMP}.archive.gz"; then
log " MongoDB: OK ($(du -sh "${BACKUP_PATH}/rocketchat-mongo-${TIMESTAMP}.archive.gz" | cut -f1))"
else
log " MongoDB: FAILED"
fi
else
log " mongodb not running — skipping"
fi
# ── Named Docker volumes ───────────────────────────────────────────
log "Backing up Docker volumes..."
for VOLUME in synapse-media rocketchat-uploads; do
  # Compose may namespace the volume (matrix_<name> / rocketchat_<name>)
  # or it may exist bare. Use one fully-anchored pattern for BOTH the
  # existence check and the name resolution — the previous resolution
  # grep was an unanchored substring match and could have selected an
  # unrelated volume (e.g. "synapse-media-old").
  VOL_RE="^(matrix_${VOLUME}|rocketchat_${VOLUME}|${VOLUME})$"
  if docker volume ls --format '{{.Name}}' | grep -Eq "$VOL_RE"; then
    ACTUAL_VOL=$(docker volume ls --format '{{.Name}}' | grep -E "$VOL_RE" | head -1)
    # tar the volume contents from a throwaway alpine container; the
    # volume is mounted read-only so the backup can't mutate it.
    if docker run --rm \
      -v "${ACTUAL_VOL}:/volume:ro" \
      -v "${BACKUP_PATH}:/backup" \
      alpine \
      tar czf "/backup/${VOLUME}-${TIMESTAMP}.tar.gz" -C /volume . ; then
      log " Volume ${VOLUME}: OK"
    else
      log " Volume ${VOLUME}: FAILED"
    fi
  else
    log " Volume ${VOLUME}: not found — skipping"
  fi
done
# ── Config files (bind mounts) ─────────────────────────────────────
log "Backing up config directories..."
CONFIG_PATHS=(
  /opt/docker/traefik/traefik.yml
  /opt/docker/traefik/config/
  /opt/docker/matrix/docker-compose.yml
  /opt/docker/matrix/element-config/
  /opt/docker/matrix/synapse-config/homeserver.yaml
  /opt/docker/matrix/synapse-config/matrix.example.com.log.config
  /opt/docker/rocketchat/docker-compose.yml
)
# tar exits non-zero when any listed path is absent; report that as
# "partial" rather than a hard failure (stderr deliberately muted).
if tar czf "${BACKUP_PATH}/configs-${TIMESTAMP}.tar.gz" "${CONFIG_PATHS[@]}" 2>/dev/null; then
  log " Configs: OK"
else
  log " Configs: partial (some files missing)"
fi
# Synapse signing key — sensitive, copy out separately with tight perms.
SIGNING_KEY_SRC="/opt/docker/matrix/synapse-config/matrix.example.com.signing.key"
SIGNING_KEY_DST="${BACKUP_PATH}/synapse-signing-key-${TIMESTAMP}.key"
if [ -f "$SIGNING_KEY_SRC" ]; then
  cp "$SIGNING_KEY_SRC" "$SIGNING_KEY_DST"
  chmod 600 "$SIGNING_KEY_DST"
  log " Synapse signing key: backed up (600)"
fi
# ── Minecraft server ───────────────────────────────────────────────
# This is the block that was missing from the deployed copy and
# corrupted by an orphaned synapse-signing-key fragment in the repo
# copy. The arm is invoked through an if-guard below, which suspends
# `set -e` inside it, so a failure here does NOT abort the script —
# the prune step and sentinel logic still run.
log "Backing up Minecraft server..."

# Success predicate for tar exit codes. GNU tar: 0 = clean,
# 1 = "some files differed/changed during read" (normal on a live MC
# server — chunks save while we read), 2 = fatal. Accept 0 and 1.
tar_ok() {
  case "$1" in
    0|1) return 0 ;;
    *)   return 1 ;;
  esac
}
#######################################
# mc_backup — Minecraft arm of the nightly run.
# Globals read:    TIMESTAMP, BACKUP_PATH, MC_RCON_PASSWORD (optional)
# Globals written: MC_WORLD_OK — set to 1 only when a world archive
#                  (live in-container tarball, or the offline full
#                  backup) actually landed in BACKUP_PATH.
# Calls:           log, tar_ok; docker/mcrcon/tar externally.
# Invoked from a condition (`if ! mc_backup`), which suspends `set -e`
# inside the body — individual failures are logged, never fatal.
#######################################
mc_backup() {
if docker ps --format '{{.Names}}' | grep -q '^minecraft-mc$'; then
# Server running — flush via rcon if mcrcon installed, then
# tar inside the container so we get a consistent point-in-time.
# NOTE(review): the "*redacted*" fallback can never be the real rcon
# password, so the flush silently no-ops unless MC_RCON_PASSWORD is
# exported — confirm the cron environment actually provides it.
if command -v mcrcon >/dev/null 2>&1; then
mcrcon -H 127.0.0.1 -P 25575 \
-p "${MC_RCON_PASSWORD:-*redacted*}" \
-w 1 "save-all flush" >/dev/null 2>&1 || true
fi
# World tar — runs inside the container. We ignore tar exit 1
# ("file changed as we read it") because that's expected on a
# live server and the resulting archive is still usable.
local tar_rc=0
docker exec minecraft-mc bash -c \
"cd /data && tar czf /tmp/mc-world-backup-${TIMESTAMP}.tar.gz world/ world_nether/ world_the_end/" \
>/dev/null 2>&1 || tar_rc=$?
# Success requires all three steps: acceptable tar rc, copy out of
# the container, and removal of the in-container temp file.
# NOTE(review): a failed in-container rm after a successful copy is
# logged as FAILED even though the archive landed — worth knowing
# when reading the log.
if tar_ok "$tar_rc" \
&& docker cp "minecraft-mc:/tmp/mc-world-backup-${TIMESTAMP}.tar.gz" "${BACKUP_PATH}/" >/dev/null 2>&1 \
&& docker exec minecraft-mc rm -f "/tmp/mc-world-backup-${TIMESTAMP}.tar.gz" >/dev/null 2>&1; then
local sz
sz=$(du -sh "${BACKUP_PATH}/mc-world-backup-${TIMESTAMP}.tar.gz" | cut -f1)
if [ "$tar_rc" -eq 1 ]; then
log " Minecraft world: OK (${sz}) [tar exit 1 — files changed during read, expected on live server]"
else
log " Minecraft world: OK (${sz})"
fi
MC_WORLD_OK=1
else
log " Minecraft world: FAILED (tar_rc=${tar_rc})"
# Best-effort cleanup of any half-written file inside the container.
docker exec minecraft-mc rm -f "/tmp/mc-world-backup-${TIMESTAMP}.tar.gz" >/dev/null 2>&1 || true
fi
# Plugins (jars + on-disk config) — small, do this regardless
# of world result so we always have plugin state on hand.
# `--ignore-failed-read` suppresses spark profiler tmp files
# (running JFR files briefly mode 600); `--warning=no-file-changed`
# silences CoreProtect db noise in the log.
local prc=0
tar --ignore-failed-read --warning=no-file-changed \
-czf "${BACKUP_PATH}/minecraft-plugins-${TIMESTAMP}.tar.gz" \
-C /opt/docker/minecraft plugins/ >/dev/null 2>&1 || prc=$?
if tar_ok "$prc"; then
log " Minecraft plugins: OK ($(du -sh "${BACKUP_PATH}/minecraft-plugins-${TIMESTAMP}.tar.gz" | cut -f1))"
else
log " Minecraft plugins: FAILED (rc=${prc})"
fi
# Plugin DBs — copied (not dumped, all SQLite/file-based) into
# a tagged tarball so restore is straightforward.
local drc=0
tar --ignore-failed-read --warning=no-file-changed \
-czf "${BACKUP_PATH}/minecraft-dbs-${TIMESTAMP}.tar.gz" \
-C /opt/docker/minecraft \
homestead_data.db \
plugins/AuthMe/authme.db \
plugins/CoreProtect/database.db \
plugins/LuckPerms/ \
>/dev/null 2>&1 || drc=$?
if tar_ok "$drc"; then
log " Minecraft DBs: OK ($(du -sh "${BACKUP_PATH}/minecraft-dbs-${TIMESTAMP}.tar.gz" | cut -f1))"
else
log " Minecraft DBs: partial (rc=${drc} — some files may be missing)"
fi
# Server-side configs and access lists. Some of these files are
# optional (eg whitelist.json absent when whitelisting is off).
# tar reports rc=2 for missing files, so we prefilter the list.
# NOTE(review): if every listed file were absent, cfg_files would be
# empty — tar then exits 2 (logged FAILED), and on bash < 4.4 the
# empty-array expansion under `set -u` would error; confirm the host
# bash version if this arm ever runs on another machine.
local cfg_files=()
for f in server.properties purpur.yml spigot.yml bukkit.yml \
commands.yml help.yml permissions.yml \
ops.json whitelist.json banned-players.json banned-ips.json \
usercache.json eula.txt docker-compose.yml; do
[ -e "/opt/docker/minecraft/$f" ] && cfg_files+=("$f")
done
local crc=0
tar czf "${BACKUP_PATH}/minecraft-configs-${TIMESTAMP}.tar.gz" \
-C /opt/docker/minecraft "${cfg_files[@]}" \
>/dev/null 2>&1 || crc=$?
if tar_ok "$crc"; then
log " Minecraft configs: OK (${#cfg_files[@]} files)"
else
log " Minecraft configs: FAILED (rc=${crc})"
fi
else
# Server stopped — back up everything from disk directly.
local frc=0
tar czf "${BACKUP_PATH}/minecraft-full-backup-${TIMESTAMP}.tar.gz" \
-C /opt/docker/minecraft \
world/ \
world_nether/ \
world_the_end/ \
plugins/ \
homestead_data.db \
server.properties \
purpur.yml \
spigot.yml \
bukkit.yml \
ops.json \
whitelist.json \
banned-players.json \
banned-ips.json \
usercache.json \
docker-compose.yml \
>/dev/null 2>&1 || frc=$?
if tar_ok "$frc"; then
log " Minecraft (full, offline): OK ($(du -sh "${BACKUP_PATH}/minecraft-full-backup-${TIMESTAMP}.tar.gz" | cut -f1))"
MC_WORLD_OK=1
else
log " Minecraft (offline): partial (rc=${frc})"
fi
fi
}
# Run the MC arm. Putting the call on the left of `||` suspends
# `set -e` inside the function body, so a failure inside it can never
# abort the script before pruning and the sentinel logic below run.
mc_backup || log " Minecraft arm exited non-zero — see lines above"
# ── Prune old backups ──────────────────────────────────────────────
# Remove per-run directories older than KEEP_DAYS and stale logs.
# `-mindepth 1` guards against find matching $BACKUP_DIR itself
# (depth 0): without it, if the backup root's mtime ever drifted past
# the retention window, the -exec rm -rf would delete the entire
# backup tree. Errors (e.g. permission noise) are deliberately muted.
prune_old_backups() {
  find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 -type d -mtime "+${KEEP_DAYS}" -exec rm -rf {} + 2>/dev/null || true
  find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 -name "*.log" -mtime +30 -delete 2>/dev/null || true
}
log "Pruning backups older than ${KEEP_DAYS} days..."
prune_old_backups
BACKUP_SIZE=$(du -sh "$BACKUP_PATH" | cut -f1)
log "=== Backup complete: ${BACKUP_PATH} (${BACKUP_SIZE}) ==="
# ── Sentinel ───────────────────────────────────────────────────────
# Stamp .last-success only when the T1 world backup was captured. An
# external monitor (cron on onyx, or ntfy/healthchecks once wired)
# can alert on `find /opt/backups/.last-success -mmin +1500` to catch
# silent failures within 25h of a missed daily run.
if [ "$MC_WORLD_OK" -eq 1 ]; then
  printf 'last_success=%s\nbackup_path=%s\nbackup_size=%s\n' \
    "$(date -Iseconds)" "$BACKUP_PATH" "$BACKUP_SIZE" > "$SENTINEL"
  log "Sentinel updated: ${SENTINEL}"
else
  log "WARNING: world backup did NOT succeed — sentinel NOT updated"
fi