Repairs the orphaned synapse-signing-key block at scripts/backup.sh
lines 119-122, which was exiting the script under `set -e` before the
Minecraft block could run. That left 5 of the last 7 days without a
world backup and zero usable snapshots after 7-day retention.
Phase 1 (deployed today to /opt/docker/backup.sh on nullstone):
- Repaired script — orphan block removed, MC arm wrapped so failures
in one tar don't kill the run
- tar exit code 1 ("file changed as we read it") now treated as
success on the live MC world; spark profiler tmp file noise
silenced via --ignore-failed-read --warning=no-file-changed
- Plugin DBs (homestead, AuthMe, CoreProtect, LuckPerms) and configs
now backed up alongside the world
- Sentinel /opt/backups/.last-success stamped only when the world
arm succeeds — gives outside monitors a single mtime to alert on
- Manually verified end-to-end: 12G world tarball, 492M plugins,
279M dbs, 14 config files, sentinel updated. Pre-fix script saved
at /opt/docker/backup.sh.bak-20260507-pre-phase1.
Phase 2 (scripts in repo, deployment pending operator sudo):
- scripts/restic-backup-playerdata.sh — Class A 5-min restic snapshots
of playerdata/, stats/, advancements/, plugin DBs, LuckPerms;
rcon save-all flush before snapshot; tag-scoped retention
- scripts/restic-init.sh — one-time bootstrap (root-only) for
/etc/mc-backup.{env,pw} + repo init at /home/user/restic/
- scripts/systemd/mc-backup-playerdata.{service,timer} — 5-min timer
with hardening (ProtectSystem=strict, ReadOnlyPaths, etc)
- docs/RUNBOOK-BACKUP-RESTORE.md updated with both phases'
deployment steps and the operator-action checklist
Off-host mirror to onyx (Phase 4) and Class B/C/D world snapshots
(Phase 3) are still TODO — see BACKUP-STRATEGY.md §11 phase plan.
268 lines
12 KiB
Bash
Executable file
268 lines
12 KiB
Bash
Executable file
#!/usr/bin/env bash
|
|
# /opt/docker/backup.sh
|
|
#
|
|
# Daily backup of all Docker service databases, named volumes, and the
|
|
# Minecraft world to /opt/backups/. Runs as root via cron at 02:00 with
|
|
# 7-day retention.
|
|
#
|
|
# Phase 1 of BACKUP-STRATEGY.md ("stop the bleeding") — repairs the
|
|
# orphaned synapse-signing-key block that was killing the script under
|
|
# `set -e` before the Minecraft section ran. Also adds structured
|
|
# logging and a sentinel `.last-success` file so silent failures are
|
|
# detectable from outside the script.
|
|
#
|
|
# A separate Phase 2 (restic playerdata snapshots every 5 min) is
|
|
# delivered by scripts/restic-backup-playerdata.sh + the systemd unit
|
|
# pair under scripts/systemd/. This file remains the safety net.
|
|
# Strict mode: stop on unhandled errors, on unset variables, and when any
# stage of a pipeline fails. Files created by this run are owner-only.
set -o errexit -o nounset -o pipefail
umask 077

# Backup root and retention window (in days).
BACKUP_DIR='/opt/backups'
KEEP_DAYS=7

# A single timestamp per run names the run directory and every artifact
# written into it.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
BACKUP_PATH="${BACKUP_DIR}/${TIMESTAMP}"
LOG="${BACKUP_DIR}/backup.log"
SENTINEL="${BACKUP_DIR}/.last-success"

# Sentinel contract: .last-success is stamped only when the Minecraft
# *world* (the critical T1 case) was captured. The other arms may fail
# without blocking the sentinel; each of them logs its own FAILED line.
# 0 = world not captured (yet), 1 = world captured.
MC_WORLD_OK=0
|
|
|
|
#######################################
# Write one timestamped line to stdout and append it to the log file.
# Logging is best-effort: if the log file cannot be appended to, a
# warning goes to stderr and the function still returns 0, so that a
# logging problem can never abort the backup under `set -e`.
# Globals:   LOG (read) - path of the log file to append to
# Arguments: $* - message text (joined with spaces)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout and in $LOG
# Returns:   0 always
#######################################
log() {
  local stamp line
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf -v line '[%s] %s' "$stamp" "$*"

  printf '%s\n' "$line" || true

  # The redirection sits inside a group so that the shell's own
  # "cannot open" diagnostic is silenced together with the command.
  if ! { printf '%s\n' "$line" >> "$LOG"; } 2>/dev/null; then
    printf 'log: cannot append to %s\n' "$LOG" >&2 || true
  fi
  return 0
}
|
|
|
|
# Create this run's directory (and any missing parents) before the first
# log line is written.
mkdir -p -- "${BACKUP_PATH}"
log "=== Backup started: ${TIMESTAMP} ==="
|
|
|
|
# ── Matrix PostgreSQL ──────────────────────────────────────────────
# Dumps the `synapse` database from the matrix-postgres container into a
# gzip-compressed SQL file in this run's directory. A failed dump is
# logged and the run continues with the next arm.
log "Dumping Matrix PostgreSQL..."

# Capture the container list before matching. Piping `docker ps` straight
# into `grep -q` is unsafe under pipefail: grep exits on the first match,
# docker can then be killed by SIGPIPE, and the pipeline reports failure,
# which would be misread as "container not running".
running_containers=$(docker ps --format '{{.Names}}') || running_containers=''

if grep -Fqx 'matrix-postgres' <<<"$running_containers"; then
  matrix_dump="${BACKUP_PATH}/matrix-postgres-${TIMESTAMP}.sql.gz"
  # With pipefail the condition is false if either pg_dump or gzip fails.
  if docker exec matrix-postgres pg_dump -U synapse synapse \
    | gzip > "$matrix_dump"; then
    log " Matrix Postgres: OK ($(du -sh "$matrix_dump" | cut -f1))"
  else
    log " Matrix Postgres: FAILED"
  fi
else
  log " matrix-postgres not running — skipping"
fi
|
|
|
|
# ── Rocket.Chat MongoDB ────────────────────────────────────────────
# Streams a mongodump archive of the `rocketchat` database out of the
# mongodb container and gzips it into this run's directory. A failed
# dump is logged and the run continues with the next arm.
log "Dumping Rocket.Chat MongoDB..."

# Admin password: taken from MONGO_ADMIN_PASSWORD when that is set and
# non-empty, otherwise the value baked into this file is used as before.
mongo_admin_password="${MONGO_ADMIN_PASSWORD:-}"
if [ -z "$mongo_admin_password" ]; then
  mongo_admin_password=CHANGE_ME_MONGO_ADMIN_PASSWORD
fi

# Capture the container list before matching. Piping `docker ps` straight
# into `grep -q` is unsafe under pipefail: grep exits on the first match,
# docker can then be killed by SIGPIPE, and the pipeline reports failure,
# which would be misread as "container not running".
running_containers=$(docker ps --format '{{.Names}}') || running_containers=''

if grep -Fqx 'mongodb' <<<"$running_containers"; then
  mongo_dump="${BACKUP_PATH}/rocketchat-mongo-${TIMESTAMP}.archive.gz"
  # With pipefail the condition is false if either mongodump or gzip fails.
  if docker exec mongodb mongodump \
    -u admin -p "$mongo_admin_password" \
    --authenticationDatabase admin \
    --db rocketchat \
    --archive \
    | gzip > "$mongo_dump"; then
    log " MongoDB: OK ($(du -sh "$mongo_dump" | cut -f1))"
  else
    log " MongoDB: FAILED"
  fi
else
  log " mongodb not running — skipping"
fi
|
|
|
|
# ── Named Docker volumes ───────────────────────────────────────────
# Archives each named volume by mounting it read-only into a throwaway
# alpine container and tarring it into this run's directory.
log "Backing up Docker volumes..."

# List volumes once, up front. Matching against a captured list avoids
# the pipefail/SIGPIPE false negative of `docker volume ls | grep -q`.
docker_volumes=$(docker volume ls --format '{{.Name}}') || docker_volumes=''

for VOLUME in synapse-media rocketchat-uploads; do
  # Pick the first volume named matrix_<name>*, rocketchat_<name>* or
  # exactly <name>. The same pattern both detects and selects the volume,
  # so an unrelated volume that merely contains the name is never
  # archived in its place.
  ACTUAL_VOL=$(grep -m1 -E "^(matrix|rocketchat)_${VOLUME}|^${VOLUME}\$" \
    <<<"$docker_volumes" || true)

  if [ -n "$ACTUAL_VOL" ]; then
    if docker run --rm \
      -v "${ACTUAL_VOL}:/volume:ro" \
      -v "${BACKUP_PATH}:/backup" \
      alpine \
      tar czf "/backup/${VOLUME}-${TIMESTAMP}.tar.gz" -C /volume . ; then
      log " Volume ${VOLUME}: OK"
    else
      log " Volume ${VOLUME}: FAILED"
    fi
  else
    log " Volume ${VOLUME}: not found — skipping"
  fi
done
|
|
|
|
# ── Config files (bind mounts) ─────────────────────────────────────
# Bundles the Traefik, Matrix and Rocket.Chat configuration paths into a
# single tarball. tar's stderr is discarded; any non-zero exit is
# reported as a partial archive.
log "Backing up config directories..."

config_archive="${BACKUP_PATH}/configs-${TIMESTAMP}.tar.gz"
config_sources=(
  /opt/docker/traefik/traefik.yml
  /opt/docker/traefik/config/
  /opt/docker/matrix/docker-compose.yml
  /opt/docker/matrix/element-config/
  /opt/docker/matrix/synapse-config/homeserver.yaml
  /opt/docker/matrix/synapse-config/matrix.example.com.log.config
  /opt/docker/rocketchat/docker-compose.yml
)

if ! tar czf "$config_archive" "${config_sources[@]}" 2>/dev/null; then
  log " Configs: partial (some files missing)"
else
  log " Configs: OK"
fi
|
|
|
|
# Synapse signing key — sensitive, copied out separately with tight perms.
# The copy and chmod run inside an `if` condition so that a failure here
# is logged instead of exiting the script under `set -e` before the
# Minecraft arm gets a chance to run.
signing_key_src=/opt/docker/matrix/synapse-config/matrix.example.com.signing.key
if [ -f "$signing_key_src" ]; then
  signing_key_dst="${BACKUP_PATH}/synapse-signing-key-${TIMESTAMP}.key"
  if cp -- "$signing_key_src" "$signing_key_dst" \
    && chmod 600 -- "$signing_key_dst"; then
    log " Synapse signing key: backed up (600)"
  else
    log " Synapse signing key: FAILED"
    # Do not leave a partial or wrongly-permissioned copy of the key behind.
    rm -f -- "$signing_key_dst" || true
  fi
fi
|
|
|
|
# ── Minecraft server ───────────────────────────────────────────────
# This arm was missing from the deployed copy, and in the repo copy it
# was corrupted by an orphaned synapse-signing-key fragment. The work is
# done by the mc_backup function, which is invoked in a context where
# `set -e` is suspended, so a failure inside it does NOT exit the whole
# script — the prune step and the sentinel logic still run afterwards.
log "Backing up Minecraft server..."
|
|
|
|
# tar exit codes: 0 = clean, 1 = "some files differed/changed during read"
|
|
# (NORMAL on a live MC server — chunks save while we read), 2 = fatal.
|
|
# Treat 0 and 1 as success, 2+ as failure.
|
|
tar_ok() {
  # Succeeds when the given tar exit status is 0 or 1, fails otherwise.
  local status=$1
  test "$status" -le 1
}
|
|
|
|
#######################################
# Back up a RUNNING Minecraft server: world (tarred inside the
# container), plugins, plugin DBs and server-side config files.
# Globals:   BACKUP_PATH, TIMESTAMP (read); MC_RCON_PASSWORD (read,
#            optional); MC_WORLD_OK (set to 1 when the world is captured)
# Arguments: $1 - host directory holding the server data
# Outputs:   one log line per artifact
# Returns:   0 always; results are reported via log lines and MC_WORLD_OK
#######################################
_mc_backup_live() {
  local mc_dir=$1
  local world_tar="mc-world-backup-${TIMESTAMP}.tar.gz"

  # Ask the server to flush pending saves first, when mcrcon is installed.
  # Best effort: a failed flush must not prevent the backup.
  if command -v mcrcon >/dev/null 2>&1; then
    mcrcon -H 127.0.0.1 -P 25575 \
      -p "${MC_RCON_PASSWORD:-*redacted*}" \
      -w 1 "save-all flush" >/dev/null 2>&1 || true
  fi

  # World tar — runs inside the container. tar exit 1 ("file changed as
  # we read it") is expected on a live server and is accepted by tar_ok.
  local tar_rc=0
  docker exec minecraft-mc bash -c \
    "cd /data && tar czf /tmp/${world_tar} world/ world_nether/ world_the_end/" \
    >/dev/null 2>&1 || tar_rc=$?

  # Success means "tar was acceptable AND the archive reached the host".
  # Cleaning up the container-side temp file is deliberately NOT part of
  # this condition: a failed cleanup must not mark a good backup FAILED.
  if tar_ok "$tar_rc" \
    && docker cp "minecraft-mc:/tmp/${world_tar}" "${BACKUP_PATH}/" >/dev/null 2>&1; then
    local sz
    sz=$(du -sh "${BACKUP_PATH}/${world_tar}" | cut -f1) || sz='?'
    if [ "$tar_rc" -eq 1 ]; then
      log " Minecraft world: OK (${sz}) [tar exit 1 — files changed during read, expected on live server]"
    else
      log " Minecraft world: OK (${sz})"
    fi
    MC_WORLD_OK=1
  else
    log " Minecraft world: FAILED (tar_rc=${tar_rc})"
    # A truncated host-side copy must not sit in the run directory
    # looking like a usable world backup.
    rm -f -- "${BACKUP_PATH}/${world_tar}" || true
  fi
  # Best-effort removal of the (possibly half-written) file in the container.
  docker exec minecraft-mc rm -f "/tmp/${world_tar}" >/dev/null 2>&1 || true

  # Plugins (jars + on-disk config) — done regardless of the world result
  # so plugin state is always on hand. `--ignore-failed-read` covers spark
  # profiler tmp files; `--warning=no-file-changed` silences CoreProtect
  # db noise.
  local prc=0
  tar --ignore-failed-read --warning=no-file-changed \
    -czf "${BACKUP_PATH}/minecraft-plugins-${TIMESTAMP}.tar.gz" \
    -C "$mc_dir" plugins/ >/dev/null 2>&1 || prc=$?
  if tar_ok "$prc"; then
    log " Minecraft plugins: OK ($(du -sh "${BACKUP_PATH}/minecraft-plugins-${TIMESTAMP}.tar.gz" | cut -f1))"
  else
    log " Minecraft plugins: FAILED (rc=${prc})"
  fi

  # Plugin DBs — copied as files (not dumped) into their own tarball so
  # a restore is straightforward.
  local drc=0
  tar --ignore-failed-read --warning=no-file-changed \
    -czf "${BACKUP_PATH}/minecraft-dbs-${TIMESTAMP}.tar.gz" \
    -C "$mc_dir" \
    homestead_data.db \
    plugins/AuthMe/authme.db \
    plugins/CoreProtect/database.db \
    plugins/LuckPerms/ \
    >/dev/null 2>&1 || drc=$?
  if tar_ok "$drc"; then
    log " Minecraft DBs: OK ($(du -sh "${BACKUP_PATH}/minecraft-dbs-${TIMESTAMP}.tar.gz" | cut -f1))"
  else
    log " Minecraft DBs: partial (rc=${drc} — some files may be missing)"
  fi

  # Server-side configs and access lists. Some are optional (e.g.
  # whitelist.json is absent when whitelisting is off) and tar exits 2 on
  # a missing member, so only files that exist are passed to tar.
  local -a cfg_files=()
  local f
  for f in server.properties purpur.yml spigot.yml bukkit.yml \
    commands.yml help.yml permissions.yml \
    ops.json whitelist.json banned-players.json banned-ips.json \
    usercache.json eula.txt docker-compose.yml; do
    if [ -e "${mc_dir}/${f}" ]; then
      cfg_files+=("$f")
    fi
  done
  # Guard the empty case explicitly: expanding an empty array trips
  # `set -u` on bash < 4.4, and tar refuses to create an empty archive.
  if [ "${#cfg_files[@]}" -eq 0 ]; then
    log " Minecraft configs: FAILED (no config files found)"
  else
    local crc=0
    tar czf "${BACKUP_PATH}/minecraft-configs-${TIMESTAMP}.tar.gz" \
      -C "$mc_dir" "${cfg_files[@]}" \
      >/dev/null 2>&1 || crc=$?
    if tar_ok "$crc"; then
      log " Minecraft configs: OK (${#cfg_files[@]} files)"
    else
      log " Minecraft configs: FAILED (rc=${crc})"
    fi
  fi
  return 0
}

#######################################
# Back up a STOPPED Minecraft server straight from disk into a single
# "full" tarball. The three world directories are mandatory for the
# backup to count as a world capture; every other entry is optional and
# is skipped (and logged) when absent instead of making tar exit 2.
# Globals:   BACKUP_PATH, TIMESTAMP (read); MC_WORLD_OK (set to 1 when
#            all three world directories were archived)
# Arguments: $1 - host directory holding the server data
# Outputs:   log lines describing the result
# Returns:   0 always; results are reported via log lines and MC_WORLD_OK
#######################################
_mc_backup_offline() {
  local mc_dir=$1
  local archive="${BACKUP_PATH}/minecraft-full-backup-${TIMESTAMP}.tar.gz"
  local -a members=() missing=()
  local worlds_present=1
  local item

  for item in world world_nether world_the_end; do
    if [ -d "${mc_dir}/${item}" ]; then
      members+=("${item}/")
    else
      missing+=("${item}/")
      worlds_present=0
    fi
  done

  for item in plugins/ homestead_data.db server.properties purpur.yml \
    spigot.yml bukkit.yml ops.json whitelist.json banned-players.json \
    banned-ips.json usercache.json docker-compose.yml; do
    if [ -e "${mc_dir}/${item}" ]; then
      members+=("$item")
    else
      missing+=("$item")
    fi
  done

  if [ "${#members[@]}" -eq 0 ]; then
    log " Minecraft (offline): FAILED (nothing to archive under ${mc_dir})"
    return 0
  fi

  local frc=0
  tar czf "$archive" -C "$mc_dir" "${members[@]}" >/dev/null 2>&1 || frc=$?

  if [ "${#missing[@]}" -gt 0 ]; then
    log " Minecraft (offline): skipped missing entries: ${missing[*]}"
  fi
  if tar_ok "$frc" && [ "$worlds_present" -eq 1 ]; then
    log " Minecraft (full, offline): OK ($(du -sh "$archive" | cut -f1))"
    MC_WORLD_OK=1
  else
    log " Minecraft (offline): partial (rc=${frc})"
  fi
  return 0
}

#######################################
# Minecraft backup arm. Chooses the live path when the minecraft-mc
# container is listed by `docker ps`, otherwise the offline path.
# Globals:   MC_DATA_DIR (read, optional) - overrides the server data
#            directory; defaults to /opt/docker/minecraft
#            BACKUP_PATH, TIMESTAMP (read); MC_WORLD_OK (written)
# Arguments: none
# Outputs:   log lines via log()
# Returns:   0 always; callers should inspect MC_WORLD_OK
#######################################
mc_backup() {
  local mc_dir="${MC_DATA_DIR:-/opt/docker/minecraft}"

  # Capture the list before matching: `docker ps | grep -q` can fail
  # under pipefail when grep exits early and docker receives SIGPIPE.
  local running
  running=$(docker ps --format '{{.Names}}') || running=''

  if grep -Fqx 'minecraft-mc' <<<"$running"; then
    _mc_backup_live "$mc_dir"
  else
    _mc_backup_offline "$mc_dir"
  fi
  return 0
}
|
|
|
|
# Run the Minecraft arm. Calling it on the left of `||` suspends `set -e`
# inside the function, so it can never kill the rest of the script; a
# non-zero return is only noted in the log.
mc_backup || log " Minecraft arm exited non-zero — see lines above"
|
|
|
|
# ── Prune old backups ──────────────────────────────────────────────
# Remove run directories older than KEEP_DAYS and log files older than
# 30 days. Both steps are best-effort: errors are discarded on purpose
# so that pruning can never prevent the sentinel step from running.
log "Pruning backups older than ${KEEP_DAYS} days..."

# -mindepth 1 keeps the backup root itself out of the candidate set;
# without it an old-enough $BACKUP_DIR would be handed to `rm -rf`.
find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 -type d -mtime "+${KEEP_DAYS}" \
  -exec rm -rf -- {} + 2>/dev/null || true
find "$BACKUP_DIR" -mindepth 1 -maxdepth 1 -name "*.log" -mtime +30 \
  -delete 2>/dev/null || true

# The size is informational only: fall back to "unknown" rather than
# letting a failing `du` pipeline abort the script under `set -e`.
BACKUP_SIZE=$(du -sh "$BACKUP_PATH" 2>/dev/null | cut -f1) || BACKUP_SIZE='unknown'
[ -n "$BACKUP_SIZE" ] || BACKUP_SIZE='unknown'
log "=== Backup complete: ${BACKUP_PATH} (${BACKUP_SIZE}) ==="
|
|
|
|
# ── Sentinel ───────────────────────────────────────────────────────
# The sentinel file is rewritten only when the world (T1 case) was
# captured. An external monitor (cron on onyx, or ntfy/healthchecks once
# wired) can alert on `find /opt/backups/.last-success -mmin +1500` to
# catch silent failures within 25h of a missed daily run.
if [ "$MC_WORLD_OK" -eq 1 ]; then
  # Three key=value lines: completion time, run directory, total size.
  printf 'last_success=%s\nbackup_path=%s\nbackup_size=%s\n' \
    "$(date -Iseconds)" "$BACKUP_PATH" "$BACKUP_SIZE" > "$SENTINEL"
  log "Sentinel updated: ${SENTINEL}"
else
  log "WARNING: world backup did NOT succeed — sentinel NOT updated"
fi
|