Repairs the orphaned synapse-signing-key block at scripts/backup.sh
lines 119-122 that was exiting the script under set -e before the
Minecraft block could run, leaving 5 of the last 7 days without a
world backup and zero usable snapshots after 7-day retention.
Phase 1 (deployed today to /opt/docker/backup.sh on nullstone):
- Repaired script — orphan block removed, MC arm wrapped so failures
in one tar don't kill the run
- tar exit code 1 ("file changed as we read it") now treated as
success on the live MC world; spark profiler tmp file noise
silenced via --ignore-failed-read --warning=no-file-changed
- Plugin DBs (homestead, AuthMe, CoreProtect, LuckPerms) and configs
now backed up alongside the world
- Sentinel /opt/backups/.last-success stamped only when the world
arm succeeds — gives outside monitors a single mtime to alert on
- Manually verified end-to-end: 12G world tarball, 492M plugins,
279M dbs, 14 config files, sentinel updated. Pre-fix script saved
at /opt/docker/backup.sh.bak-20260507-pre-phase1.
Phase 2 (scripts in repo, deployment pending operator sudo):
- scripts/restic-backup-playerdata.sh — Class A 5-min restic snapshots
of playerdata/, stats/, advancements/, plugin DBs, LuckPerms;
rcon save-all flush before snapshot; tag-scoped retention
- scripts/restic-init.sh — one-time bootstrap (root-only) for
/etc/mc-backup.{env,pw} + repo init at /home/user/restic/
- scripts/systemd/mc-backup-playerdata.{service,timer} — 5-min timer
with hardening (ProtectSystem=strict, ReadOnlyPaths, etc.)
- docs/RUNBOOK-BACKUP-RESTORE.md updated with both phases'
deployment steps and the operator-action checklist
Off-host mirror to onyx (Phase 4) and Class B/C/D world snapshots
(Phase 3) are still TODO — see docs/BACKUP-STRATEGY.md §11 phase plan.
135 lines
4 KiB
Bash
135 lines
4 KiB
Bash
#!/usr/bin/env bash
# /usr/local/bin/restic-backup-playerdata.sh
#
# Class A backup per docs/BACKUP-STRATEGY.md — every 5 minutes, snapshot
# playerdata + stats + advancements + plugin DBs + LuckPerms config.
# Skips the heavy region/ files (those are Class B, hourly).
#
# Driven by mc-backup-playerdata.timer (5 min cadence).
#
# Pre-req: restic installed; one-time bootstrap performed by
# scripts/restic-init.sh which creates the local repo and writes
# /etc/mc-backup.env + /etc/mc-backup.pw.
#
# Status (2026-05-07): scripts shipped to repo; deployment to nullstone
# is BLOCKED on operator running `apt install restic` + scripts/restic-init.sh
# under sudo. See docs/RUNBOOK-BACKUP-RESTORE.md "Phase 2 deployment".
#
# Required in the env file:
#   RESTIC_REPOSITORY_FREQUENT  repo used for this backup class
#   RESTIC_PASSWORD_FILE        password file handed to restic
#   MC_DATA                     root of the Minecraft data directory
# Optional (environment or env file):
#   MC_BACKUP_ENV_FILE           env file path   (default /etc/mc-backup.env)
#   MC_BACKUP_LOG                log file        (default /var/log/mc-backup.log)
#   MC_BACKUP_FREQUENT_SENTINEL  sentinel file   (default
#                                /var/lib/mc-backup/last-success-frequent)
#   MC_RCON_HOST / MC_RCON_PORT / MC_RCON_PASSWORD  rcon endpoint for the flush
#   ALERT_URL / HEARTBEAT_URL    webhooks pinged on failure / success
#
# Exit codes set by this script: 2 = env file unreadable,
# 3 = none of the source paths exist; otherwise the failing command's status.
set -euo pipefail
# Everything this script creates (log, sentinel, directories) is owner-only.
umask 077

ENV_FILE="${MC_BACKUP_ENV_FILE:-/etc/mc-backup.env}"
if [ ! -r "$ENV_FILE" ]; then
  echo "FATAL: env file $ENV_FILE not readable — run scripts/restic-init.sh first" >&2
  exit 2
fi
# shellcheck disable=SC1090
. "$ENV_FILE"

# Abort with a clear message if the env file did not provide these.
: "${RESTIC_REPOSITORY_FREQUENT:?RESTIC_REPOSITORY_FREQUENT not set in $ENV_FILE}"
: "${RESTIC_PASSWORD_FILE:?RESTIC_PASSWORD_FILE not set in $ENV_FILE}"
: "${MC_DATA:?MC_DATA not set in $ENV_FILE}"

# restic reads both of these from its environment.
export RESTIC_REPOSITORY="$RESTIC_REPOSITORY_FREQUENT"
export RESTIC_PASSWORD_FILE

LOG="${MC_BACKUP_LOG:-/var/log/mc-backup.log}"
SENTINEL="${MC_BACKUP_FREQUENT_SENTINEL:-/var/lib/mc-backup/last-success-frequent}"
RCON_HOST="${MC_RCON_HOST:-127.0.0.1}"
RCON_PORT="${MC_RCON_PORT:-25575}"
RCON_PASS="${MC_RCON_PASSWORD:-}"

# Both output locations are overridable, so make sure their parent
# directories exist before the first write.
mkdir -p "$(dirname "$LOG")"
mkdir -p "$(dirname "$SENTINEL")"

# Print one timestamped line to stdout and append it to the run log.
# Globals:   LOG (read) — path of the log file
# Arguments: $* — message text, joined with spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] [frequent] <message>" on stdout and in $LOG
# Returns:   0 — logging is best-effort and never fails the caller
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # When tee cannot append to $LOG it still copies the line to stdout but
  # exits non-zero. With pipefail + set -e that would abort the whole backup
  # (or cut the error handler short before it alerts), so the status is
  # deliberately swallowed; tee's own complaint on stderr stays visible.
  printf '[%s] [frequent] %s\n' "$stamp" "$*" | tee -a "$LOG" || true
}

# ERR-trap handler: record the failure, ping the optional alert webhook and
# exit with the status of the command that failed.
# Globals:   ALERT_URL (read, optional) — webhook that receives the alert text
# Returns:   never; exits the script with the captured status
on_err() {
  # Must be the first statement so $? is still the failing command's status.
  local rc=$?
  # Disarm the trap so a failure inside the handler cannot re-enter it.
  trap - ERR
  # A broken log destination must not stop the alert from going out:
  # under set -e an unguarded failure here would exit before curl runs.
  log "ERROR rc=${rc} at line ${BASH_LINENO[0]}" || true
  if [ -n "${ALERT_URL:-}" ]; then
    # Best-effort: an unreachable webhook must not mask the original status.
    curl -fsS -m 5 -d "mc-backup-frequent FAILED rc=${rc}" "$ALERT_URL" \
      >/dev/null 2>&1 || true
  fi
  exit "$rc"
}
trap on_err ERR

log "=== run start (host=$(hostname)) ==="

# 1. Best-effort: ask the server to flush before snapshotting.
#    Don't fail the backup if rcon is down or unreachable — we'd rather
#    have a slightly-stale snapshot than no snapshot.
#
#    The password is handed to mcrcon through its MCRCON_PASS environment
#    variable instead of -p: command-line arguments are readable by every
#    local user via ps and /proc/<pid>/cmdline, the environment is not.
#    NOTE(review): confirm the installed mcrcon honours MCRCON_PASS.
if [ -n "$RCON_PASS" ] && command -v mcrcon >/dev/null 2>&1; then
  if MCRCON_PASS="$RCON_PASS" \
    mcrcon -H "$RCON_HOST" -P "$RCON_PORT" -w 1 \
    "save-all flush" >/dev/null 2>&1; then
    log "rcon save-all flush: ok"
  else
    log "rcon save-all flush: failed (continuing)"
  fi
else
  log "rcon: skipped (no mcrcon or no password)"
fi

# 2. Build the include list. Several of these paths are optional (a plugin
#    may not be installed, a dimension may not exist yet), and restic reports
#    an error for a backup target that is missing on disk, so the candidates
#    are filtered down to what actually exists before restic sees them.
INCLUDES=(
  "${MC_DATA}/world/playerdata"
  "${MC_DATA}/world/stats"
  "${MC_DATA}/world/advancements"
  "${MC_DATA}/world/level.dat"
  "${MC_DATA}/world_nether/level.dat"
  "${MC_DATA}/world_the_end/level.dat"
  "${MC_DATA}/homestead_data.db"
  "${MC_DATA}/plugins/AuthMe"
  "${MC_DATA}/plugins/CoreProtect/database.db"
  "${MC_DATA}/plugins/LuckPerms"
)

# EXISTING is consumed by the snapshot step.
EXISTING=()
for p in "${INCLUDES[@]}"; do
  if [ -e "$p" ]; then
    EXISTING+=("$p")
  fi
done

if [ ${#EXISTING[@]} -eq 0 ]; then
  log "no source paths exist — aborting"
  # An explicit exit does not run the ERR trap, so send the alert here;
  # otherwise a wrong or unmounted MC_DATA would fail without notification.
  if [ -n "${ALERT_URL:-}" ]; then
    curl -fsS -m 5 -d "mc-backup-frequent FAILED rc=3" "$ALERT_URL" \
      >/dev/null 2>&1 || true
  fi
  exit 3
fi

# 3. Snapshot. Tagged so retention policy can target this class only.
#    restic exit code 3 means a snapshot WAS created but some source files
#    could not be read. On a live server files can disappear between the
#    directory walk and the read, so that case is logged as a warning instead
#    of discarding an otherwise usable snapshot. Every other non-zero status
#    is still fatal.
log "snapshotting ${#EXISTING[@]} path(s)"
backup_rc=0
restic backup \
  --tag playerdata \
  --tag auto-5min \
  --host "$(hostname)" \
  --exclude='*.lock' \
  --exclude='*.tmp' \
  "${EXISTING[@]}" \
  >> "$LOG" 2>&1 || backup_rc=$?

if [ "$backup_rc" -eq 3 ]; then
  log "WARN: restic backup rc=3 — snapshot created, some files unreadable (continuing)"
elif [ "$backup_rc" -ne 0 ]; then
  log "restic backup failed rc=${backup_rc}"
  # Re-raise the status so set -e and the ERR trap (alerting) handle it
  # exactly as an unguarded failure would.
  (exit "$backup_rc")
  # Fallback in case errexit is not active.
  exit "$backup_rc"
fi

# 4. Light retention — only on this repo, only on this tag.
#    A retention failure is logged but does not fail the run: the snapshot
#    taken above is already safe.
restic forget \
  --tag auto-5min \
  --keep-last 24 \
  --keep-hourly 24 \
  --keep-daily 7 \
  --prune \
  --quiet \
  >> "$LOG" 2>&1 || log "forget+prune returned non-zero (continuing)"

# 5. Sentinel for external monitor.
#    Written to a temp file in the same directory and renamed into place, so
#    a monitor reading the file never sees a truncated or half-written one.
sentinel_tmp="${SENTINEL}.tmp.$$"
{
  printf 'last_success=%s\n' "$(date -Iseconds)"
  printf 'class=A\n'
  printf 'repo=%s\n' "$RESTIC_REPOSITORY"
} > "$sentinel_tmp"
mv -f -- "$sentinel_tmp" "$SENTINEL"

# 6. Heartbeat (no-op if HEARTBEAT_URL unset).
#    A failed ping never fails the run, but it is logged so a dead heartbeat
#    endpoint can be told apart from a dead backup.
if [ -n "${HEARTBEAT_URL:-}" ]; then
  curl -fsS -m 5 "$HEARTBEAT_URL" >/dev/null 2>&1 \
    || log "heartbeat ping failed (continuing)"
fi

log "=== run ok ==="