production-deb/scripts/test-vm.sh
obsidian-ai 0f5bbf004a fork: production-deb v0.1.0 from debian-s8ns-prefs-iso server variant
Server-only canonical production Debian build. Drops laptop/vanilla
variants. Interactive LUKS + hostname at install. user/123 forced rotate.
DVD-1 offline base. S8N_LOGS log-capture partition.

Lineage: forked from s8n/debian-s8ns-prefs-iso commit d4be55f.
2026-05-08 13:53:38 +01:00

150 lines
4.9 KiB
Bash
Executable file

#!/usr/bin/env bash
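# test-vm.sh — VM smoke test harness for built ISOs.
# Boots the ISO via qemu+OVMF, runs the unattended preseed install on a fresh
# qcow2 disk, then boots the installed system and checks the success criteria
# this script currently implements: sshd answers with an SSH banner, and the
# serial logs are searched for the post-install and LUKS-rekey markers.
# Variant-specific checks (broadcom-sta-dkms package present if laptop, dark
# theme default, etc.) are not performed by this script.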
#
# Usage: scripts/test-vm.sh out/debian-s8ns-VARIANT-DATE.iso [VARIANT]
#
# What it does:
# 1. Create 30 GiB qcow2 in /tmp/s8n-vmtest/
# 2. Boot ISO with OVMF UEFI, preseed runs unattended, expects ~15-40 min
#    (hard limit: 45 min, after which the install phase is reported as failed)
# 3. After install completes, ISO ejects, system reboots
# 4. Boot installed system, capture serial console
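# 5. Probe the SSH banner through the qemu user-mode net forward
#    (host 127.0.0.1:2222 → guest port 22), then search the serial logs for
#    the post-install and LUKS-rekey markers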
# 6. Report PASS / FAIL with what was checked
#
# Requires: qemu-system-x86_64, qemu-img, OVMF firmware under /usr/share/OVMF,
# nc (netcat) and timeout (coreutils). KVM is used if available.
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Positional arguments:
#   $1  path to the ISO to test (required, must be an existing regular file)
#   $2  variant name (optional; derived from the ISO file name when omitted)
ISO="${1:-}"
VARIANT="${2:-}"
[[ -f "$ISO" ]] || { echo "Usage: $0 path/to/iso [variant]" >&2; exit 1; }

# Auto-detect variant from filename if not given.
# Only the basename is matched: a directory component that happens to contain
# one of the keywords (e.g. /home/laptop/out/...-server-....iso) must not
# decide the variant.
if [[ -z "$VARIANT" ]]; then
  case "${ISO##*/}" in
    *laptop*)  VARIANT=laptop ;;
    *server*)  VARIANT=server ;;
    *vanilla*) VARIANT=vanilla ;;
    *) echo "ERR: cannot detect variant from filename, pass as 2nd arg" >&2; exit 1 ;;
  esac
fi
# Working directory for every artifact of a run; callers may preset VMDIR in
# the environment to relocate it.
: "${VMDIR:=/tmp/s8n-vmtest}"
mkdir -p "$VMDIR"

# Per-run artifacts, all kept inside the working directory.
DISK="$VMDIR/disk.qcow2"            # target disk the installer writes to
VARS="$VMDIR/OVMF_VARS.fd"          # private, writable copy of the UEFI variable store
INSTALL_LOG="$VMDIR/install.log"    # serial console of the install phase
BOOT_LOG="$VMDIR/firstboot.log"     # serial console of the first boot

# Fresh state per run: drop leftovers of a previous run, then recreate the
# empty 30G disk and the pristine UEFI variable store.
stale_artifacts=("$DISK" "$VARS" "$INSTALL_LOG" "$BOOT_LOG")
rm -f "${stale_artifacts[@]}"
qemu-img create -f qcow2 "$DISK" 30G >/dev/null
cp /usr/share/OVMF/OVMF_VARS.fd "$VARS"
# Base QEMU command line shared by the install phase and the first-boot phase.
# Each phase appends its own -boot / -serial / -monitor (and cdrom) options.
# The command is built as an array so that no argument depends on
# word-splitting of an unquoted string.
QEMU_BASE=(
  qemu-system-x86_64
  -m 2048 -smp 2
)
# Hardware acceleration is optional. Enable it only when /dev/kvm is both
# readable and writable for the current user: QEMU needs read-write access to
# the node, so a readable-only node must fall back to plain emulation rather
# than make QEMU abort on -enable-kvm.
if [[ -r /dev/kvm && -w /dev/kvm ]]; then
  QEMU_BASE+=(-enable-kvm -cpu host)
fi
QEMU_BASE+=(
  # UEFI firmware: shared read-only code image + this run's private variable store.
  -drive if=pflash,format=raw,readonly=on,file=/usr/share/OVMF/OVMF_CODE.fd
  -drive if=pflash,format=raw,file="$VARS"
  # User-mode networking; host 127.0.0.1:2222 is forwarded to guest port 22.
  -netdev user,id=n0,hostfwd=tcp:127.0.0.1:2222-:22
  -device virtio-net-pci,netdev=n0
  # Installation target disk.
  -drive file="$DISK",format=qcow2,if=virtio
  # Headless: no display and none of QEMU's default devices.
  -display none
  -nodefaults
)
echo "[test] === Phase 1: unattended install from $ISO ==="
echo "[test] log: $INSTALL_LOG"
echo "[test] expected duration: 15-40 min"
# Boot from ISO (cdrom). The guest's serial console (kernel + d-i progress) is
# written to $INSTALL_LOG.
# -no-reboot makes QEMU exit when the guest reboots at the end of the install.
# Without it QEMU restarts the guest and keeps running until the 2700 s
# timeout kills it, which would report a finished install as a failure.
install_rc=0
timeout 2700 "${QEMU_BASE[@]}" \
  -drive file="$ISO",format=raw,if=virtio,readonly=on,media=cdrom \
  -boot d \
  -no-reboot \
  -serial file:"$INSTALL_LOG" \
  -monitor null \
  || install_rc=$?
if (( install_rc != 0 )); then
  echo "[test] FAIL: install phase exited non-zero (timeout or error). Last 30 lines of $INSTALL_LOG:" >&2
  echo "[test] exit status: $install_rc (124 means the 2700 s timeout expired)" >&2
  # The log may be missing if QEMU failed before opening it; never let the
  # tail itself decide the exit path.
  tail -30 "$INSTALL_LOG" >&2 || true
  exit 1
fi
echo "[test] install phase exited (kernel reboot or shutdown)"
# Sanity: did the install actually finish? Look for late_command output.
# A missing marker is only a warning; phase 2 is the real proof.
if grep -q 'late_command' "$INSTALL_LOG" || grep -qi 'finishing the installation' "$INSTALL_LOG"; then
  echo "[test] late_command observed — proceeding to phase 2"
else
  echo "[test] WARN: no late_command marker in install log — install may have aborted mid-way"
  tail -50 "$INSTALL_LOG" >&2
fi
echo
echo "[test] === Phase 2: first boot from installed system ==="
# Fresh OVMF_VARS to avoid stale boot order
# NOTE(review): this also discards whatever the installer stored in the UEFI
# variable store; presumably the installed disk is still found by the
# firmware's default boot path — confirm.
cp /usr/share/OVMF/OVMF_VARS.fd "$VARS"

# Stop the background VM, if one is running. Idempotent, so it serves both as
# an explicit call and as the EXIT trap.
# QEMU_PID holds the PID of the `timeout` wrapper, not of QEMU itself. SIGKILL
# cannot be caught, so killing the wrapper with -9 would leave QEMU running
# (still holding the disk image and port 2222). SIGTERM is relayed by timeout
# to the command it supervises.
_stop_vm() {
  [[ -n "${QEMU_PID:-}" ]] || return 0
  kill "$QEMU_PID" 2>/dev/null || true
  wait "$QEMU_PID" 2>/dev/null || true
  QEMU_PID=""
}

# Print the first line received from the forwarded SSH port, or nothing if no
# data arrives within 5 s. Always returns 0.
_ssh_banner() {
  timeout 5 nc 127.0.0.1 2222 </dev/null 2>/dev/null | head -1 || true
}

# Boot from disk; give 6 min for first boot + DKMS module build + tailscale unit.
# -k 10: also send SIGKILL if QEMU is still running 10 s after the initial signal.
timeout -k 10 360 "${QEMU_BASE[@]}" \
  -boot c \
  -serial file:"$BOOT_LOG" \
  -monitor null \
  &
QEMU_PID=$!
# Make sure the VM never outlives the script, whatever the exit path.
trap _stop_vm EXIT

# Wait for sshd inside the guest. The host side of the port forward is opened
# by QEMU itself, so a successful TCP connect proves nothing about the guest;
# only an SSH identification line ("SSH-...") does. Poll for that line instead.
echo "[test] waiting for SSH on 127.0.0.1:2222 ..."
SSH_UP=0
BANNER=""
wait_start=$SECONDS
for ((i = 1; i <= 60; i++)); do
  # Stop polling as soon as QEMU (or its timeout wrapper) is gone.
  if ! kill -0 "$QEMU_PID" 2>/dev/null; then
    echo "[test] QEMU exited before SSH came up" >&2
    break
  fi
  BANNER=$(_ssh_banner)
  BANNER=${BANNER%$'\r'}   # the identification line ends in CR LF
  if [[ "$BANNER" == SSH-* ]]; then
    SSH_UP=1
    echo "[test] SSH responding after $(( SECONDS - wait_start ))s"
    break
  fi
  sleep 5
done

if (( SSH_UP != 1 )); then
  if [[ -n "$BANNER" ]]; then
    # Something answered on the port, but it does not speak SSH.
    echo "[test] SSH banner: $BANNER"
    echo "[test] FAIL: bad SSH banner" >&2
    _stop_vm
    exit 3
  fi
  echo "[test] FAIL: SSH never came up. firstboot log tail:" >&2
  tail -50 "$BOOT_LOG" >&2 || true
  _stop_vm
  exit 2
fi
echo "[test] SSH banner: $BANNER"

# Stop the VM (SIGTERM to QEMU via the timeout wrapper; the guest is not shut
# down from the inside).
echo "[test] PASS: SSH up. Killing VM."
_stop_vm
trap - EXIT
printf '\n'
printf '%s\n' "[test] === Verification ==="

# Marker emitted by the s8n post-install step, if it reached the first-boot
# serial console.
post_install_re='s8n.*post-install (start|done)'
if ! grep -qE "$post_install_re" "$BOOT_LOG"; then
  printf '%s\n' "[test] INFO: no s8n post-install marker in firstboot log (may be OK if runs only at install time)"
else
  printf '%s\n' "[test] OK: post-install run.sh signal found in boot log"
fi

# Marker of the LUKS rekey; it may show up in either phase's log.
luks_re='luks.*rotation complete'
if grep -qE "$luks_re" "$INSTALL_LOG" || grep -qE "$luks_re" "$BOOT_LOG"; then
  printf '%s\n' "[test] OK: LUKS rekey ran"
else
  printf '%s\n' "[test] WARN: no LUKS rekey marker"
fi

# Final report: inputs, log locations with their line counts, overall verdict.
printf '\n'
printf '%s\n' "[test] SUMMARY"
printf '%s\n' " ISO : $ISO"
printf '%s\n' " Variant : $VARIANT"
printf '%s\n' " Install log: $INSTALL_LOG ($(wc -l <"$INSTALL_LOG") lines)"
printf '%s\n' " Boot log : $BOOT_LOG ($(wc -l <"$BOOT_LOG") lines)"
printf '\n'
printf '%s\n' "[test] PASS: VM install+first-boot+SSH succeeded for $VARIANT"