From 549c86efdffe076a1b97a6aa758e3991b0bd1ea8 Mon Sep 17 00:00:00 2001 From: s8n Date: Sat, 9 May 2026 01:11:31 +0100 Subject: [PATCH] doc 26 + bin: incident 2026-05-09 + headless smoke-test Symptoms: Page Unresponsive on poster grid, posters missing then black backdrops, 'Abspielen' German Play button surviving Traefik+force-english chases, video black-screen on play. Root causes (different from initial guesses): - Browser hangs: deployed index.html drifted ahead of repo; uncommitted forceEnglishUI() text-walker MutationObserver froze main thread on poster lazy-load. Reverted to repo HEAD. - 'Abspielen': Cineplex theme HARDCODES German via 'content:' ::after rule -- not a Jellyfin locale issue. Doc 25 already proved per-user UICulture is theatre. Override CSS with content: 'Play'. - Backdrops black: BLACK-PASS CustomCss block paints #000 !important on .layout-desktop / .pageContainer -- occludes backdrop layer (z-index:-1). Existing transparent-scope rule used body.itemDetailPage selector that doesn't match in 10.10.3 (body class is libraryDocument). Replaced with :has(.itemDetailPage) ancestor scoping. - HLS 499: encoding.xml had EnableThrottling+EnableSegmentDeletion=true, segments reaped before browser re-request. Disabled both. Verified via new bin/headless-test.py (playwright Chromium login + screenshot + computed-style probe). Fixes idempotent and re-runnable via new bin/apply-26-incident-fixes.sh. Open: AV1+Opus items still black-screen in Chrome due to DirectStream codec-tag mislabel bug. Tracked for 10.11.8 migration. 
--- bin/apply-26-incident-fixes.sh | 80 +++ bin/headless-test.py | 174 ++++++ ...26-05-09-page-unresponsive-and-playback.md | 561 ++++++++++++++++++ 3 files changed, 815 insertions(+) create mode 100755 bin/apply-26-incident-fixes.sh create mode 100755 bin/headless-test.py create mode 100644 docs/26-incident-2026-05-09-page-unresponsive-and-playback.md diff --git a/bin/apply-26-incident-fixes.sh b/bin/apply-26-incident-fixes.sh new file mode 100755 index 0000000..1c93d24 --- /dev/null +++ b/bin/apply-26-incident-fixes.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# apply-26-incident-fixes.sh +# +# Re-applies the three server-state fixes from docs/26 if branding.xml / +# encoding.xml drift back to broken state (e.g. after a Jellyfin restore). +# +# 1. CustomCss: Cineplex hardcoded "Abspielen" → "Play" +# 2. CustomCss: Backdrop transparent-scope using :has() (BLACK-PASS occluded backdrop layer) +# 3. encoding.xml: EnableThrottling=false + EnableSegmentDeletion=false (kills HLS 499) +# +# Usage: ssh user@nullstone "$(cat bin/apply-26-incident-fixes.sh)" +# Idempotent: re-running is safe. + +set -euo pipefail + +# 3. encoding.xml — disable throttling + segment deletion (both containers if present) +for cfg in /home/docker/jellyfin/config/config/encoding.xml \ + /home/docker/jellyfin-dev/config/config/encoding.xml; do + [ -f "$cfg" ] || continue + cp -n "$cfg" "$cfg.bak.pre-doc26" || true + sed -i \ + -e 's|true|false|' \ + -e 's|true|false|' \ + "$cfg" + echo "[+] patched $cfg" +done + +# 1+2. 
branding.xml CustomCss — Abspielen + backdrop transparent-scope +patch_branding() { + local cfg="$1" + [ -f "$cfg" ] || return 0 + if grep -q "ARRFLIX 2026-05-09" "$cfg"; then + echo "[=] $cfg already has doc-26 patch" + return 0 + fi + cp -n "$cfg" "$cfg.bak.pre-doc26" || true + python3 - <", patch + "") +open(p, "w").write(s) +PY + echo "[+] patched $cfg" +} + +patch_branding /home/docker/jellyfin/config/config/branding.xml +patch_branding /home/docker/jellyfin-dev/config/config/branding.xml + +# Restart so changes take effect +docker restart jellyfin jellyfin-dev 2>/dev/null || docker restart jellyfin + +echo "[*] Done. Verify with bin/headless-test.py." diff --git a/bin/headless-test.py b/bin/headless-test.py new file mode 100755 index 0000000..1ea85da --- /dev/null +++ b/bin/headless-test.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +"""ARRFLIX headless smoke-test. Logs in via API, navigates to a detail page, +captures screenshot + console errors + network failures + computed-style for +backdrop. 
Pass dev or prod URL as argv[1].""" +import sys, json, time, os, asyncio, urllib.request, urllib.error +from playwright.async_api import async_playwright + +URL = sys.argv[1] if len(sys.argv) > 1 else "https://dev.arrflix.s8n.ru" +USER = sys.argv[2] if len(sys.argv) > 2 else "guest-mirror" +PASS = sys.argv[3] if len(sys.argv) > 3 else "dev-test-guest" +ITEM = sys.argv[4] if len(sys.argv) > 4 else None # auto-pick first Series if absent +OUT = sys.argv[5] if len(sys.argv) > 5 else "/tmp/arrflix-headless" +os.makedirs(OUT, exist_ok=True) + +DEVICE = "headless-test" +DEVICE_ID = "headless-test-2026-05-09" +CLIENT = "Headless" +VERSION = "1.0" + +def auth_header(token=None): + h = (f'MediaBrowser Client="{CLIENT}", Device="{DEVICE}", ' + f'DeviceId="{DEVICE_ID}", Version="{VERSION}"') + if token: + h += f', Token="{token}"' + return h + +def api_post(path, body, token=None): + req = urllib.request.Request( + f"{URL}{path}", + data=json.dumps(body).encode(), + headers={ + "Authorization": auth_header(token), + "Content-Type": "application/json", + }, + method="POST", + ) + ctx = __import__("ssl")._create_unverified_context() + with urllib.request.urlopen(req, context=ctx) as r: + return json.loads(r.read()) + +def api_get(path, token=None): + req = urllib.request.Request( + f"{URL}{path}", + headers={"Authorization": auth_header(token)}, + ) + ctx = __import__("ssl")._create_unverified_context() + with urllib.request.urlopen(req, context=ctx) as r: + return json.loads(r.read()) + +def login(): + r = api_post("/Users/AuthenticateByName", + {"Username": USER, "Pw": PASS}) + return r["AccessToken"], r["User"]["Id"], r["ServerId"] + +async def main(): + token, user_id, server_id = login() + print(f"[+] Authenticated as {USER} ({user_id})") + + item_id = ITEM + if not item_id: + items = api_get( + f"/Users/{user_id}/Items?Recursive=true&IncludeItemTypes=Series&Limit=5", + token) + if items.get("Items"): + item_id = items["Items"][0]["Id"] + print(f"[+] Auto-picked Series: 
{items['Items'][0]['Name']} ({item_id})") + else: + print("[!] No series found, falling back to root") + + console_messages = [] + network_failures = [] + + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True, + args=["--no-sandbox", "--disable-dev-shm-usage"]) + ctx = await browser.new_context( + viewport={"width": 1600, "height": 900}, + ignore_https_errors=True) + page = await ctx.new_page() + + page.on("console", lambda m: console_messages.append( + f"[{m.type}] {m.text}")) + page.on("requestfailed", lambda r: network_failures.append( + f"{r.method} {r.url} :: {r.failure}")) + page.on("response", lambda r: None if r.status < 400 else + network_failures.append(f"HTTP {r.status} {r.url}")) + + # Auth via login form + await page.goto(f"{URL}/web/", wait_until="networkidle", timeout=30000) + await asyncio.sleep(3) + # Wait for any input rendered by SPA + try: + await page.wait_for_selector("input", timeout=20000) + inputs = await page.evaluate( + "() => Array.from(document.querySelectorAll('input')).map(i => ({id:i.id, name:i.name, type:i.type, placeholder:i.placeholder}))") + print(f"[*] inputs: {inputs}") + # Find username input by heuristic + user_sel = None + pass_sel = None + for i in inputs: + fid, fname, ftype = i.get('id',''), i.get('name',''), i.get('type','') + if not user_sel and (ftype == 'text' or 'user' in (fid+fname).lower() or 'name' in (fid+fname).lower()): + user_sel = f'#{fid}' if fid else f'input[name="{fname}"]' + if not pass_sel and ftype == 'password': + pass_sel = f'#{fid}' if fid else f'input[name="{fname}"]' + print(f"[*] user_sel={user_sel} pass_sel={pass_sel}") + if user_sel and pass_sel: + await page.fill(user_sel, USER) + await page.fill(pass_sel, PASS) + await page.keyboard.press("Enter") + await page.wait_for_load_state("networkidle", timeout=20000) + await asyncio.sleep(2) + print("[+] logged in via form") + else: + print("[!] could not locate login fields") + except Exception as e: + print(f"[!] 
form login failed: {e}") + + # Navigate to detail page + target = (f"{URL}/web/#/details?id={item_id}&serverId={server_id}" + if item_id else f"{URL}/web/") + print(f"[*] navigating: {target}") + await page.goto(target, wait_until="networkidle", timeout=30000) + await asyncio.sleep(4) # let SPA paint backdrop + + # Probe key DOM elements + probe = await page.evaluate("""() => { + const result = {}; + const sel = ['.itemBackdrop', '.detailBackdrop', '.backdropContainer', + '.backgroundContainer', '.layout-desktop', + 'body', '#reactRoot', '.itemDetailPage', + 'video', '.htmlvideoplayer', '.btnPlay', '.detailPagePrimaryContainer']; + for (const s of sel) { + const el = document.querySelector(s); + if (!el) { result[s] = ''; continue; } + const cs = getComputedStyle(el); + result[s] = { + display: cs.display, + opacity: cs.opacity, + visibility: cs.visibility, + background: cs.backgroundColor, + backgroundImage: cs.backgroundImage.slice(0, 80), + zIndex: cs.zIndex, + rect: el.getBoundingClientRect().toJSON(), + }; + } + result['__title'] = document.title; + const playBtn = document.querySelector('.btnPlay, [data-action="play"]'); + result['__playBtnText'] = playBtn ? 
(playBtn.innerText || playBtn.textContent || '').trim() : null; + result['__bodyClasses'] = document.body.className; + result['__url'] = location.href; + return result; + }""") + + screenshot = os.path.join(OUT, f"{URL.replace('https://','').replace('.','_')}-detail.png") + await page.screenshot(path=screenshot, full_page=False) + print(f"[+] screenshot: {screenshot}") + + with open(os.path.join(OUT, "probe.json"), "w") as f: + json.dump({ + "url": URL, + "user": USER, + "item": item_id, + "probe": probe, + "console": console_messages[-50:], + "network_failures": network_failures[-50:], + }, f, indent=2) + print(f"[+] probe.json: {os.path.join(OUT, 'probe.json')}") + print(f"[+] console msgs: {len(console_messages)}") + print(f"[+] network failures: {len(network_failures)}") + + await browser.close() + +asyncio.run(main()) diff --git a/docs/26-incident-2026-05-09-page-unresponsive-and-playback.md b/docs/26-incident-2026-05-09-page-unresponsive-and-playback.md new file mode 100644 index 0000000..3abe894 --- /dev/null +++ b/docs/26-incident-2026-05-09-page-unresponsive-and-playback.md @@ -0,0 +1,561 @@ +# 26 — Incident 2026-05-09: Page Unresponsive + Posters Missing + Playback Black-Screen + +> Session log. Live document — updated as fix proceeds. Goal: future-me + other operators can read this and skip every dead-end I already walked. + +Status as of doc creation: **ONGOING** — partial fix applied, more under investigation. + +--- + +## Symptoms reported by owner (in order) + +1. "Browser arrflix is broken videos don't play at all" +2. "I can't even see a preview of the TV series / movie" +3. After first fix: page loads, posters render, but **"Page Unresponsive"** Chrome dialog before posters paint (screenshot 1) +4. 
After second fix attempt: posters render, but **"Abspielen"** (German Play button) instead of "Play"; **all backdrop art replaced by black**; **video plays as black screen** (screenshot 2) + +--- + +## Root causes identified so far + +### A — Browser hangs (resolved by fix #1) + +`/opt/docker/jellyfin/web-overrides/index.html` deployed copy was AHEAD of repo HEAD. md5 deployed `b97c1cb4` ≠ repo `d77c106b`. Someone hot-patched a `forceEnglishUI()` text-walker MutationObserver onto `document.body` with `subtree:true, characterData:true`. Walker rewrote `alt`/`title`/`aria-label` on every DOM mutation. Poster grid lazy-load fired it hundreds of times → main thread frozen → Chrome "Page Unresponsive". + +**Fix applied:** scp'd repo HEAD `index.html` over deployed, restarted container. Verified md5 matches. + +**Lesson:** never hot-patch the bind-mount. Always commit + redeploy from repo. Drift is invisible until something breaks. + +### B — DB write failures (auto-resolved before this session) + +Agent investigation found `jellyfin.db` had been owned by uid 101000 (userns-remap leftover, see `~/.claude/projects/-home-admin-ai-lab/memory/project_nullstone_docker_userns.md`). Container ran as 1000 → SQLite Error 8: `attempt to write a readonly database`. By the time we re-checked, file was already `user:user`. Probably fixed during 23:22 container restart. + +**Lesson:** if `jellyfin.db` is unwritable, EVERY user-config save silently fails (HTTP 204 success, value not persisted). Check ownership FIRST when config writes don't stick. + +### C — German "Abspielen" leak (NOT YET FIXED — current focus) + +User's `Configuration.UICulture` is `` for ALL 12 users. Tried POST `/Users/{id}/Configuration` with `UICulture: en-US` payload via `bin/force-english-all-users.sh`. Server returned HTTP 204 but field did NOT persist on subsequent GET. **POST silently drops UICulture**. 
+ +Possible explanation: the `UserConfiguration` model in 10.10.3 may have removed the per-user UICulture field, OR the `Users` table schema (verified) has no UICulture column AND no Preferences row stores it. Doc 15 claims `Configuration.UICulture` is authoritative, but that doc dates from when the fix worked. Behavior may have shifted. + +Traefik DOES rewrite `Accept-Language: en-US,en;q=0.9` on every request (`force-en-accept-lang@file` middleware) AND rewrites the locale chunk JS path so `de-json.X.chunk.js` → `en-us-json.667484b4a441712c7e05.chunk.js`. Verified via curl: `de-json.X.chunk.js` returns 107425 bytes of English content. + +**So why is German still leaking?** Service Worker cache. The browser's SW serves a stale German chunk from CacheStorage, never hits the network, and never sees the Traefik rewrite. That SW was installed before the lockdown was deployed. + +Tried: `Clear-Site-Data: "cache", "cookies", "storage"` Traefik response header on `/web/index.html`. Verified live via curl. **But the user's browser STILL has the SW cache** — the SW intercepts the GET to `/web/index.html` and serves it from cache, so the server's response (with Clear-Site-Data) never reaches the browser's cache layer. SW prevents its own death. + +### D — Backdrops missing (NOT YET INVESTIGATED) + +User reports backdrop art (the wide background image behind episode cards) is now black for every show. Could be: +- Image not in DB/cache (server returning empty) +- CSS hiding backdrop element +- SW serving stale 404 from a bad earlier session +- Jellyfin metadata refresh interrupted + +### E — Video black screen on play (NOT YET FIXED) + +Server logs show ffmpeg IS transcoding HEVC source → H.264 high@5.1 + libfdk_aac. But the browser shows black. Earlier `/Sessions` proved DirectPlay worked for one client (RemoteEndPoint 82.31.156.86). Recent attempts: HLS segment 186.mp4 returned **499 (client closed connection)** + `POST /Sessions/Playing/Progress` returned **502 Bad Gateway** at 23:31:49 (during a momentary Traefik upstream-missing window). 
+ +Possible causes: +- SW intercepting HLS init segment, serving stale/wrong-mime +- 10-bit HEVC source → H.264 transcode timing issue +- CSS hiding `