From ca1eec06006e6336eb94f2fed283054b4f781501 Mon Sep 17 00:00:00 2001
From: Zac Gaetano
Date: Sun, 31 May 2026 18:34:36 -0400
Subject: [PATCH] fix/feat: recorder finalize-grace + codec validation, cluster mem/version, library download

#162 local-spawn stop now uses /stop?t=180 + waits for asset to leave 'live' before removing the container (no more SIGKILL-corrupted masters / stuck-live).
#163 validateRecorderConfig guard (PCM!=MP4, HEVC!=MXF, NVENC needs GPU) on create+PATCH; codec presets in new-recorder modal.
#159 container list reads Docker /stats memory (N/A when null) + UI render.
#160 primary node self-populates version + uptime on the Cluster screen.
#145 asset-detail Download original gated by dismissable size warning.

Co-Authored-By: Claude Opus 4.8
---
 services/mam-api/src/routes/cluster.js        |  52 +++++++-
 services/mam-api/src/routes/recorders.js      | 121 +++++++++++++++++-
 services/web-ui/public/modal-new-recorder.jsx |  20 +++
 services/web-ui/public/screens-admin.jsx      |   6 +-
 services/web-ui/public/screens-asset.jsx      |  47 ++++++-
 5 files changed, 231 insertions(+), 15 deletions(-)

diff --git a/services/mam-api/src/routes/cluster.js b/services/mam-api/src/routes/cluster.js
index 5f02fe6..e50ee34 100644
--- a/services/mam-api/src/routes/cluster.js
+++ b/services/mam-api/src/routes/cluster.js
@@ -1,10 +1,27 @@
 import express from 'express';
 import http from 'http';
+import os from 'os';
 import pool from '../db/pool.js';
 import { requireAdmin } from '../middleware/auth.js';
 
 const router = express.Router();
 
+// Hostname the primary mam-api self-registers as (mirrors selfHeartbeat()).
+const SELF_HOSTNAME = process.env.NODE_HOSTNAME || os.hostname();
+
+// Format a process uptime (seconds) the way the Cluster UI expects — a short
+// human string like "3d 4h" / "12m". Workers don't report uptime today, so the
+// primary is the only row that populates this.
+function formatUptime(seconds) {
+  const s = Number.isFinite(Number(seconds)) ? Math.max(0, Math.floor(Number(seconds))) : 0; // NaN/±Infinity/negative → 0, never "NaNm" or "-1m"
+  const d = Math.floor(s / 86400);
+  const h = Math.floor((s % 86400) / 3600);
+  const m = Math.floor((s % 3600) / 60);
+  if (d > 0) return `${d}d ${h}h`; // a day or more: days + hours, minutes dropped
+  if (h > 0) return `${h}h ${m}m`; // an hour or more: hours + minutes
+  return `${m}m`; // under an hour (including 0): minutes only
+}
+
 // GET /onboard-info – admin-only. Supplies the Add Node wizard with the bits it
 // needs to build a `curl … | bash` onboarding command: the primary API URL the
 // remote node-agent should heartbeat to, the raw URL of onboard-node.sh, and
@@ -63,10 +80,17 @@ router.get('/', async (req, res, next) => {
        FROM cluster_nodes
        ORDER BY registered_at ASC`
     );
-    res.json(r.rows.map(row => ({
-      ...row,
-      online: Number(row.stale_seconds) < 120,
-    })));
+    res.json(r.rows.map(row => {
+      const out = { ...row, online: Number(row.stale_seconds) < 120 };
+      // The primary (this mam-api host) does not heartbeat via the node-agent,
+      // so its version/uptime are never populated. Self-populate them here so
+      // the Cluster screen renders them like worker nodes instead of dashes.
+      if (row.role === 'primary' && row.hostname === SELF_HOSTNAME) {
+        out.version = process.env.npm_package_version || row.version || null;
+        out.uptime = formatUptime(process.uptime());
+      }
+      return out;
+    }));
   } catch (err) { next(err); }
 });
 
@@ -74,13 +98,27 @@ router.get('/containers', async (req, res, next) => {
   try {
     const containers = await dockerRequest('/containers/json?all=true');
     if (!Array.isArray(containers)) return res.json([]);
-    const out = containers.map(c => {
+    const out = await Promise.all(containers.map(async c => {
       const rawName = (c.Names[0] || '').replace(/^\//, '');
       const name = rawName.replace(/^wild-dragon-/, '').replace(/-\d+$/, '');
       const ports = (c.Ports || [])
         .filter(p => p.PublicPort)
         .map(p => `${p.PublicPort}→${p.PrivatePort}`)
         .join(', ');
+      // Live memory usage requires a per-container stats call (the list endpoint
+      // doesn't include it). One extra Docker call each, but the list is small.
+      // memory_stats.usage includes reclaimable file cache; subtract the inactive-file share to match `docker stats`.
+      let memBytes = null;
+      if (c.State === 'running') {
+        try {
+          const stats = await dockerRequest(`/containers/${c.Id}/stats?stream=false`);
+          const ms = stats && stats.memory_stats;
+          if (ms && typeof ms.usage === 'number') {
+            const inactive = (ms.stats && (ms.stats.total_inactive_file || ms.stats.inactive_file)) || 0; // cgroup v1 field, else cgroup v2 field
+            memBytes = inactive < ms.usage ? ms.usage - inactive : ms.usage; // never report a negative figure
+          }
+        } catch (_) { memBytes = null; }
+      }
       return {
         id: c.Id.slice(0, 12),
         name,
@@ -90,9 +128,9 @@ router.get('/containers', async (req, res, next) => {
         healthy: (c.Status || '').includes('healthy'),
         ports,
         cpu: 0,
-        mem: 0,
+        memBytes,
       };
-    });
+    }));
     res.json(out);
   } catch (err) {
     if (err.code === 'ENOENT' || err.code === 'EACCES') return res.json([]);
diff --git a/services/mam-api/src/routes/recorders.js b/services/mam-api/src/routes/recorders.js
index 017ac83..253f569 100644
--- a/services/mam-api/src/routes/recorders.js
+++ b/services/mam-api/src/routes/recorders.js
@@ -165,6 +165,94 @@ function pickRecorderFields(body) {
   return out;
 }
 
+// Codecs that require an NVIDIA GPU on the target node.
+const GPU_CODECS = ['hevc_nvenc', 'h264_nvenc'];
+
+// Issue #163 — codec/container/audio compatibility guard. Returns null when the
+// config is valid, otherwise a descriptive error string naming the bad combo.
+// `nodeHasGpu` is tri-state: true (GPU present), false (no GPU), or null
+// (unknown — node not resolvable at this point, so GPU is only a soft check).
+//
+// Rules:
+//   - PCM audio is only valid in MOV/MXF containers, never MP4 (an MP4 with a
+//     PCM track produces a corrupt/unplayable master — also part of #162).
+//   - HEVC is not valid in MXF in this build.
+//   - NVENC codecs require the target node to have a GPU.
+function validateRecorderConfig(cfg, nodeHasGpu = null) {
+  if (!cfg) return null;
+
+  const lower = (value) => String(value || '').toLowerCase();
+  const wrapper = lower(cfg.recording_container);
+  const video = lower(cfg.recording_codec);
+  const sound = lower(cfg.recording_audio_codec);
+  const hevcFamily = ['hevc', 'hevc_nvenc'];
+
+  // Rule 1: an MP4 wrapper cannot carry a PCM audio track.
+  if (wrapper === 'mp4' && sound.startsWith('pcm')) {
+    return `Invalid combo: PCM audio (${cfg.recording_audio_codec}) is not supported in an MP4 container. Use a MOV or MXF container, or switch the audio codec to AAC.`;
+  }
+  // Rule 2: HEVC (software or NVENC) cannot be written into MXF.
+  if (wrapper === 'mxf' && hevcFamily.includes(video)) {
+    return `Invalid combo: HEVC (${cfg.recording_codec}) is not supported in an MXF container in this build. Use a MOV/MP4 container, or pick a DNxHR/ProRes codec for MXF.`;
+  }
+  // Rule 3: NVENC needs a GPU. Rejected only when the node is known to lack
+  // one (nodeHasGpu === false); null means unknown and is allowed through.
+  if (GPU_CODECS.includes(video) && nodeHasGpu === false) {
+    return `Invalid combo: codec ${cfg.recording_codec} requires an NVIDIA GPU, but the target node reports no GPU. Choose a software codec (e.g. prores_hq, dnxhr_hq, h264) or assign a GPU node.`;
+  }
+
+  return null;
+}
+
+// Resolve whether a recorder's target node has a GPU. Returns true/false when
+// the node's heartbeat capability is known, or null when it can't be resolved
+// (no node assigned / no capability reported) — callers treat null as a soft
+// check per validateRecorderConfig.
+async function nodeHasGpuCapability(nodeId) {
+  if (!nodeId) return null;
+  try {
+    const { rows } = await pool.query(
+      'SELECT capabilities FROM cluster_nodes WHERE id = $1',
+      [nodeId]
+    );
+    if (rows.length === 0) return null;
+    const capabilities = rows[0].capabilities;
+    const gpuList = capabilities && capabilities.gpus;
+    if (!Array.isArray(gpuList)) return null;
+    return gpuList.length > 0;
+  } catch (_) {
+    return null;
+  }
+}
+
+const sleep = (ms) => new Promise((resolve) => { setTimeout(resolve, ms); });
+
+// Issue #162 — once a local-spawn stop has been issued, give the capture
+// container time to finalize its master. The asset row is created up front with
+// status='live' (display_name = current_session_id) and leaves that status when
+// finalize completes. Poll until no matching 'live' row remains, or until the
+// timeout expires, so the caller does not remove the container — and SIGKILL
+// ffmpeg — before the MOV/MP4 trailer is written.
+async function waitForFinalize(recorder, { timeoutMs = 180000, intervalMs = 3000 } = {}) {
+  if (!recorder.current_session_id) return;
+  const stopAt = Date.now() + timeoutMs;
+  while (Date.now() < stopAt) {
+    try {
+      const { rows: stillLive } = await pool.query(
+        `SELECT 1 FROM assets
+          WHERE project_id = $1
+            AND display_name = $2
+            AND status = 'live'
+          LIMIT 1`,
+        [recorder.project_id, recorder.current_session_id]
+      );
+      // An empty result means finalize completed (or nothing was pending).
+      if (stillLive.length === 0) return;
+    } catch (_) { /* transient DB error — keep polling until timeout */ }
+    await sleep(intervalMs);
+  }
+}
+
 // GET / - List all recorders
 //
 // Issue #121 — previous version fired N PG queries + N Docker inspects per
@@ -255,6 +343,13 @@ router.post('/', async (req, res, next) => {
     };
     const row = { id: uuidv4(), status: 'stopped', ...defaults, ...fields };
 
+    // Issue #163 — reject invalid codec/container/audio combos before insert.
+ const createGpu = await nodeHasGpuCapability(row.node_id); + const createErr = validateRecorderConfig(row, createGpu); + if (createErr) { + return res.status(400).json({ error: createErr }); + } + // Build INSERT dynamically so adding columns later means one place to update. const cols = Object.keys(row); const placeholders = cols.map((_, i) => `$${i + 1}`).join(', '); @@ -321,6 +416,15 @@ router.patch('/:id', requireRecorderEdit, async (req, res, next) => { return res.status(400).json({ error: 'No fields to update' }); } + // Issue #163 — validate the resulting config (existing row overlaid with the + // incoming changes) so a PATCH can't introduce an invalid combo either. + const merged = { ...recorder, ...fields }; + const patchGpu = await nodeHasGpuCapability(merged.node_id); + const patchErr = validateRecorderConfig(merged, patchGpu); + if (patchErr) { + return res.status(400).json({ error: patchErr }); + } + const setClause = cols.map((k, i) => `${k} = $${i + 1}`).join(', '); const params = cols.map(k => fields[k]); params.push(id); @@ -496,9 +600,8 @@ router.post('/:id/start', requireRecorderEdit, async (req, res, next) => { } // GPU-accelerated codecs require the NVIDIA container runtime on the node. - // hevc_nvenc / h264_nvenc are the only two we currently support; extend - // this list if av1_nvenc or others are added later. - const GPU_CODECS = ['hevc_nvenc', 'h264_nvenc']; + // hevc_nvenc / h264_nvenc are the only two we currently support (see the + // module-level GPU_CODECS list); extend it if av1_nvenc or others are added. const useGpu = GPU_CODECS.includes(recorder.recording_codec); // Determine whether to spawn locally or via a remote node-agent. @@ -663,9 +766,13 @@ router.post('/:id/stop', requireRecorderEdit, async (req, res, next) => { return res.status(502).json({ error: 'Remote node failed to stop sidecar' }); } } else { + // Issue #162 — stop WITH a grace period (t=180). 
Docker sends SIGTERM and + // waits up to 180s for ffmpeg to flush and write the MOV/MP4 trailer before + // it SIGKILLs. Without this the master is truncated/corrupt and the + // pre-created asset can get stuck in 'live'. const stopRes = await dockerApi( 'POST', - `/containers/${recorder.container_id}/stop` + `/containers/${recorder.container_id}/stop?t=180` ); // 204 = stopped, 304 = already stopped, 404 = container gone — all acceptable. @@ -678,6 +785,12 @@ router.post('/:id/stop', requireRecorderEdit, async (req, res, next) => { // Only attempt remove if the container existed (not 404). if (stopRes.status !== 404) { + // Issue #162 — before removing the container, wait for the master to + // finalize (asset leaves 'live'), mirroring the remote path's reliance on + // the node-agent's clean teardown. This guards against deleting the + // container — and its lingering finalize work — too early. + await waitForFinalize(recorder); + const removeRes = await dockerApi( 'DELETE', `/containers/${recorder.container_id}` diff --git a/services/web-ui/public/modal-new-recorder.jsx b/services/web-ui/public/modal-new-recorder.jsx index e89900a..236da92 100644 --- a/services/web-ui/public/modal-new-recorder.jsx +++ b/services/web-ui/public/modal-new-recorder.jsx @@ -397,6 +397,25 @@ function NewRecorderModal({ open, onClose }) {
{recTab === 'video' && ( + <> + {/* Codec presets — one click fills codec + bitrate with a known-good + combo that passes the server-side validateRecorderConfig guard. + Container is derived from the codec (HEVC/ProRes/DNxHR → MOV, + H.264 → MP4), and master audio is always PCM (valid in MOV). */} +
+ {[ + { id: 'hevc', label: 'HEVC Master (MOV)', codec: 'hevc_nvenc', bitrate: '60' }, + { id: 'h264', label: 'H.264 Proxy-friendly (MP4)', codec: 'h264_nvenc', bitrate: '25' }, + { id: 'dnxhr', label: 'DNxHR HQ (MOV)', codec: 'dnxhr_hq', bitrate: '145' }, + ].map(p => ( + + ))} +
@@ -444,6 +463,7 @@ function NewRecorderModal({ open, onClose }) { return null; })()}
+ )} {recTab === 'audio' && (
diff --git a/services/web-ui/public/screens-admin.jsx b/services/web-ui/public/screens-admin.jsx index 64c5a61..264f003 100644 --- a/services/web-ui/public/screens-admin.jsx +++ b/services/web-ui/public/screens-admin.jsx @@ -1169,7 +1169,11 @@ function Containers() { {(c.cpu || 0).toFixed(1)}%
-
{c.mem} MB
+
+ {c.memBytes != null + ? `${Math.round(c.memBytes / 1048576)} MB` + : "N/A"} +
{c.ports}
diff --git a/services/web-ui/public/screens-asset.jsx b/services/web-ui/public/screens-asset.jsx index 5226898..81aa80f 100644 --- a/services/web-ui/public/screens-asset.jsx +++ b/services/web-ui/public/screens-asset.jsx @@ -199,6 +199,45 @@ function AssetDetail({ asset, onClose }) { // Pull a presigned hi-res URL and trigger a browser download with the // asset's display name as the filename. Falls back to opening in a new tab. const [downloading, setDownloading] = React.useState(false); + + // Gate the download behind a one-time "large file / connection speed" + // warning, shared with the library via the df.lib.download.warnDismissed + // localStorage flag. Once dismissed, downloads start without the prompt. + const dismissForeverRef = React.useRef(false); + const requestDownload = async function() { + if (downloading) return; + let dismissed = false; + try { dismissed = localStorage.getItem('df.lib.download.warnDismissed') === '1'; } catch (_) {} + if (!dismissed) { + dismissForeverRef.current = false; + const ok = await confirm({ + title: 'Download original', + message:
+
+ You're about to download the full-length original master for {asset.name}. + These files can be very large and download speed depends on your connection. +
+ +
, + confirmLabel: 'Download', + cancelLabel: 'Cancel', + danger: false, + }); + if (!ok) return; + // Persist the dismissal only after the user confirms the download. + if (dismissForeverRef.current) { + try { localStorage.setItem('df.lib.download.warnDismissed', '1'); } catch (_) {} + } + } + downloadHires(); + }; + const downloadHires = function() { if (downloading) return; setDownloading(true); @@ -372,9 +411,11 @@ function AssetDetail({ asset, onClose }) {
- + {asset.original_s3_key && ( + + )}