fix/feat: recorder finalize-grace + codec validation, cluster mem/version, library download

#162 local-spawn stop now uses /stop?t=180 + waits for asset to leave 'live'
before removing the container (no more SIGKILL-corrupted masters / stuck-live).
#163 validateRecorderConfig guard (PCM!=MP4, HEVC!=MXF, NVENC needs GPU) on
create+PATCH; codec presets in new-recorder modal.
#159 container list reads Docker /stats memory (N/A when null) + UI render.
#160 primary node self-populates version + uptime on the Cluster screen.
#145 asset-detail Download original gated by dismissable size warning.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Zac Gaetano 2026-05-31 18:34:36 -04:00
parent 794b9d9929
commit ca1eec0600
5 changed files with 231 additions and 15 deletions

View file

@ -1,10 +1,27 @@
import express from 'express';
import http from 'http';
import os from 'os';
import pool from '../db/pool.js';
import { requireAdmin } from '../middleware/auth.js';
// Express router that the route handlers in this module attach to.
const router = express.Router();
// Hostname the primary mam-api self-registers as (mirrors selfHeartbeat()).
// NODE_HOSTNAME wins when it is set to a non-empty value; otherwise the OS
// hostname is used. The node-list handler compares each row's hostname against
// this value to recognise the primary's own row.
const SELF_HOSTNAME = process.env.NODE_HOSTNAME || os.hostname();
/**
 * Format a process uptime (seconds) the way the Cluster UI expects — a short
 * human string showing only the two most significant units, e.g. "3d 4h",
 * "5h 12m" or "12m". Anything under a minute renders as "0m".
 *
 * Workers don't report uptime today, so the primary is the only row that
 * populates this.
 *
 * @param {number} seconds - Uptime in seconds; fractional parts are truncated.
 * @returns {string} Compact uptime label. Missing, non-finite or negative
 *   input yields "0m" instead of leaking "NaNm" / "-1m" into the UI.
 */
function formatUptime(seconds) {
  const total = Number(seconds);
  // Guard against undefined/NaN/Infinity/negative values so the UI never
  // renders a nonsensical label.
  if (!Number.isFinite(total) || total < 0) return '0m';

  const s = Math.floor(total);
  const d = Math.floor(s / 86400);
  const h = Math.floor((s % 86400) / 3600);
  const m = Math.floor((s % 3600) / 60);
  if (d > 0) return `${d}d ${h}h`;
  if (h > 0) return `${h}h ${m}m`;
  return `${m}m`;
}
// GET /onboard-info — admin-only. Supplies the Add Node wizard with the bits it
// needs to build a `curl … | bash` onboarding command: the primary API URL the
// remote node-agent should heartbeat to, the raw URL of onboard-node.sh, and
@ -63,10 +80,17 @@ router.get('/', async (req, res, next) => {
FROM cluster_nodes
ORDER BY registered_at ASC`
);
res.json(r.rows.map(row => ({
...row,
online: Number(row.stale_seconds) < 120,
})));
res.json(r.rows.map(row => {
const out = { ...row, online: Number(row.stale_seconds) < 120 };
// The primary (this mam-api host) does not heartbeat via the node-agent,
// so its version/uptime are never populated. Self-populate them here so
// the Cluster screen renders them like worker nodes instead of dashes.
if (row.role === 'primary' && row.hostname === SELF_HOSTNAME) {
out.version = process.env.npm_package_version || row.version || null;
out.uptime = formatUptime(process.uptime());
}
return out;
}));
} catch (err) { next(err); }
});
@ -74,13 +98,27 @@ router.get('/containers', async (req, res, next) => {
try {
const containers = await dockerRequest('/containers/json?all=true');
if (!Array.isArray(containers)) return res.json([]);
const out = containers.map(c => {
const out = await Promise.all(containers.map(async c => {
const rawName = (c.Names[0] || '').replace(/^\//, '');
const name = rawName.replace(/^wild-dragon-/, '').replace(/-\d+$/, '');
const ports = (c.Ports || [])
.filter(p => p.PublicPort)
.map(p => `${p.PublicPort}${p.PrivatePort}`)
.join(', ');
// Live memory usage requires a per-container stats call (the list endpoint
// doesn't include it). One extra Docker call each, but the list is small.
// memory_stats.usage includes page cache; subtract it to match `docker stats`.
let memBytes = null;
if (c.State === 'running') {
try {
const stats = await dockerRequest(`/containers/${c.Id}/stats?stream=false`);
const ms = stats && stats.memory_stats;
if (ms && typeof ms.usage === 'number') {
const cache = (ms.stats && ms.stats.cache) || 0;
memBytes = ms.usage - cache;
}
} catch (_) { memBytes = null; }
}
return {
id: c.Id.slice(0, 12),
name,
@ -90,9 +128,9 @@ router.get('/containers', async (req, res, next) => {
healthy: (c.Status || '').includes('healthy'),
ports,
cpu: 0,
mem: 0,
memBytes,
};
});
}));
res.json(out);
} catch (err) {
if (err.code === 'ENOENT' || err.code === 'EACCES') return res.json([]);

View file

@ -165,6 +165,94 @@ function pickRecorderFields(body) {
return out;
}
// Codecs that require an NVIDIA GPU on the target node.
const GPU_CODECS = ['hevc_nvenc', 'h264_nvenc'];

/**
 * Issue #163 — codec/container/audio compatibility guard.
 *
 * Rules:
 *   - PCM audio is only valid in MOV/MXF containers, never MP4 (an MP4 with a
 *     PCM track produces a corrupt/unplayable master — also part of #162).
 *   - HEVC is not valid in MXF in this build.
 *   - NVENC codecs require the target node to have a GPU.
 *
 * Field values are compared case-insensitively and with surrounding whitespace
 * ignored, so a padded value such as "mp4 " cannot slip past the guard.
 *
 * @param {object|null|undefined} cfg - Recorder config; reads
 *   `recording_container`, `recording_codec` and `recording_audio_codec`.
 * @param {boolean|null} [nodeHasGpu=null] - Tri-state: true (GPU present),
 *   false (no GPU), or null (unknown — node not resolvable at this point, so
 *   the GPU rule is only a soft check).
 * @returns {string|null} null when the config is valid (or `cfg` is missing),
 *   otherwise a descriptive error string naming the bad combo.
 */
function validateRecorderConfig(cfg, nodeHasGpu = null) {
  if (!cfg) return null;

  // Normalise a possibly-missing field for comparison only; error messages
  // still echo the value exactly as the caller supplied it.
  const normalize = (value) => String(value || '').trim().toLowerCase();

  const container = normalize(cfg.recording_container);
  const codec = normalize(cfg.recording_codec);
  const audio = normalize(cfg.recording_audio_codec);

  // PCM audio + MP4 → reject.
  if (container === 'mp4' && audio.startsWith('pcm')) {
    return `Invalid combo: PCM audio (${cfg.recording_audio_codec}) is not supported in an MP4 container. Use a MOV or MXF container, or switch the audio codec to AAC.`;
  }

  // HEVC in MXF → reject.
  if (container === 'mxf' && (codec === 'hevc' || codec === 'hevc_nvenc')) {
    return `Invalid combo: HEVC (${cfg.recording_codec}) is not supported in an MXF container in this build. Use a MOV/MP4 container, or pick a DNxHR/ProRes codec for MXF.`;
  }

  // NVENC requires a GPU on the target node. Only a hard error when we know the
  // node lacks one; unknown capability is left as a soft pass.
  if (GPU_CODECS.includes(codec) && nodeHasGpu === false) {
    return `Invalid combo: codec ${cfg.recording_codec} requires an NVIDIA GPU, but the target node reports no GPU. Choose a software codec (e.g. prores_hq, dnxhr_hq, h264) or assign a GPU node.`;
  }

  return null;
}
/**
 * Look up whether a recorder's target node reports at least one GPU.
 *
 * Reads the `capabilities` column of the node's `cluster_nodes` row and
 * inspects its `gpus` array.
 *
 * @param {*} nodeId - Target node id; a falsy value means "no node assigned".
 * @returns {Promise<boolean|null>} true/false when `capabilities.gpus` is an
 *   array (non-empty / empty), or null when the answer can't be resolved: no
 *   node id, no matching row, no `gpus` array, or the lookup threw. Callers
 *   treat null as a soft check per validateRecorderConfig.
 */
async function nodeHasGpuCapability(nodeId) {
  if (!nodeId) {
    return null;
  }

  try {
    const { rows } = await pool.query(
      'SELECT capabilities FROM cluster_nodes WHERE id = $1',
      [nodeId]
    );
    if (rows.length === 0) {
      return null;
    }

    const { capabilities } = rows[0];
    const gpuList = capabilities ? capabilities.gpus : undefined;
    return Array.isArray(gpuList) ? gpuList.length > 0 : null;
  } catch (_) {
    // Best-effort lookup: any failure is reported as "unknown".
    return null;
  }
}
// Promise-based delay used by the finalize poll loop.
const sleep = (ms) => new Promise(r => setTimeout(r, ms));

/**
 * Issue #162 — after a local-spawn stop, wait for the capture container to
 * finalize its master. The asset row was pre-created at start with
 * status='live' (display_name = current_session_id); the ingest/finalize step
 * flips it to ready/processing once the MOV/MP4 trailer is written. We poll
 * until the asset leaves 'live' (or disappears) or we hit the timeout, so we
 * don't DELETE the container — and SIGKILL ffmpeg — before the trailer lands.
 *
 * The asset is always checked at least once, the wait never sleeps past the
 * deadline, and one last check is made when the deadline is reached.
 *
 * @param {object} recorder - Recorder row; reads `project_id` and
 *   `current_session_id`.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=180000] - Maximum total time to wait.
 * @param {number} [options.intervalMs=3000] - Delay between polls.
 * @returns {Promise<boolean>} true when no 'live' asset row remains (or there
 *   was no session to wait on); false when the timeout elapsed first. Callers
 *   that ignore the result behave as before.
 */
async function waitForFinalize(recorder, { timeoutMs = 180000, intervalMs = 3000 } = {}) {
  if (!recorder.current_session_id) return true;

  const deadline = Date.now() + timeoutMs;
  for (;;) {
    try {
      const r = await pool.query(
        `SELECT 1 FROM assets
          WHERE project_id = $1
            AND display_name = $2
            AND status = 'live'
          LIMIT 1`,
        [recorder.project_id, recorder.current_session_id]
      );
      // No live asset row left → finalize is done (or there was none to wait on).
      if (r.rows.length === 0) return true;
    } catch (_) { /* transient DB error — keep polling until timeout */ }

    const remaining = deadline - Date.now();
    if (remaining <= 0) return false;
    // Clamp the final sleep so the total wait never exceeds timeoutMs.
    await sleep(Math.min(intervalMs, remaining));
  }
}
// GET / - List all recorders
//
// Issue #121 — previous version fired N PG queries + N Docker inspects per
@ -255,6 +343,13 @@ router.post('/', async (req, res, next) => {
};
const row = { id: uuidv4(), status: 'stopped', ...defaults, ...fields };
// Issue #163 — reject invalid codec/container/audio combos before insert.
const createGpu = await nodeHasGpuCapability(row.node_id);
const createErr = validateRecorderConfig(row, createGpu);
if (createErr) {
return res.status(400).json({ error: createErr });
}
// Build INSERT dynamically so adding columns later means one place to update.
const cols = Object.keys(row);
const placeholders = cols.map((_, i) => `$${i + 1}`).join(', ');
@ -321,6 +416,15 @@ router.patch('/:id', requireRecorderEdit, async (req, res, next) => {
return res.status(400).json({ error: 'No fields to update' });
}
// Issue #163 — validate the resulting config (existing row overlaid with the
// incoming changes) so a PATCH can't introduce an invalid combo either.
const merged = { ...recorder, ...fields };
const patchGpu = await nodeHasGpuCapability(merged.node_id);
const patchErr = validateRecorderConfig(merged, patchGpu);
if (patchErr) {
return res.status(400).json({ error: patchErr });
}
const setClause = cols.map((k, i) => `${k} = $${i + 1}`).join(', ');
const params = cols.map(k => fields[k]);
params.push(id);
@ -496,9 +600,8 @@ router.post('/:id/start', requireRecorderEdit, async (req, res, next) => {
}
// GPU-accelerated codecs require the NVIDIA container runtime on the node.
// hevc_nvenc / h264_nvenc are the only two we currently support; extend
// this list if av1_nvenc or others are added later.
const GPU_CODECS = ['hevc_nvenc', 'h264_nvenc'];
// hevc_nvenc / h264_nvenc are the only two we currently support (see the
// module-level GPU_CODECS list); extend it if av1_nvenc or others are added.
const useGpu = GPU_CODECS.includes(recorder.recording_codec);
// Determine whether to spawn locally or via a remote node-agent.
@ -663,9 +766,13 @@ router.post('/:id/stop', requireRecorderEdit, async (req, res, next) => {
return res.status(502).json({ error: 'Remote node failed to stop sidecar' });
}
} else {
// Issue #162 — stop WITH a grace period (t=180). Docker sends SIGTERM and
// waits up to 180s for ffmpeg to flush and write the MOV/MP4 trailer before
// it SIGKILLs. Without this the master is truncated/corrupt and the
// pre-created asset can get stuck in 'live'.
const stopRes = await dockerApi(
'POST',
`/containers/${recorder.container_id}/stop`
`/containers/${recorder.container_id}/stop?t=180`
);
// 204 = stopped, 304 = already stopped, 404 = container gone — all acceptable.
@ -678,6 +785,12 @@ router.post('/:id/stop', requireRecorderEdit, async (req, res, next) => {
// Only attempt remove if the container existed (not 404).
if (stopRes.status !== 404) {
// Issue #162 — before removing the container, wait for the master to
// finalize (asset leaves 'live'), mirroring the remote path's reliance on
// the node-agent's clean teardown. This guards against deleting the
// container — and its lingering finalize work — too early.
await waitForFinalize(recorder);
const removeRes = await dockerApi(
'DELETE',
`/containers/${recorder.container_id}`

View file

@ -397,6 +397,25 @@ function NewRecorderModal({ open, onClose }) {
</div>
<div className="modal-section-body">
{recTab === 'video' && (
<>
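{/* Codec presets — one click fills codec + bitrate with a known-good
combo that passes the server-side validateRecorderConfig guard.
Container is derived from the codec (HEVC/ProRes/DNxHR → MOV,
H.264 → MP4), and master audio is always PCM (valid in MOV).
NOTE(review): if the H.264 → MP4 preset really keeps PCM master audio,
the server guard rejects it (PCM is not allowed in MP4), and both NVENC
presets are rejected on a node known to have no GPU — confirm. */}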
<div style={{ display: 'flex', flexWrap: 'wrap', gap: 6, marginBottom: 12 }}>
{[
{ id: 'hevc', label: 'HEVC Master (MOV)', codec: 'hevc_nvenc', bitrate: '60' },
{ id: 'h264', label: 'H.264 Proxy-friendly (MP4)', codec: 'h264_nvenc', bitrate: '25' },
{ id: 'dnxhr', label: 'DNxHR HQ (MOV)', codec: 'dnxhr_hq', bitrate: '145' },
].map(p => (
<button key={p.id}
className={`btn ghost sm${recCodec === p.codec ? ' active' : ''}`}
onClick={() => { setRecCodec(p.codec); setRecBitrate(p.bitrate); }}
style={{ flexShrink: 0 }}>
{p.label}
</button>
))}
</div>
<div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 10 }}>
<div className="field">
<label className="field-label">Video codec</label>
@ -444,6 +463,7 @@ function NewRecorderModal({ open, onClose }) {
return null;
})()}
</div>
</>
)}
{recTab === 'audio' && (
<div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 10 }}>

View file

@ -1169,7 +1169,11 @@ function Containers() {
<span>{(c.cpu || 0).toFixed(1)}%</span>
</div>
</div>
<div className="mono" style={{ fontSize: 11.5 }}>{c.mem} MB</div>
<div className="mono" style={{ fontSize: 11.5 }}>
{c.memBytes != null
? `${Math.round(c.memBytes / 1048576)} MB`
: "N/A"}
</div>
<div className="mono" style={{ fontSize: 10.5, color: "var(--text-3)" }}>{c.ports}</div>
<div style={{ display: "flex", gap: 4 }}>
<button className="btn ghost sm" onClick={() => showLogs(c)}>Logs</button>

View file

@ -199,6 +199,45 @@ function AssetDetail({ asset, onClose }) {
// Pull a presigned hi-res URL and trigger a browser download with the
// asset's display name as the filename. Falls back to opening in a new tab.
const [downloading, setDownloading] = React.useState(false);
// Gate the download behind a one-time "large file / connection speed"
// warning, shared with the library via the df.lib.download.warnDismissed
// localStorage flag. Once dismissed, downloads start without the prompt.
const dismissForeverRef = React.useRef(false);
const requestDownload = async function() {
if (downloading) return;
let dismissed = false;
try { dismissed = localStorage.getItem('df.lib.download.warnDismissed') === '1'; } catch (_) {}
if (!dismissed) {
dismissForeverRef.current = false;
const ok = await confirm({
title: 'Download original',
message: <div>
<div style={{ fontSize: 13, color: 'var(--text-2)', lineHeight: 1.5, marginBottom: 10 }}>
You're about to download the full-length original master for <b>{asset.name}</b>.
These files can be very large and download speed depends on your connection.
</div>
<label style={{ display: 'flex', alignItems: 'center', gap: 8, fontSize: 12.5, color: 'var(--text-3)', cursor: 'pointer' }}>
<input
type="checkbox"
onChange={function(e) { dismissForeverRef.current = e.target.checked; }}
/>
Don't show this warning again
</label>
</div>,
confirmLabel: 'Download',
cancelLabel: 'Cancel',
danger: false,
});
if (!ok) return;
// Persist the dismissal only after the user confirms the download.
if (dismissForeverRef.current) {
try { localStorage.setItem('df.lib.download.warnDismissed', '1'); } catch (_) {}
}
}
downloadHires();
};
const downloadHires = function() {
if (downloading) return;
setDownloading(true);
@ -372,9 +411,11 @@ function AssetDetail({ asset, onClose }) {
</div>
</div>
<div style={{ flex: 1 }} />
<button className="btn ghost sm" onClick={downloadHires} disabled={downloading} title="Download the hi-res master file">
<Icon name="download" />{downloading ? 'Preparing…' : 'Download'}
</button>
{asset.original_s3_key && (
<button className="btn ghost sm" onClick={requestDownload} disabled={downloading} title="Download the hi-res master file">
<Icon name="download" />{downloading ? 'Preparing…' : 'Download'}
</button>
)}
<div style={{ position: 'relative' }}>
<button ref={moreBtnRef} className="icon-btn" aria-label="More actions" onClick={function(e) { e.stopPropagation(); setMenuOpen(function(v) { return !v; }); }}>
<Icon name="more" />