fix/feat: recorder finalize-grace + codec validation, cluster mem/version, library download
#162 local-spawn stop now uses /stop?t=180 + waits for asset to leave 'live' before removing the container (no more SIGKILL-corrupted masters / stuck-live). #163 validateRecorderConfig guard (PCM!=MP4, HEVC!=MXF, NVENC needs GPU) on create+PATCH; codec presets in new-recorder modal. #159 container list reads Docker /stats memory (N/A when null) + UI render. #160 primary node self-populates version + uptime on the Cluster screen. #145 asset-detail Download original gated by dismissable size warning. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
794b9d9929
commit
ca1eec0600
5 changed files with 231 additions and 15 deletions
|
|
@ -1,10 +1,27 @@
|
|||
import express from 'express';
|
||||
import http from 'http';
|
||||
import os from 'os';
|
||||
import pool from '../db/pool.js';
|
||||
import { requireAdmin } from '../middleware/auth.js';
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
// Hostname the primary mam-api self-registers as (mirrors selfHeartbeat()).
|
||||
const SELF_HOSTNAME = process.env.NODE_HOSTNAME || os.hostname();
|
||||
|
||||
// Format a process uptime (seconds) the way the Cluster UI expects — a short
// human string like "3d 4h" / "5h 12m" / "12m". Only the two most significant
// units are shown; seconds are dropped. Workers don't report uptime today, so
// the primary is the only row that populates this.
//
//   seconds  uptime in seconds; fractional values are floored.
//   returns  the compact string, or "0m" for anything under a minute and for
//            input that is negative or not a finite number.
function formatUptime(seconds) {
  const total = Math.floor(Number(seconds));

  // Guard against NaN / Infinity / negative input, which would otherwise be
  // rendered in the UI as "NaNm" or "-1m".
  if (!Number.isFinite(total) || total < 0) return '0m';

  const days = Math.floor(total / 86400);
  const hours = Math.floor((total % 86400) / 3600);
  const minutes = Math.floor((total % 3600) / 60);

  if (days > 0) return `${days}d ${hours}h`;
  if (hours > 0) return `${hours}h ${minutes}m`;
  return `${minutes}m`;
}
|
||||
|
||||
// GET /onboard-info – admin-only. Supplies the Add Node wizard with the bits it
|
||||
// needs to build a `curl … | bash` onboarding command: the primary API URL the
|
||||
// remote node-agent should heartbeat to, the raw URL of onboard-node.sh, and
|
||||
|
|
@ -63,10 +80,17 @@ router.get('/', async (req, res, next) => {
|
|||
FROM cluster_nodes
|
||||
ORDER BY registered_at ASC`
|
||||
);
|
||||
res.json(r.rows.map(row => ({
|
||||
...row,
|
||||
online: Number(row.stale_seconds) < 120,
|
||||
})));
|
||||
res.json(r.rows.map(row => {
|
||||
const out = { ...row, online: Number(row.stale_seconds) < 120 };
|
||||
// The primary (this mam-api host) does not heartbeat via the node-agent,
|
||||
// so its version/uptime are never populated. Self-populate them here so
|
||||
// the Cluster screen renders them like worker nodes instead of dashes.
|
||||
if (row.role === 'primary' && row.hostname === SELF_HOSTNAME) {
|
||||
out.version = process.env.npm_package_version || row.version || null;
|
||||
out.uptime = formatUptime(process.uptime());
|
||||
}
|
||||
return out;
|
||||
}));
|
||||
} catch (err) { next(err); }
|
||||
});
|
||||
|
||||
|
|
@ -74,13 +98,27 @@ router.get('/containers', async (req, res, next) => {
|
|||
try {
|
||||
const containers = await dockerRequest('/containers/json?all=true');
|
||||
if (!Array.isArray(containers)) return res.json([]);
|
||||
const out = containers.map(c => {
|
||||
const out = await Promise.all(containers.map(async c => {
|
||||
const rawName = (c.Names[0] || '').replace(/^\//, '');
|
||||
const name = rawName.replace(/^wild-dragon-/, '').replace(/-\d+$/, '');
|
||||
const ports = (c.Ports || [])
|
||||
.filter(p => p.PublicPort)
|
||||
.map(p => `${p.PublicPort}→${p.PrivatePort}`)
|
||||
.join(', ');
|
||||
// Live memory usage requires a per-container stats call (the list endpoint
|
||||
// doesn't include it). One extra Docker call each, but the list is small.
|
||||
// memory_stats.usage includes page cache; subtract it to match `docker stats`.
|
||||
let memBytes = null;
|
||||
if (c.State === 'running') {
|
||||
try {
|
||||
const stats = await dockerRequest(`/containers/${c.Id}/stats?stream=false`);
|
||||
const ms = stats && stats.memory_stats;
|
||||
if (ms && typeof ms.usage === 'number') {
|
||||
const cache = (ms.stats && ms.stats.cache) || 0;
|
||||
memBytes = ms.usage - cache;
|
||||
}
|
||||
} catch (_) { memBytes = null; }
|
||||
}
|
||||
return {
|
||||
id: c.Id.slice(0, 12),
|
||||
name,
|
||||
|
|
@ -90,9 +128,9 @@ router.get('/containers', async (req, res, next) => {
|
|||
healthy: (c.Status || '').includes('healthy'),
|
||||
ports,
|
||||
cpu: 0,
|
||||
mem: 0,
|
||||
memBytes,
|
||||
};
|
||||
});
|
||||
}));
|
||||
res.json(out);
|
||||
} catch (err) {
|
||||
if (err.code === 'ENOENT' || err.code === 'EACCES') return res.json([]);
|
||||
|
|
|
|||
|
|
@ -165,6 +165,94 @@ function pickRecorderFields(body) {
|
|||
return out;
|
||||
}
|
||||
|
||||
// Codecs that require an NVIDIA GPU on the target node.
const GPU_CODECS = ['hevc_nvenc', 'h264_nvenc'];

// Issue #163 — codec/container/audio compatibility guard. Returns null when the
// config is valid, otherwise a descriptive error string naming the bad combo.
//
//   cfg         recorder config; reads recording_container, recording_codec
//               and recording_audio_codec. A falsy cfg is treated as valid.
//   nodeHasGpu  tri-state: true (GPU present), false (no GPU), or null
//               (unknown — node not resolvable at this point, so GPU is only a
//               soft check).
//
// Values are compared case-insensitively and with surrounding whitespace
// ignored, so a padded value such as " MP4 " cannot slip past the guard. The
// error messages echo the caller's original, un-normalised values.
//
// Rules (checked in this order; the first failure is returned):
//   - PCM audio is only valid in MOV/MXF containers, never MP4 (an MP4 with a
//     PCM track produces a corrupt/unplayable master — also part of #162).
//   - HEVC is not valid in MXF in this build.
//   - NVENC codecs require the target node to have a GPU.
function validateRecorderConfig(cfg, nodeHasGpu = null) {
  if (!cfg) return null;

  const normalize = (value) => String(value || '').trim().toLowerCase();
  const container = normalize(cfg.recording_container);
  const codec = normalize(cfg.recording_codec);
  const audio = normalize(cfg.recording_audio_codec);

  // PCM audio + MP4 → reject.
  if (container === 'mp4' && audio.startsWith('pcm')) {
    return `Invalid combo: PCM audio (${cfg.recording_audio_codec}) is not supported in an MP4 container. Use a MOV or MXF container, or switch the audio codec to AAC.`;
  }

  // HEVC in MXF → reject.
  if (container === 'mxf' && (codec === 'hevc' || codec === 'hevc_nvenc')) {
    return `Invalid combo: HEVC (${cfg.recording_codec}) is not supported in an MXF container in this build. Use a MOV/MP4 container, or pick a DNxHR/ProRes codec for MXF.`;
  }

  // NVENC requires a GPU on the target node. Only a hard error when we know the
  // node lacks one; unknown capability (null) is left as a soft pass.
  if (GPU_CODECS.includes(codec) && nodeHasGpu === false) {
    return `Invalid combo: codec ${cfg.recording_codec} requires an NVIDIA GPU, but the target node reports no GPU. Choose a software codec (e.g. prores_hq, dnxhr_hq, h264) or assign a GPU node.`;
  }

  return null;
}
|
||||
|
||||
// Resolve whether a recorder's target node has a GPU. Looks up the node's
// `capabilities` in cluster_nodes and inspects its `gpus` array.
//
//   nodeId   id of the cluster_nodes row; falsy means "no node assigned".
//   returns  true/false when `capabilities.gpus` is an array (non-empty /
//            empty), or null when it can't be resolved (no node assigned, no
//            matching row, no gpus array reported, or the lookup failed) —
//            callers treat null as a soft check per validateRecorderConfig.
//
// Never throws: a failed lookup is logged and reported as "unknown" so that a
// database hiccup does not block recorder create/PATCH.
async function nodeHasGpuCapability(nodeId) {
  if (!nodeId) return null;

  try {
    const result = await pool.query(
      'SELECT capabilities FROM cluster_nodes WHERE id = $1',
      [nodeId]
    );
    if (result.rows.length === 0) return null;

    const caps = result.rows[0].capabilities;
    const gpus = caps && caps.gpus;
    if (!Array.isArray(gpus)) return null;

    return gpus.length > 0;
  } catch (err) {
    // Best-effort by design, but don't hide the failure: falling back to
    // "unknown" silently disables the NVENC hard check for this request.
    console.warn(
      `nodeHasGpuCapability: capability lookup failed for node ${nodeId}; treating GPU support as unknown:`,
      err
    );
    return null;
  }
}
|
||||
|
||||
// Resolve after `ms` milliseconds.
const sleep = (ms) => new Promise(r => setTimeout(r, ms));

// Issue #162 — after a local-spawn stop, wait for the capture container to
// finalize its master. The asset row was pre-created at start with
// status='live' (display_name = current_session_id); the ingest/finalize step
// flips it to ready/processing once the MOV/MP4 trailer is written. We poll
// until the asset leaves 'live' (or disappears) or we hit the timeout, so we
// don't DELETE the container — and SIGKILL ffmpeg — before the trailer lands.
//
//   recorder    reads current_session_id and project_id.
//   timeoutMs   upper bound on the total wait (default 180000).
//   intervalMs  delay between polls (default 3000).
//   returns     true when no 'live' asset row remains (or there was no session
//               to wait on), false when the timeout elapsed first. Existing
//               callers that ignore the result are unaffected.
//
// The wait never exceeds timeoutMs by a whole poll interval: each sleep is
// clamped to the time remaining, and one last poll is made at the deadline.
async function waitForFinalize(recorder, { timeoutMs = 180000, intervalMs = 3000 } = {}) {
  if (!recorder.current_session_id) return true;

  const deadline = Date.now() + timeoutMs;
  for (;;) {
    try {
      const r = await pool.query(
        `SELECT 1 FROM assets
          WHERE project_id = $1
            AND display_name = $2
            AND status = 'live'
          LIMIT 1`,
        [recorder.project_id, recorder.current_session_id]
      );
      // No live asset row left → finalize is done (or there was none to wait on).
      if (r.rows.length === 0) return true;
    } catch (_) { /* transient DB error — keep polling until timeout */ }

    const remaining = deadline - Date.now();
    if (remaining <= 0) return false;
    await sleep(Math.min(intervalMs, remaining));
  }
}
|
||||
|
||||
// GET / - List all recorders
|
||||
//
|
||||
// Issue #121 — previous version fired N PG queries + N Docker inspects per
|
||||
|
|
@ -255,6 +343,13 @@ router.post('/', async (req, res, next) => {
|
|||
};
|
||||
const row = { id: uuidv4(), status: 'stopped', ...defaults, ...fields };
|
||||
|
||||
// Issue #163 — reject invalid codec/container/audio combos before insert.
|
||||
const createGpu = await nodeHasGpuCapability(row.node_id);
|
||||
const createErr = validateRecorderConfig(row, createGpu);
|
||||
if (createErr) {
|
||||
return res.status(400).json({ error: createErr });
|
||||
}
|
||||
|
||||
// Build INSERT dynamically so adding columns later means one place to update.
|
||||
const cols = Object.keys(row);
|
||||
const placeholders = cols.map((_, i) => `$${i + 1}`).join(', ');
|
||||
|
|
@ -321,6 +416,15 @@ router.patch('/:id', requireRecorderEdit, async (req, res, next) => {
|
|||
return res.status(400).json({ error: 'No fields to update' });
|
||||
}
|
||||
|
||||
// Issue #163 — validate the resulting config (existing row overlaid with the
|
||||
// incoming changes) so a PATCH can't introduce an invalid combo either.
|
||||
const merged = { ...recorder, ...fields };
|
||||
const patchGpu = await nodeHasGpuCapability(merged.node_id);
|
||||
const patchErr = validateRecorderConfig(merged, patchGpu);
|
||||
if (patchErr) {
|
||||
return res.status(400).json({ error: patchErr });
|
||||
}
|
||||
|
||||
const setClause = cols.map((k, i) => `${k} = $${i + 1}`).join(', ');
|
||||
const params = cols.map(k => fields[k]);
|
||||
params.push(id);
|
||||
|
|
@ -496,9 +600,8 @@ router.post('/:id/start', requireRecorderEdit, async (req, res, next) => {
|
|||
}
|
||||
|
||||
// GPU-accelerated codecs require the NVIDIA container runtime on the node.
|
||||
// hevc_nvenc / h264_nvenc are the only two we currently support; extend
|
||||
// this list if av1_nvenc or others are added later.
|
||||
const GPU_CODECS = ['hevc_nvenc', 'h264_nvenc'];
|
||||
// hevc_nvenc / h264_nvenc are the only two we currently support (see the
|
||||
// module-level GPU_CODECS list); extend it if av1_nvenc or others are added.
|
||||
const useGpu = GPU_CODECS.includes(recorder.recording_codec);
|
||||
|
||||
// Determine whether to spawn locally or via a remote node-agent.
|
||||
|
|
@ -663,9 +766,13 @@ router.post('/:id/stop', requireRecorderEdit, async (req, res, next) => {
|
|||
return res.status(502).json({ error: 'Remote node failed to stop sidecar' });
|
||||
}
|
||||
} else {
|
||||
// Issue #162 — stop WITH a grace period (t=180). Docker sends SIGTERM and
|
||||
// waits up to 180s for ffmpeg to flush and write the MOV/MP4 trailer before
|
||||
// it SIGKILLs. Without this the master is truncated/corrupt and the
|
||||
// pre-created asset can get stuck in 'live'.
|
||||
const stopRes = await dockerApi(
|
||||
'POST',
|
||||
`/containers/${recorder.container_id}/stop`
|
||||
`/containers/${recorder.container_id}/stop?t=180`
|
||||
);
|
||||
|
||||
// 204 = stopped, 304 = already stopped, 404 = container gone — all acceptable.
|
||||
|
|
@ -678,6 +785,12 @@ router.post('/:id/stop', requireRecorderEdit, async (req, res, next) => {
|
|||
|
||||
// Only attempt remove if the container existed (not 404).
|
||||
if (stopRes.status !== 404) {
|
||||
// Issue #162 — before removing the container, wait for the master to
|
||||
// finalize (asset leaves 'live'), mirroring the remote path's reliance on
|
||||
// the node-agent's clean teardown. This guards against deleting the
|
||||
// container — and its lingering finalize work — too early.
|
||||
await waitForFinalize(recorder);
|
||||
|
||||
const removeRes = await dockerApi(
|
||||
'DELETE',
|
||||
`/containers/${recorder.container_id}`
|
||||
|
|
|
|||
|
|
@ -397,6 +397,25 @@ function NewRecorderModal({ open, onClose }) {
|
|||
</div>
|
||||
<div className="modal-section-body">
|
||||
{recTab === 'video' && (
|
||||
<>
|
||||
{/* Codec presets — one click fills codec + bitrate with a known-good
|
||||
combo that passes the server-side validateRecorderConfig guard.
|
||||
Container is derived from the codec (HEVC/ProRes/DNxHR → MOV,
|
||||
H.264 → MP4), and master audio is always PCM (valid in MOV). */}
|
||||
<div style={{ display: 'flex', flexWrap: 'wrap', gap: 6, marginBottom: 12 }}>
|
||||
{[
|
||||
{ id: 'hevc', label: 'HEVC Master (MOV)', codec: 'hevc_nvenc', bitrate: '60' },
|
||||
{ id: 'h264', label: 'H.264 Proxy-friendly (MP4)', codec: 'h264_nvenc', bitrate: '25' },
|
||||
{ id: 'dnxhr', label: 'DNxHR HQ (MOV)', codec: 'dnxhr_hq', bitrate: '145' },
|
||||
].map(p => (
|
||||
<button key={p.id}
|
||||
className={`btn ghost sm${recCodec === p.codec ? ' active' : ''}`}
|
||||
onClick={() => { setRecCodec(p.codec); setRecBitrate(p.bitrate); }}
|
||||
style={{ flexShrink: 0 }}>
|
||||
{p.label}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
<div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 10 }}>
|
||||
<div className="field">
|
||||
<label className="field-label">Video codec</label>
|
||||
|
|
@ -444,6 +463,7 @@ function NewRecorderModal({ open, onClose }) {
|
|||
return null;
|
||||
})()}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
{recTab === 'audio' && (
|
||||
<div style={{ display: 'grid', gridTemplateColumns: '1fr 1fr', gap: 10 }}>
|
||||
|
|
|
|||
|
|
@ -1169,7 +1169,11 @@ function Containers() {
|
|||
<span>{(c.cpu || 0).toFixed(1)}%</span>
|
||||
</div>
|
||||
</div>
|
||||
<div className="mono" style={{ fontSize: 11.5 }}>{c.mem} MB</div>
|
||||
<div className="mono" style={{ fontSize: 11.5 }}>
|
||||
{c.memBytes != null
|
||||
? `${Math.round(c.memBytes / 1048576)} MB`
|
||||
: "N/A"}
|
||||
</div>
|
||||
<div className="mono" style={{ fontSize: 10.5, color: "var(--text-3)" }}>{c.ports}</div>
|
||||
<div style={{ display: "flex", gap: 4 }}>
|
||||
<button className="btn ghost sm" onClick={() => showLogs(c)}>Logs</button>
|
||||
|
|
|
|||
|
|
@ -199,6 +199,45 @@ function AssetDetail({ asset, onClose }) {
|
|||
// Pull a presigned hi-res URL and trigger a browser download with the
|
||||
// asset's display name as the filename. Falls back to opening in a new tab.
|
||||
const [downloading, setDownloading] = React.useState(false);
|
||||
|
||||
// Gate the download behind a one-time "large file / connection speed"
|
||||
// warning, shared with the library via the df.lib.download.warnDismissed
|
||||
// localStorage flag. Once dismissed, downloads start without the prompt.
|
||||
const dismissForeverRef = React.useRef(false);
|
||||
const requestDownload = async function() {
|
||||
if (downloading) return;
|
||||
let dismissed = false;
|
||||
try { dismissed = localStorage.getItem('df.lib.download.warnDismissed') === '1'; } catch (_) {}
|
||||
if (!dismissed) {
|
||||
dismissForeverRef.current = false;
|
||||
const ok = await confirm({
|
||||
title: 'Download original',
|
||||
message: <div>
|
||||
<div style={{ fontSize: 13, color: 'var(--text-2)', lineHeight: 1.5, marginBottom: 10 }}>
|
||||
You're about to download the full-length original master for <b>{asset.name}</b>.
|
||||
These files can be very large and download speed depends on your connection.
|
||||
</div>
|
||||
<label style={{ display: 'flex', alignItems: 'center', gap: 8, fontSize: 12.5, color: 'var(--text-3)', cursor: 'pointer' }}>
|
||||
<input
|
||||
type="checkbox"
|
||||
onChange={function(e) { dismissForeverRef.current = e.target.checked; }}
|
||||
/>
|
||||
Don't show this warning again
|
||||
</label>
|
||||
</div>,
|
||||
confirmLabel: 'Download',
|
||||
cancelLabel: 'Cancel',
|
||||
danger: false,
|
||||
});
|
||||
if (!ok) return;
|
||||
// Persist the dismissal only after the user confirms the download.
|
||||
if (dismissForeverRef.current) {
|
||||
try { localStorage.setItem('df.lib.download.warnDismissed', '1'); } catch (_) {}
|
||||
}
|
||||
}
|
||||
downloadHires();
|
||||
};
|
||||
|
||||
const downloadHires = function() {
|
||||
if (downloading) return;
|
||||
setDownloading(true);
|
||||
|
|
@ -372,9 +411,11 @@ function AssetDetail({ asset, onClose }) {
|
|||
</div>
|
||||
</div>
|
||||
<div style={{ flex: 1 }} />
|
||||
<button className="btn ghost sm" onClick={downloadHires} disabled={downloading} title="Download the hi-res master file">
|
||||
<Icon name="download" />{downloading ? 'Preparing…' : 'Download'}
|
||||
</button>
|
||||
{asset.original_s3_key && (
|
||||
<button className="btn ghost sm" onClick={requestDownload} disabled={downloading} title="Download the hi-res master file">
|
||||
<Icon name="download" />{downloading ? 'Preparing…' : 'Download'}
|
||||
</button>
|
||||
)}
|
||||
<div style={{ position: 'relative' }}>
|
||||
<button ref={moreBtnRef} className="icon-btn" aria-label="More actions" onClick={function(e) { e.stopPropagation(); setMenuOpen(function(v) { return !v; }); }}>
|
||||
<Icon name="more" />
|
||||
|
|
|
|||
Loading…
Reference in a new issue