feat(mam-api): extract ensureStandbySidecar + add POST /recorders/reconcile-standby

Re-provisions the persistent standby sidecar for SDI/deltacast recorders that
lost theirs (manual cleanup, node redeploy, wiped /dev/shm). Without this the
recorder falls back to slow on-demand spawn on /start, which can collide on the
capture port (EADDRINUSE). Idempotent; { force:true } recreates even when a
container_id is already set.
This commit is contained in:
Zac Gaetano 2026-06-04 03:05:00 +00:00
parent 5668c03615
commit bf4632b911

View file

@ -280,6 +280,55 @@ function buildStandbyEnv(recorder) {
];
}
// Recorder source types backed by a long-lived standby sidecar: an
// idle-preview container that stays up 24/7, which turns `record` into a
// sub-second HTTP call instead of a Docker cold start.
const STANDBY_SOURCE_TYPES = [
  'deltacast',
  'sdi',
  'blackmagic',
];
// Provision (or re-provision) the single persistent standby sidecar for one
// recorder: POST to the recorder's node agent (`/sidecar/standby`) to create
// the idle container, then persist the returned container id on the recorder
// row with status='standby'.
//
// Parameters:
//   recorder - recorder row object. Reads id, node_id, source_type,
//              device_index, recording_codec and gpu_uuid. On success it is
//              MUTATED in place (container_id, status) so the caller's copy
//              matches what was just written to the database.
//
// Returns: { ok, containerId?, reason? }
//   ok:true  - the agent returned a container id and the row was updated.
//   ok:false - nothing was persisted; `reason` explains why (not a standby
//              source / no node, node not remote or unresolvable, non-2xx
//              agent reply, or an agent reply without a containerId).
//
// Throws: a rejected fetch (network failure, 15 s timeout), an unparseable
// response body, or a failed UPDATE still propagate to the caller. Callers
// are expected to catch and treat this as non-fatal - a recorder stays usable
// through the on-demand spawn fallback in /start.
//
// NOTE(review): repeated calls are only safe if the node-agent really is
// idempotent per capture port, as the original author states; that contract
// lives in the agent and is not visible from this file.
async function ensureStandbySidecar(recorder) {
  if (!recorder.node_id || !STANDBY_SOURCE_TYPES.includes(recorder.source_type)) {
    return { ok: false, reason: 'not a standby source / no node' };
  }

  // A failed node lookup is folded into the "not remote" result rather than
  // thrown, so an unknown/unreachable node is reported as a normal ok:false.
  const { remote: isRemote, apiUrl: targetNodeApiUrl } =
    await resolveNodeTarget(recorder.node_id).catch(() => ({ remote: false }));
  if (!isRemote || !targetNodeApiUrl) {
    return { ok: false, reason: 'node not remote/reachable' };
  }

  // One capture port per device: base port offset by the device index
  // (a missing index is treated as device 0).
  const capturePort = SIDECAR_PORT_BASE + (recorder.device_index || 0);
  const useGpu = GPU_CODECS.includes(recorder.recording_codec);

  const standbyRes = await fetch(`${targetNodeApiUrl}/sidecar/standby`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      image: 'wild-dragon-capture:latest',
      env: buildStandbyEnv(recorder),
      capturePort,
      sourceType: recorder.source_type,
      useGpu,
      gpuUuid: recorder.gpu_uuid || null,
    }),
    signal: AbortSignal.timeout(15000),
  });

  if (!standbyRes.ok) {
    // Log here, not in the callers: the create route discards the returned
    // result, so without this a rejected spawn would leave no trace.
    console.warn(`[recorders] standby spawn returned ${standbyRes.status} for ${recorder.id} — will spawn on start`);
    return { ok: false, reason: `node-agent returned ${standbyRes.status}` };
  }

  // Never persist a missing id: writing container_id = NULL together with
  // status = 'standby' would mark the recorder warm with no container behind it.
  const payload = await standbyRes.json();
  const containerId = payload && payload.containerId;
  if (!containerId) {
    console.warn(`[recorders] standby spawn for ${recorder.id} returned no containerId — will spawn on start`);
    return { ok: false, reason: 'node-agent response missing containerId' };
  }

  await pool.query(
    `UPDATE recorders SET container_id = $1, status = 'standby', updated_at = NOW() WHERE id = $2`,
    [containerId, recorder.id]
  );

  // Keep the in-memory row in sync so callers can return it directly.
  recorder.container_id = containerId;
  recorder.status = 'standby';
  console.log(`[recorders] standby sidecar spawned for ${recorder.id}: ${containerId}`);
  return { ok: true, containerId };
}
// Issue #162 — after a local-spawn stop, wait for the capture container to
// finalize its master. The asset row was pre-created at start with
// status='live' (display_name = current_session_id); the ingest/finalize step
@ -432,43 +481,8 @@ router.post('/', async (req, res, next) => {
// Spawn a standby sidecar immediately for SDI/deltacast/blackmagic recorders
// that have an assigned node, so the container + bridge are ready before the
// user hits record. Non-fatal — recorder is still usable if this fails.
const STANDBY_SOURCE_TYPES = ['deltacast', 'sdi', 'blackmagic'];
if (recorder.node_id && STANDBY_SOURCE_TYPES.includes(recorder.source_type)) {
const { remote: isRemote, apiUrl: targetNodeApiUrl } = await resolveNodeTarget(recorder.node_id).catch(() => ({ remote: false }));
if (isRemote && targetNodeApiUrl) {
const capturePort = SIDECAR_PORT_BASE + (recorder.device_index || 0);
const useGpu = GPU_CODECS.includes(recorder.recording_codec);
try {
const standbyRes = await fetch(`${targetNodeApiUrl}/sidecar/standby`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
image: 'wild-dragon-capture:latest',
env: buildStandbyEnv(recorder),
capturePort,
sourceType: recorder.source_type,
useGpu,
gpuUuid: recorder.gpu_uuid || null,
}),
signal: AbortSignal.timeout(15000),
});
if (standbyRes.ok) {
const { containerId } = await standbyRes.json();
await pool.query(
`UPDATE recorders SET container_id = $1, status = 'standby', updated_at = NOW() WHERE id = $2`,
[containerId, recorder.id]
);
recorder.container_id = containerId;
recorder.status = 'standby';
console.log(`[recorders] standby sidecar spawned for ${recorder.id}: ${containerId}`);
} else {
console.warn(`[recorders] standby spawn returned ${standbyRes.status} for ${recorder.id} — will spawn on start`);
}
} catch (e) {
console.warn(`[recorders] standby spawn failed for ${recorder.id} (non-fatal): ${e.message}`);
}
}
}
await ensureStandbySidecar(recorder).catch(e =>
console.warn(`[recorders] standby spawn failed for ${recorder.id} (non-fatal): ${e.message}`));
res.status(201).json(recorder);
} catch (err) {
@ -476,6 +490,48 @@ router.post('/', async (req, res, next) => {
}
});
// POST /reconcile-standby - re-warm the persistent standby sidecar of every
// recorder whose source_type is in STANDBY_SOURCE_TYPES and that has a node
// assigned.
//
// Why this exists: standby sidecars are created when a recorder is created
// and are meant to stay up 24/7 (RestartPolicy=unless-stopped). If one is
// removed from the outside (manual cleanup, node redeploy, a wiped /dev/shm)
// nothing brings it back, so the recorder drops to the slow on-demand spawn
// on /start, which can collide on the capture port. Calling this endpoint
// puts those recorders back on the fast standby path.
//
// Request body (optional): { force: true } also recreates sidecars for
// recorders that already claim a container_id (the node-agent is idempotent
// per capture port, so a stale id is replaced cleanly). Without force, only
// recorders that have no container_id are (re)provisioned.
//
// Response: { provisioned, total, results } where `results` holds one entry
// per recorder ({ id, name, ok, ... }) and `provisioned` counts the entries
// that succeeded with a new containerId.
router.post('/reconcile-standby', requireRecorderEdit, async (req, res, next) => {
  try {
    const force = Boolean(req.body && req.body.force);
    const queryResult = await pool.query(
      `SELECT * FROM recorders
WHERE source_type = ANY($1)
AND node_id IS NOT NULL
ORDER BY name`,
      [STANDBY_SOURCE_TYPES]
    );
    const rows = queryResult.rows;

    // Resolves to the result entry for one recorder. A provisioning error is
    // captured in the entry so one bad node cannot abort the whole pass.
    const reconcileOne = async (recorder) => {
      const identity = { id: recorder.id, name: recorder.name };
      if (recorder.container_id && !force) {
        return { ...identity, ok: true, skipped: 'already has container_id' };
      }
      try {
        const outcome = await ensureStandbySidecar(recorder);
        return { ...identity, ...outcome };
      } catch (e) {
        return { ...identity, ok: false, reason: e.message };
      }
    };

    // Recorders are processed strictly one after another, in query order.
    const results = [];
    for (const recorder of rows) {
      results.push(await reconcileOne(recorder));
    }

    // Skipped entries are ok:true but carry no containerId, so they are not
    // counted as provisioned.
    let provisioned = 0;
    for (const entry of results) {
      if (entry.ok && entry.containerId) {
        provisioned += 1;
      }
    }

    res.json({ provisioned, total: rows.length, results });
  } catch (err) {
    next(err);
  }
});
// GET /:id - Get single recorder
router.get('/:id', async (req, res, next) => {
try {
@ -970,7 +1026,6 @@ router.post('/:id/stop', requireRecorderEdit, async (req, res, next) => {
// /start call immediately.
//
// If NOT in standby (legacy on-demand spawn), use the old docker-stop path.
const STANDBY_SOURCE_TYPES = ['deltacast', 'sdi', 'blackmagic'];
const isStandbySource = STANDBY_SOURCE_TYPES.includes(recorder.source_type);
if (isStandbySource && recorder.container_id) {