fix(node-agent): serialize deltacast sidecar opens to prevent BufMngr wedge

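Simultaneous VHD_OpenBoardHandle calls from 8 sidecars trigger a kernel
array-index-out-of-bounds error in delta_x300 BufMngr.c:781 that wedges
all RX channels until the module is reloaded. Serialize sidecar starts
for deltacast sources through a promise-chain mutex, holding the lock
for a configurable settle delay (DELTACAST_START_STAGGER_MS, default
3500ms) after each successful start. The delay runs after the HTTP
response is sent, so the requesting caller is not held up by its own
settle period; only the next deltacast start waits. All other source
types (SDI, SRT, RTMP) are unaffected — they bypass the mutex entirely.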
This commit is contained in:
Zac Gaetano 2026-06-01 18:47:55 -04:00
parent 9809cdd13e
commit b324878db9

View file

@ -29,19 +29,23 @@ const VERSION = '1.4.0';
// interpolated into a shell string. // interpolated into a shell string.
const DRIVER_VENDORS = ['blackmagic', 'aja', 'deltacast', 'ndi']; const DRIVER_VENDORS = ['blackmagic', 'aja', 'deltacast', 'ndi'];
// Deltacast board-open stagger: serialize sidecar starts so only one // ── Deltacast board-open mutex ────────────────────────────────────────────
// VHD_OpenBoardHandle is in flight at a time. Simultaneous opens trip a // Simultaneous VHD_OpenBoardHandle calls from multiple deltacast sidecars
// kernel BufMngr OOB bug (delta_x300 v6.34.1, BufMngr.c:781). A promise // trigger a kernel array-index-out-of-bounds in delta_x300 BufMngr.c:781
// chain acts as a FIFO mutex; the settle delay lets the driver stabilize. // that wedges all RX channels until the module is reloaded. Serialize
// deltacast-only sidecar launches through a promise-chain mutex with a
// settle delay so each board-open completes before the next one starts.
// Configurable via DELTACAST_START_STAGGER_MS (default 3500ms). SDI, SRT,
// and RTMP sources are unaffected.
const DELTACAST_STAGGER_MS = parseInt(process.env.DELTACAST_START_STAGGER_MS || '3500', 10); const DELTACAST_STAGGER_MS = parseInt(process.env.DELTACAST_START_STAGGER_MS || '3500', 10);
let _dcMutex = Promise.resolve(); let _dcMutex = Promise.resolve();
function acquireDeltacastSlot() { function acquireDcLock() {
let release; let release;
const ticket = new Promise(r => { release = r; }); const next = new Promise(resolve => { release = resolve; });
const prev = _dcMutex; const wait = _dcMutex;
_dcMutex = prev.then(() => ticket); _dcMutex = _dcMutex.then(() => next);
return prev.then(() => release); return wait.then(() => release);
} }
// Pick the host's LAN IP. Inside a bridge-mode container, // Pick the host's LAN IP. Inside a bridge-mode container,
@ -181,57 +185,42 @@ async function handleSidecarStart(body, res) {
HostConfig: hostConfig, HostConfig: hostConfig,
}; };
// Deltacast: serialize board opens to prevent simultaneous VHD_OpenBoardHandle // Deltacast: serialize board opens through a process-wide mutex + settle
// calls from wedging the delta_x300 BufMngr (OOB bug in v6.34.1). Acquire the // delay. Concurrent VHD_OpenBoardHandle calls wedge the kernel RX buffer
// slot (waits for any prior deltacast start to clear its settle delay), then // manager (delta_x300 BufMngr.c:781 OOB). Non-deltacast sources skip
// start the container, then hold for DELTACAST_STAGGER_MS before releasing. // this entirely so SDI/SRT/RTMP start latency is unchanged.
// Fail-open: if launch throws, the slot is still released so the next start let release = null;
// isn't blocked forever.
if (sourceType === 'deltacast') { if (sourceType === 'deltacast') {
const release = await acquireDeltacastSlot(); release = await acquireDcLock();
try { }
const createRes = await dockerApi('POST', '/containers/create', spec);
if (createRes.status !== 201) {
return jsonResponse(res, 502, { error: 'Failed to create container', details: createRes.data });
}
const containerId = createRes.data.Id; let containerId;
const _u = (env.find(e => e.startsWith('MAM_API_URL=')) || '').slice(12); try {
const _tok = env.some(e => e.startsWith('MAM_API_TOKEN=') && e.length > 14); const createRes = await dockerApi('POST', '/containers/create', spec);
console.log(`[sidecar-start] ${containerId} image=${image} src=${sourceType} MAM_API_URL=${_u} token=${_tok} stagger=${DELTACAST_STAGGER_MS}ms`); if (createRes.status !== 201) {
const startRes = await dockerApi('POST', `/containers/${containerId}/start`); return jsonResponse(res, 502, { error: 'Failed to create container', details: createRes.data });
if (startRes.status !== 204) {
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
return jsonResponse(res, 502, { error: 'Failed to start container', details: startRes.data });
}
jsonResponse(res, 201, { containerId, capturePort });
// Hold slot for settle delay AFTER responding — client can proceed,
// but the next deltacast start won't begin until after the delay.
await new Promise(r => setTimeout(r, DELTACAST_STAGGER_MS));
} finally {
release();
} }
return;
}
// Non-deltacast path: no stagger needed. containerId = createRes.data.Id;
const createRes = await dockerApi('POST', '/containers/create', spec); const _u = (env.find(e => e.startsWith('MAM_API_URL=')) || '').slice(12);
if (createRes.status !== 201) { const _tok = env.some(e => e.startsWith('MAM_API_TOKEN=') && e.length > 14);
return jsonResponse(res, 502, { error: 'Failed to create container', details: createRes.data }); console.log(`[sidecar-start] ${containerId} image=${image} src=${sourceType} MAM_API_URL=${_u} token=${_tok}`);
} const startRes = await dockerApi('POST', `/containers/${containerId}/start`);
if (startRes.status !== 204) {
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
return jsonResponse(res, 502, { error: 'Failed to start container', details: startRes.data });
}
const containerId = createRes.data.Id; jsonResponse(res, 201, { containerId, capturePort });
const _u = (env.find(e => e.startsWith('MAM_API_URL=')) || '').slice(12);
const _tok = env.some(e => e.startsWith('MAM_API_TOKEN=') && e.length > 14);
console.log(`[sidecar-start] ${containerId} image=${image} src=${sourceType} MAM_API_URL=${_u} token=${_tok}`);
const startRes = await dockerApi('POST', `/containers/${containerId}/start`);
if (startRes.status !== 204) {
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
return jsonResponse(res, 502, { error: 'Failed to start container', details: startRes.data });
}
jsonResponse(res, 201, { containerId, capturePort }); // Hold the lock for the settle period AFTER responding so the caller
// isn't blocked, but the next deltacast open is still deferred.
if (release) {
await new Promise(r => setTimeout(r, DELTACAST_STAGGER_MS));
}
} finally {
if (release) release();
}
} catch (err) { } catch (err) {
jsonResponse(res, 500, { error: err.message }); jsonResponse(res, 500, { error: err.message });
} }