// interpolated into a shell string.
const DRIVER_VENDORS = ['blackmagic', 'aja', 'deltacast', 'ndi'];

// ── Deltacast board-open mutex ────────────────────────────────────────────
// Simultaneous VHD_OpenBoardHandle calls from multiple deltacast sidecars
// trigger a kernel array-index-out-of-bounds in delta_x300 (v6.34.1)
// BufMngr.c:781 that wedges all RX channels until the module is reloaded.
// Serialize deltacast-only sidecar launches through a promise-chain mutex
// with a settle delay that gives each board-open time to finish before the
// next one starts. Configurable via DELTACAST_START_STAGGER_MS (default
// 3500ms). SDI, SRT, and RTMP sources are unaffected.
const DEFAULT_DELTACAST_STAGGER_MS = 3500;

// Largest delay setTimeout honours. Node coerces NaN, values below 1 and
// values above this limit to a 1 ms delay, which would silently remove the
// settle period.
const MAX_TIMER_DELAY_MS = 2147483647;

/**
 * Parse the configured settle delay, falling back to a safe default.
 *
 * An unset or empty value selects the fallback silently. A value that does
 * not parse to an integer in [0, MAX_TIMER_DELAY_MS] is rejected with a
 * warning and also selects the fallback, so a typo cannot disable the delay.
 * An explicit '0' is accepted and disables the delay on purpose.
 *
 * @param {string|undefined|null} raw - Raw environment value.
 * @param {number} [fallback] - Delay in milliseconds used when raw is unusable.
 * @returns {number} Settle delay in milliseconds.
 */
function parseStaggerMs(raw, fallback = DEFAULT_DELTACAST_STAGGER_MS) {
  if (raw == null || raw === '') {
    return fallback;
  }
  const ms = Number.parseInt(raw, 10);
  if (Number.isNaN(ms) || ms < 0 || ms > MAX_TIMER_DELAY_MS) {
    console.warn(
      `[deltacast] ignoring invalid DELTACAST_START_STAGGER_MS=${JSON.stringify(raw)}; using ${fallback}ms`,
    );
    return fallback;
  }
  return ms;
}

const DELTACAST_STAGGER_MS = parseStaggerMs(process.env.DELTACAST_START_STAGGER_MS);

// Tail of the FIFO chain: settles once every lock handed out so far has been
// released. It never rejects because the promises chained onto it only resolve.
let _dcMutex = Promise.resolve();

/**
 * Wait for exclusive access to the deltacast board-open slot.
 *
 * Callers are served in call order: each one waits on the chain tail left by
 * the previous caller and then appends its own pending promise as the new tail.
 *
 * @returns {Promise<() => void>} Resolves with a release function once all
 *   earlier holders have released. Calling release more than once is harmless.
 *   The holder must always call it (for example in a `finally`), because there
 *   is no timeout and later callers would otherwise wait forever.
 */
function acquireDcLock() {
  let release;
  const next = new Promise((resolve) => {
    release = resolve;
  });
  const wait = _dcMutex;
  // The new tail settles only after this holder calls release().
  _dcMutex = wait.then(() => next);
  return wait.then(() => release);
}

// Pick the host's LAN IP.
Inside a bridge-mode container, @@ -181,57 +185,42 @@ async function handleSidecarStart(body, res) { HostConfig: hostConfig, }; - // Deltacast: serialize board opens to prevent simultaneous VHD_OpenBoardHandle - // calls from wedging the delta_x300 BufMngr (OOB bug in v6.34.1). Acquire the - // slot (waits for any prior deltacast start to clear its settle delay), then - // start the container, then hold for DELTACAST_STAGGER_MS before releasing. - // Fail-open: if launch throws, the slot is still released so the next start - // isn't blocked forever. + // Deltacast: serialize board opens through a process-wide mutex + settle + // delay. Concurrent VHD_OpenBoardHandle calls wedge the kernel RX buffer + // manager (delta_x300 BufMngr.c:781 OOB). Non-deltacast sources skip + // this entirely so SDI/SRT/RTMP start latency is unchanged. + let release = null; if (sourceType === 'deltacast') { - const release = await acquireDeltacastSlot(); - try { - const createRes = await dockerApi('POST', '/containers/create', spec); - if (createRes.status !== 201) { - return jsonResponse(res, 502, { error: 'Failed to create container', details: createRes.data }); - } + release = await acquireDcLock(); + } - const containerId = createRes.data.Id; - const _u = (env.find(e => e.startsWith('MAM_API_URL=')) || '').slice(12); - const _tok = env.some(e => e.startsWith('MAM_API_TOKEN=') && e.length > 14); - console.log(`[sidecar-start] ${containerId} image=${image} src=${sourceType} MAM_API_URL=${_u} token=${_tok} stagger=${DELTACAST_STAGGER_MS}ms`); - const startRes = await dockerApi('POST', `/containers/${containerId}/start`); - if (startRes.status !== 204) { - await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {}); - return jsonResponse(res, 502, { error: 'Failed to start container', details: startRes.data }); - } - - jsonResponse(res, 201, { containerId, capturePort }); - // Hold slot for settle delay AFTER responding — client can proceed, - // but the next 
deltacast start won't begin until after the delay. - await new Promise(r => setTimeout(r, DELTACAST_STAGGER_MS)); - } finally { - release(); + let containerId; + try { + const createRes = await dockerApi('POST', '/containers/create', spec); + if (createRes.status !== 201) { + return jsonResponse(res, 502, { error: 'Failed to create container', details: createRes.data }); } - return; - } - // Non-deltacast path: no stagger needed. - const createRes = await dockerApi('POST', '/containers/create', spec); - if (createRes.status !== 201) { - return jsonResponse(res, 502, { error: 'Failed to create container', details: createRes.data }); - } + containerId = createRes.data.Id; + const _u = (env.find(e => e.startsWith('MAM_API_URL=')) || '').slice(12); + const _tok = env.some(e => e.startsWith('MAM_API_TOKEN=') && e.length > 14); + console.log(`[sidecar-start] ${containerId} image=${image} src=${sourceType} MAM_API_URL=${_u} token=${_tok}`); + const startRes = await dockerApi('POST', `/containers/${containerId}/start`); + if (startRes.status !== 204) { + await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {}); + return jsonResponse(res, 502, { error: 'Failed to start container', details: startRes.data }); + } - const containerId = createRes.data.Id; - const _u = (env.find(e => e.startsWith('MAM_API_URL=')) || '').slice(12); - const _tok = env.some(e => e.startsWith('MAM_API_TOKEN=') && e.length > 14); - console.log(`[sidecar-start] ${containerId} image=${image} src=${sourceType} MAM_API_URL=${_u} token=${_tok}`); - const startRes = await dockerApi('POST', `/containers/${containerId}/start`); - if (startRes.status !== 204) { - await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {}); - return jsonResponse(res, 502, { error: 'Failed to start container', details: startRes.data }); - } + jsonResponse(res, 201, { containerId, capturePort }); - jsonResponse(res, 201, { containerId, capturePort }); + // Hold the lock for the 
settle period AFTER responding so the caller + // isn't blocked, but the next deltacast open is still deferred. + if (release) { + await new Promise(r => setTimeout(r, DELTACAST_STAGGER_MS)); + } + } finally { + if (release) release(); + } } catch (err) { jsonResponse(res, 500, { error: err.message }); }