fix(node-agent): serialize deltacast sidecar opens to prevent BufMngr wedge
Simultaneous VHD_OpenBoardHandle calls from 8 sidecars trigger a kernel array-index-out-of-bounds in delta_x300 BufMngr.c:781 that wedges all RX channels. Serialize deltacast-only sidecar starts through a promise-chain mutex with a configurable settle delay (DELTACAST_START_STAGGER_MS, default 3500ms). All other source types (SDI, SRT, RTMP) are unaffected — they bypass the mutex entirely.
This commit is contained in:
parent
9809cdd13e
commit
b324878db9
1 changed file with 43 additions and 54 deletions
|
|
@ -29,19 +29,23 @@ const VERSION = '1.4.0';
|
|||
// interpolated into a shell string.
|
||||
const DRIVER_VENDORS = ['blackmagic', 'aja', 'deltacast', 'ndi'];
|
||||
|
||||
// Deltacast board-open stagger: serialize sidecar starts so only one
|
||||
// VHD_OpenBoardHandle is in flight at a time. Simultaneous opens trip a
|
||||
// kernel BufMngr OOB bug (delta_x300 v6.34.1, BufMngr.c:781). A promise
|
||||
// chain acts as a FIFO mutex; the settle delay lets the driver stabilize.
|
||||
// ── Deltacast board-open mutex ────────────────────────────────────────────
|
||||
// Simultaneous VHD_OpenBoardHandle calls from multiple deltacast sidecars
|
||||
// trigger a kernel array-index-out-of-bounds in delta_x300 BufMngr.c:781
|
||||
// that wedges all RX channels until the module is reloaded. Serialize
|
||||
// deltacast-only sidecar launches through a promise-chain mutex with a
|
||||
// settle delay so each board-open completes before the next one starts.
|
||||
// Configurable via DELTACAST_START_STAGGER_MS (default 3500ms). SDI, SRT,
|
||||
// and RTMP sources are unaffected.
|
||||
/**
 * Parse the deltacast settle delay from its raw environment string.
 *
 * A non-numeric or negative value must not reach setTimeout: Node coerces
 * an invalid delay to 1 ms, which would silently remove the stagger between
 * board opens. Such values fall back to the 3500 ms default instead.
 *
 * @param {string|undefined} raw - Value of DELTACAST_START_STAGGER_MS.
 * @returns {number} Non-negative integer delay in milliseconds.
 */
function parseDeltacastStaggerMs(raw) {
  const DEFAULT_STAGGER_MS = 3500;
  // Unset and empty-string both mean "use the default", as before.
  const parsed = Number.parseInt(raw || String(DEFAULT_STAGGER_MS), 10);
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : DEFAULT_STAGGER_MS;
}

const DELTACAST_STAGGER_MS = parseDeltacastStaggerMs(process.env.DELTACAST_START_STAGGER_MS);
|
||||
let _dcMutex = Promise.resolve();
|
||||
|
||||
function acquireDeltacastSlot() {
|
||||
function acquireDcLock() {
|
||||
let release;
|
||||
const ticket = new Promise(r => { release = r; });
|
||||
const prev = _dcMutex;
|
||||
_dcMutex = prev.then(() => ticket);
|
||||
return prev.then(() => release);
|
||||
const next = new Promise(resolve => { release = resolve; });
|
||||
const wait = _dcMutex;
|
||||
_dcMutex = _dcMutex.then(() => next);
|
||||
return wait.then(() => release);
|
||||
}
|
||||
|
||||
// Pick the host's LAN IP. Inside a bridge-mode container,
|
||||
|
|
@ -181,57 +185,42 @@ async function handleSidecarStart(body, res) {
|
|||
HostConfig: hostConfig,
|
||||
};
|
||||
|
||||
// Deltacast: serialize board opens to prevent simultaneous VHD_OpenBoardHandle
|
||||
// calls from wedging the delta_x300 BufMngr (OOB bug in v6.34.1). Acquire the
|
||||
// slot (waits for any prior deltacast start to clear its settle delay), then
|
||||
// start the container, then hold for DELTACAST_STAGGER_MS before releasing.
|
||||
// Fail-open: if launch throws, the slot is still released so the next start
|
||||
// isn't blocked forever.
|
||||
// Deltacast: serialize board opens through a process-wide mutex + settle
|
||||
// delay. Concurrent VHD_OpenBoardHandle calls wedge the kernel RX buffer
|
||||
// manager (delta_x300 BufMngr.c:781 OOB). Non-deltacast sources skip
|
||||
// this entirely so SDI/SRT/RTMP start latency is unchanged.
|
||||
let release = null;
|
||||
if (sourceType === 'deltacast') {
|
||||
const release = await acquireDeltacastSlot();
|
||||
try {
|
||||
const createRes = await dockerApi('POST', '/containers/create', spec);
|
||||
if (createRes.status !== 201) {
|
||||
return jsonResponse(res, 502, { error: 'Failed to create container', details: createRes.data });
|
||||
}
|
||||
release = await acquireDcLock();
|
||||
}
|
||||
|
||||
const containerId = createRes.data.Id;
|
||||
const _u = (env.find(e => e.startsWith('MAM_API_URL=')) || '').slice(12);
|
||||
const _tok = env.some(e => e.startsWith('MAM_API_TOKEN=') && e.length > 14);
|
||||
console.log(`[sidecar-start] ${containerId} image=${image} src=${sourceType} MAM_API_URL=${_u} token=${_tok} stagger=${DELTACAST_STAGGER_MS}ms`);
|
||||
const startRes = await dockerApi('POST', `/containers/${containerId}/start`);
|
||||
if (startRes.status !== 204) {
|
||||
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
|
||||
return jsonResponse(res, 502, { error: 'Failed to start container', details: startRes.data });
|
||||
}
|
||||
|
||||
jsonResponse(res, 201, { containerId, capturePort });
|
||||
// Hold slot for settle delay AFTER responding — client can proceed,
|
||||
// but the next deltacast start won't begin until after the delay.
|
||||
await new Promise(r => setTimeout(r, DELTACAST_STAGGER_MS));
|
||||
} finally {
|
||||
release();
|
||||
let containerId;
|
||||
try {
|
||||
const createRes = await dockerApi('POST', '/containers/create', spec);
|
||||
if (createRes.status !== 201) {
|
||||
return jsonResponse(res, 502, { error: 'Failed to create container', details: createRes.data });
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Non-deltacast path: no stagger needed.
|
||||
const createRes = await dockerApi('POST', '/containers/create', spec);
|
||||
if (createRes.status !== 201) {
|
||||
return jsonResponse(res, 502, { error: 'Failed to create container', details: createRes.data });
|
||||
}
|
||||
containerId = createRes.data.Id;
|
||||
const _u = (env.find(e => e.startsWith('MAM_API_URL=')) || '').slice(12);
|
||||
const _tok = env.some(e => e.startsWith('MAM_API_TOKEN=') && e.length > 14);
|
||||
console.log(`[sidecar-start] ${containerId} image=${image} src=${sourceType} MAM_API_URL=${_u} token=${_tok}`);
|
||||
const startRes = await dockerApi('POST', `/containers/${containerId}/start`);
|
||||
if (startRes.status !== 204) {
|
||||
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
|
||||
return jsonResponse(res, 502, { error: 'Failed to start container', details: startRes.data });
|
||||
}
|
||||
|
||||
const containerId = createRes.data.Id;
|
||||
const _u = (env.find(e => e.startsWith('MAM_API_URL=')) || '').slice(12);
|
||||
const _tok = env.some(e => e.startsWith('MAM_API_TOKEN=') && e.length > 14);
|
||||
console.log(`[sidecar-start] ${containerId} image=${image} src=${sourceType} MAM_API_URL=${_u} token=${_tok}`);
|
||||
const startRes = await dockerApi('POST', `/containers/${containerId}/start`);
|
||||
if (startRes.status !== 204) {
|
||||
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
|
||||
return jsonResponse(res, 502, { error: 'Failed to start container', details: startRes.data });
|
||||
}
|
||||
jsonResponse(res, 201, { containerId, capturePort });
|
||||
|
||||
jsonResponse(res, 201, { containerId, capturePort });
|
||||
// Hold the lock for the settle period AFTER responding so the caller
|
||||
// isn't blocked, but the next deltacast open is still deferred.
|
||||
if (release) {
|
||||
await new Promise(r => setTimeout(r, DELTACAST_STAGGER_MS));
|
||||
}
|
||||
} finally {
|
||||
if (release) release();
|
||||
}
|
||||
} catch (err) {
|
||||
jsonResponse(res, 500, { error: err.message });
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue