diff --git a/services/mam-api/src/routes/recorders.js b/services/mam-api/src/routes/recorders.js index 04ca3ef..acfd698 100644 --- a/services/mam-api/src/routes/recorders.js +++ b/services/mam-api/src/routes/recorders.js @@ -789,43 +789,22 @@ router.post('/:id/stop', requireRecorderEdit, async (req, res, next) => { return res.status(502).json({ error: 'Remote node failed to stop sidecar' }); } } else { - // Issue #162 — stop WITH a grace period (t=180). Docker sends SIGTERM and - // waits up to 180s for ffmpeg to flush and write the MOV/MP4 trailer before - // it SIGKILLs. Without this the master is truncated/corrupt and the - // pre-created asset can get stuck in 'live'. - const stopRes = await dockerApi( - 'POST', - `/containers/${recorder.container_id}/stop?t=180` - ); - - // 204 = stopped, 304 = already stopped, 404 = container gone — all acceptable. - if (stopRes.status !== 204 && stopRes.status !== 304 && stopRes.status !== 404) { - return res.status(500).json({ - error: 'Failed to stop container', - details: stopRes.data, - }); - } - - // Only attempt remove if the container existed (not 404). - if (stopRes.status !== 404) { - // Issue #162 — before removing the container, wait for the master to - // finalize (asset leaves 'live'), mirroring the remote path's reliance on - // the node-agent's clean teardown. This guards against deleting the - // container — and its lingering finalize work — too early. - await waitForFinalize(recorder); - - const removeRes = await dockerApi( - 'DELETE', - `/containers/${recorder.container_id}` - ); - - if (removeRes.status !== 204 && removeRes.status !== 404) { - return res.status(500).json({ - error: 'Failed to remove container', - details: removeRes.data, - }); + // Issue #162 — stop local container in the background so the HTTP stop + // request returns immediately. The container teardown (SIGTERM -> ffmpeg + // exit -> S3 upload -> post-stop callback) takes up to 180s for large files, + // which would otherwise timeout the browser/API connection. + const containerId = recorder.container_id; + (async () => { + try { + const stopRes = await dockerApi('POST', `/containers/${containerId}/stop?t=180`); + if (stopRes.status !== 404) { + await waitForFinalize(recorder); + await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {}); + } + } catch (e) { + console.error('[recorders] failed local background stop:', e.message); } - } + })(); } const updateResult = await pool.query( diff --git a/services/node-agent/index.js b/services/node-agent/index.js index f702c3b..fc71219 100644 --- a/services/node-agent/index.js +++ b/services/node-agent/index.js @@ -387,29 +387,33 @@ async function fetchContainerLogs(containerId) { async function handleSidecarStop(containerId, res) { try { console.log(`[sidecar-stop] stopping ${containerId} (grace 180s)...`); - // Grace period must exceed the capture container's shutdown work - // (finalise ffmpeg session + register asset via callback). Default - // docker stop is only 10s, which SIGKILLs capture mid-finalise and - // loses the POST /assets callback -> asset stuck 'live', no jobs. - await dockerApi('POST', `/containers/${containerId}/stop?t=180`).catch(() => {}); - // Dump the capture container's shutdown logs into our persistent log - // BEFORE removing it, so failed callbacks are diagnosable. - const logs = await fetchContainerLogs(containerId); - console.log(`[sidecar-stop] ==== capture logs for ${containerId} ====\n${logs}\n[sidecar-stop] ==== end logs ====`); - // Container has now exited gracefully (or hit the 180s cap); remove it. - await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {}); + + // Run the container teardown and cleanup in the background. The capture + // process SIGTERM handler flushes ffmpeg and uploads the file to S3 + // (taking up to 3 minutes for multi-GB files) before exiting. Returning + // immediately stops the API request timing out. + (async () => { + try { + await dockerApi('POST', `/containers/${containerId}/stop?t=180`).catch(() => {}); + const logs = await fetchContainerLogs(containerId); + console.log(`[sidecar-stop] ==== capture logs for ${containerId} ====\n${logs}\n[sidecar-stop] ==== end logs ====`); + await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {}); - // Deltacast bridge lifecycle: decrement sidecar count; stop bridge when last. - if (_containerSourceType.get(containerId) === 'deltacast') { - _containerSourceType.delete(containerId); - _dcSidecarCount--; - if (_dcSidecarCount <= 0) { - _dcSidecarCount = 0; - stopDeltacastBridge(); + // Deltacast bridge lifecycle: decrement sidecar count; stop bridge when last. + if (_containerSourceType.get(containerId) === 'deltacast') { + _containerSourceType.delete(containerId); + _dcSidecarCount--; + if (_dcSidecarCount <= 0) { + _dcSidecarCount = 0; + stopDeltacastBridge(); + } + } else { + _containerSourceType.delete(containerId); + } + } catch (err) { + console.error(`[sidecar-stop] background cleanup failed for ${containerId}:`, err.message); } - } else { - _containerSourceType.delete(containerId); - } + })(); jsonResponse(res, 200, { ok: true }); } catch (err) {