fix(recorders): stop capture containers in the background to prevent API TimeoutError on large file uploads

This commit is contained in:
Zac Gaetano 2026-06-03 00:22:36 +00:00
parent 600af4564e
commit 62b9a90291
2 changed files with 40 additions and 57 deletions

View file

@@ -789,43 +789,22 @@ router.post('/:id/stop', requireRecorderEdit, async (req, res, next) => {
return res.status(502).json({ error: 'Remote node failed to stop sidecar' });
}
} else {
// Issue #162 — stop WITH a grace period (t=180). Docker sends SIGTERM and
// waits up to 180s for ffmpeg to flush and write the MOV/MP4 trailer before
// it SIGKILLs. Without this the master is truncated/corrupt and the
// pre-created asset can get stuck in 'live'.
const stopRes = await dockerApi(
'POST',
`/containers/${recorder.container_id}/stop?t=180`
);
// 204 = stopped, 304 = already stopped, 404 = container gone — all acceptable.
if (stopRes.status !== 204 && stopRes.status !== 304 && stopRes.status !== 404) {
return res.status(500).json({
error: 'Failed to stop container',
details: stopRes.data,
});
}
// Only attempt remove if the container existed (not 404).
if (stopRes.status !== 404) {
// Issue #162 — before removing the container, wait for the master to
// finalize (asset leaves 'live'), mirroring the remote path's reliance on
// the node-agent's clean teardown. This guards against deleting the
// container — and its lingering finalize work — too early.
await waitForFinalize(recorder);
const removeRes = await dockerApi(
'DELETE',
`/containers/${recorder.container_id}`
);
if (removeRes.status !== 204 && removeRes.status !== 404) {
return res.status(500).json({
error: 'Failed to remove container',
details: removeRes.data,
});
// Issue #162 — stop local container in the background so the HTTP stop
// request returns immediately. The container teardown (SIGTERM -> ffmpeg
// exit -> S3 upload -> post-stop callback) takes up to 180s for large files,
// which would otherwise time out the browser/API connection.
const containerId = recorder.container_id;
(async () => {
try {
const stopRes = await dockerApi('POST', `/containers/${containerId}/stop?t=180`);
if (stopRes.status !== 404) {
await waitForFinalize(recorder);
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
}
} catch (e) {
console.error('[recorders] failed local background stop:', e.message);
}
}
})();
}
const updateResult = await pool.query(

View file

@@ -387,29 +387,33 @@ async function fetchContainerLogs(containerId) {
async function handleSidecarStop(containerId, res) {
try {
console.log(`[sidecar-stop] stopping ${containerId} (grace 180s)...`);
// Grace period must exceed the capture container's shutdown work
// (finalise ffmpeg session + register asset via callback). Default
// docker stop is only 10s, which SIGKILLs capture mid-finalise and
// loses the POST /assets callback -> asset stuck 'live', no jobs.
await dockerApi('POST', `/containers/${containerId}/stop?t=180`).catch(() => {});
// Dump the capture container's shutdown logs into our persistent log
// BEFORE removing it, so failed callbacks are diagnosable.
const logs = await fetchContainerLogs(containerId);
console.log(`[sidecar-stop] ==== capture logs for ${containerId} ====\n${logs}\n[sidecar-stop] ==== end logs ====`);
// Container has now exited gracefully (or hit the 180s cap); remove it.
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
// Run the container teardown and cleanup in the background. The capture
// process SIGTERM handler flushes ffmpeg and uploads the file to S3
// (taking up to 3 minutes for multi-GB files) before exiting. Returning
// immediately stops the API request timing out.
(async () => {
try {
await dockerApi('POST', `/containers/${containerId}/stop?t=180`).catch(() => {});
const logs = await fetchContainerLogs(containerId);
console.log(`[sidecar-stop] ==== capture logs for ${containerId} ====\n${logs}\n[sidecar-stop] ==== end logs ====`);
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
// Deltacast bridge lifecycle: decrement sidecar count; stop bridge when last.
if (_containerSourceType.get(containerId) === 'deltacast') {
_containerSourceType.delete(containerId);
_dcSidecarCount--;
if (_dcSidecarCount <= 0) {
_dcSidecarCount = 0;
stopDeltacastBridge();
// Deltacast bridge lifecycle: decrement sidecar count; stop bridge when last.
if (_containerSourceType.get(containerId) === 'deltacast') {
_containerSourceType.delete(containerId);
_dcSidecarCount--;
if (_dcSidecarCount <= 0) {
_dcSidecarCount = 0;
stopDeltacastBridge();
}
} else {
_containerSourceType.delete(containerId);
}
} catch (err) {
console.error(`[sidecar-stop] background cleanup failed for ${containerId}:`, err.message);
}
} else {
_containerSourceType.delete(containerId);
}
})();
jsonResponse(res, 200, { ok: true });
} catch (err) {