fix(recorders): stop capture containers in the background to prevent API TimeoutError on large file uploads
This commit is contained in:
parent
600af4564e
commit
62b9a90291
2 changed files with 40 additions and 57 deletions
|
|
@ -789,43 +789,22 @@ router.post('/:id/stop', requireRecorderEdit, async (req, res, next) => {
|
|||
return res.status(502).json({ error: 'Remote node failed to stop sidecar' });
|
||||
}
|
||||
} else {
|
||||
// Issue #162 — stop WITH a grace period (t=180). Docker sends SIGTERM and
|
||||
// waits up to 180s for ffmpeg to flush and write the MOV/MP4 trailer before
|
||||
// it SIGKILLs. Without this the master is truncated/corrupt and the
|
||||
// pre-created asset can get stuck in 'live'.
|
||||
const stopRes = await dockerApi(
|
||||
'POST',
|
||||
`/containers/${recorder.container_id}/stop?t=180`
|
||||
);
|
||||
|
||||
// 204 = stopped, 304 = already stopped, 404 = container gone — all acceptable.
|
||||
if (stopRes.status !== 204 && stopRes.status !== 304 && stopRes.status !== 404) {
|
||||
return res.status(500).json({
|
||||
error: 'Failed to stop container',
|
||||
details: stopRes.data,
|
||||
});
|
||||
}
|
||||
|
||||
// Only attempt remove if the container existed (not 404).
|
||||
if (stopRes.status !== 404) {
|
||||
// Issue #162 — before removing the container, wait for the master to
|
||||
// finalize (asset leaves 'live'), mirroring the remote path's reliance on
|
||||
// the node-agent's clean teardown. This guards against deleting the
|
||||
// container — and its lingering finalize work — too early.
|
||||
await waitForFinalize(recorder);
|
||||
|
||||
const removeRes = await dockerApi(
|
||||
'DELETE',
|
||||
`/containers/${recorder.container_id}`
|
||||
);
|
||||
|
||||
if (removeRes.status !== 204 && removeRes.status !== 404) {
|
||||
return res.status(500).json({
|
||||
error: 'Failed to remove container',
|
||||
details: removeRes.data,
|
||||
});
|
||||
// Issue #162 — stop local container in the background so the HTTP stop
|
||||
// request returns immediately. The container teardown (SIGTERM -> ffmpeg
|
||||
// exit -> S3 upload -> post-stop callback) takes up to 180s for large files,
|
||||
// which would otherwise time out the browser/API connection.
|
||||
const containerId = recorder.container_id;
|
||||
(async () => {
|
||||
try {
|
||||
const stopRes = await dockerApi('POST', `/containers/${containerId}/stop?t=180`);
|
||||
if (stopRes.status !== 404) {
|
||||
await waitForFinalize(recorder);
|
||||
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('[recorders] failed local background stop:', e.message);
|
||||
}
|
||||
}
|
||||
})();
|
||||
}
|
||||
|
||||
const updateResult = await pool.query(
|
||||
|
|
|
|||
|
|
@ -387,29 +387,33 @@ async function fetchContainerLogs(containerId) {
|
|||
async function handleSidecarStop(containerId, res) {
|
||||
try {
|
||||
console.log(`[sidecar-stop] stopping ${containerId} (grace 180s)...`);
|
||||
// Grace period must exceed the capture container's shutdown work
|
||||
// (finalise ffmpeg session + register asset via callback). Default
|
||||
// docker stop is only 10s, which SIGKILLs capture mid-finalise and
|
||||
// loses the POST /assets callback -> asset stuck 'live', no jobs.
|
||||
await dockerApi('POST', `/containers/${containerId}/stop?t=180`).catch(() => {});
|
||||
// Dump the capture container's shutdown logs into our persistent log
|
||||
// BEFORE removing it, so failed callbacks are diagnosable.
|
||||
const logs = await fetchContainerLogs(containerId);
|
||||
console.log(`[sidecar-stop] ==== capture logs for ${containerId} ====\n${logs}\n[sidecar-stop] ==== end logs ====`);
|
||||
// Container has now exited gracefully (or hit the 180s cap); remove it.
|
||||
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
|
||||
|
||||
// Run the container teardown and cleanup in the background. The capture
|
||||
// process SIGTERM handler flushes ffmpeg and uploads the file to S3
|
||||
// (taking up to 3 minutes for multi-GB files) before exiting. Returning
|
||||
// immediately stops the API request timing out.
|
||||
(async () => {
|
||||
try {
|
||||
await dockerApi('POST', `/containers/${containerId}/stop?t=180`).catch(() => {});
|
||||
const logs = await fetchContainerLogs(containerId);
|
||||
console.log(`[sidecar-stop] ==== capture logs for ${containerId} ====\n${logs}\n[sidecar-stop] ==== end logs ====`);
|
||||
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
|
||||
|
||||
// Deltacast bridge lifecycle: decrement sidecar count; stop bridge when last.
|
||||
if (_containerSourceType.get(containerId) === 'deltacast') {
|
||||
_containerSourceType.delete(containerId);
|
||||
_dcSidecarCount--;
|
||||
if (_dcSidecarCount <= 0) {
|
||||
_dcSidecarCount = 0;
|
||||
stopDeltacastBridge();
|
||||
// Deltacast bridge lifecycle: decrement sidecar count; stop bridge when last.
|
||||
if (_containerSourceType.get(containerId) === 'deltacast') {
|
||||
_containerSourceType.delete(containerId);
|
||||
_dcSidecarCount--;
|
||||
if (_dcSidecarCount <= 0) {
|
||||
_dcSidecarCount = 0;
|
||||
stopDeltacastBridge();
|
||||
}
|
||||
} else {
|
||||
_containerSourceType.delete(containerId);
|
||||
}
|
||||
} catch (err) {
|
||||
console.error(`[sidecar-stop] background cleanup failed for ${containerId}:`, err.message);
|
||||
}
|
||||
} else {
|
||||
_containerSourceType.delete(containerId);
|
||||
}
|
||||
})();
|
||||
|
||||
jsonResponse(res, 200, { ok: true });
|
||||
} catch (err) {
|
||||
|
|
|
|||
Loading…
Reference in a new issue