fix(recorders): stop capture containers in the background to prevent API TimeoutError on large file uploads
This commit is contained in:
parent
600af4564e
commit
62b9a90291
2 changed files with 40 additions and 57 deletions
|
|
@ -789,43 +789,22 @@ router.post('/:id/stop', requireRecorderEdit, async (req, res, next) => {
|
||||||
return res.status(502).json({ error: 'Remote node failed to stop sidecar' });
|
return res.status(502).json({ error: 'Remote node failed to stop sidecar' });
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Issue #162 — stop WITH a grace period (t=180). Docker sends SIGTERM and
|
// Issue #162 — stop local container in the background so the HTTP stop
|
||||||
// waits up to 180s for ffmpeg to flush and write the MOV/MP4 trailer before
|
// request returns immediately. The container teardown (SIGTERM -> ffmpeg
|
||||||
// it SIGKILLs. Without this the master is truncated/corrupt and the
|
// exit -> S3 upload -> post-stop callback) takes up to 180s for large files,
|
||||||
// pre-created asset can get stuck in 'live'.
|
// which would otherwise time out the browser/API connection.
|
||||||
const stopRes = await dockerApi(
|
const containerId = recorder.container_id;
|
||||||
'POST',
|
(async () => {
|
||||||
`/containers/${recorder.container_id}/stop?t=180`
|
try {
|
||||||
);
|
const stopRes = await dockerApi('POST', `/containers/${containerId}/stop?t=180`);
|
||||||
|
if (stopRes.status !== 404) {
|
||||||
// 204 = stopped, 304 = already stopped, 404 = container gone — all acceptable.
|
await waitForFinalize(recorder);
|
||||||
if (stopRes.status !== 204 && stopRes.status !== 304 && stopRes.status !== 404) {
|
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
|
||||||
return res.status(500).json({
|
}
|
||||||
error: 'Failed to stop container',
|
} catch (e) {
|
||||||
details: stopRes.data,
|
console.error('[recorders] failed local background stop:', e.message);
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Only attempt remove if the container existed (not 404).
|
|
||||||
if (stopRes.status !== 404) {
|
|
||||||
// Issue #162 — before removing the container, wait for the master to
|
|
||||||
// finalize (asset leaves 'live'), mirroring the remote path's reliance on
|
|
||||||
// the node-agent's clean teardown. This guards against deleting the
|
|
||||||
// container — and its lingering finalize work — too early.
|
|
||||||
await waitForFinalize(recorder);
|
|
||||||
|
|
||||||
const removeRes = await dockerApi(
|
|
||||||
'DELETE',
|
|
||||||
`/containers/${recorder.container_id}`
|
|
||||||
);
|
|
||||||
|
|
||||||
if (removeRes.status !== 204 && removeRes.status !== 404) {
|
|
||||||
return res.status(500).json({
|
|
||||||
error: 'Failed to remove container',
|
|
||||||
details: removeRes.data,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
})();
|
||||||
}
|
}
|
||||||
|
|
||||||
const updateResult = await pool.query(
|
const updateResult = await pool.query(
|
||||||
|
|
|
||||||
|
|
@ -387,29 +387,33 @@ async function fetchContainerLogs(containerId) {
|
||||||
async function handleSidecarStop(containerId, res) {
|
async function handleSidecarStop(containerId, res) {
|
||||||
try {
|
try {
|
||||||
console.log(`[sidecar-stop] stopping ${containerId} (grace 180s)...`);
|
console.log(`[sidecar-stop] stopping ${containerId} (grace 180s)...`);
|
||||||
// Grace period must exceed the capture container's shutdown work
|
|
||||||
// (finalise ffmpeg session + register asset via callback). Default
|
// Run the container teardown and cleanup in the background. The capture
|
||||||
// docker stop is only 10s, which SIGKILLs capture mid-finalise and
|
// process SIGTERM handler flushes ffmpeg and uploads the file to S3
|
||||||
// loses the POST /assets callback -> asset stuck 'live', no jobs.
|
// (taking up to 3 minutes for multi-GB files) before exiting. Returning
|
||||||
await dockerApi('POST', `/containers/${containerId}/stop?t=180`).catch(() => {});
|
// immediately keeps the API request from timing out.
|
||||||
// Dump the capture container's shutdown logs into our persistent log
|
(async () => {
|
||||||
// BEFORE removing it, so failed callbacks are diagnosable.
|
try {
|
||||||
const logs = await fetchContainerLogs(containerId);
|
await dockerApi('POST', `/containers/${containerId}/stop?t=180`).catch(() => {});
|
||||||
console.log(`[sidecar-stop] ==== capture logs for ${containerId} ====\n${logs}\n[sidecar-stop] ==== end logs ====`);
|
const logs = await fetchContainerLogs(containerId);
|
||||||
// Container has now exited gracefully (or hit the 180s cap); remove it.
|
console.log(`[sidecar-stop] ==== capture logs for ${containerId} ====\n${logs}\n[sidecar-stop] ==== end logs ====`);
|
||||||
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
|
await dockerApi('DELETE', `/containers/${containerId}?force=true`).catch(() => {});
|
||||||
|
|
||||||
// Deltacast bridge lifecycle: decrement sidecar count; stop bridge when last.
|
// Deltacast bridge lifecycle: decrement sidecar count; stop bridge when last.
|
||||||
if (_containerSourceType.get(containerId) === 'deltacast') {
|
if (_containerSourceType.get(containerId) === 'deltacast') {
|
||||||
_containerSourceType.delete(containerId);
|
_containerSourceType.delete(containerId);
|
||||||
_dcSidecarCount--;
|
_dcSidecarCount--;
|
||||||
if (_dcSidecarCount <= 0) {
|
if (_dcSidecarCount <= 0) {
|
||||||
_dcSidecarCount = 0;
|
_dcSidecarCount = 0;
|
||||||
stopDeltacastBridge();
|
stopDeltacastBridge();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
_containerSourceType.delete(containerId);
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
console.error(`[sidecar-stop] background cleanup failed for ${containerId}:`, err.message);
|
||||||
}
|
}
|
||||||
} else {
|
})();
|
||||||
_containerSourceType.delete(containerId);
|
|
||||||
}
|
|
||||||
|
|
||||||
jsonResponse(res, 200, { ok: true });
|
jsonResponse(res, 200, { ok: true });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue