From fcf4c8bbe79a347bfa871f014abcf82071335fc3 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 28 May 2026 15:21:23 -0400 Subject: [PATCH] =?UTF-8?q?fix(worker):=20conform=20=E2=80=94=20lock=20fps?= =?UTF-8?q?=20+=20sample=20rate=20in=20concat=20filter=20graph?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the demuxer → filter switch, concat still failed with [fc#0] Error sending frames to consumers: Invalid argument on Job 8. The filter graph normalised pixels (scale+pad+yuv420p) but left the time-domain axes mixed: segment-1: 23.98 fps video, 44100 Hz audio segment-2: 60 fps video, 48000 Hz audio segment-3: … ffmpeg 8's concat filter requires identical frame rate + audio sample rate + channel layout across inputs. Force them on each leg: video: fps=<targetFps>, setpts=PTS-STARTPTS audio: aresample=48000, aformat=channel_layouts=stereo:sample_fmts=fltp, asetpts=PTS-STARTPTS setpts/asetpts re-zero each input's clock so concat's per-input PTS window resets cleanly between segments. Target fps comes from the sequence's frame_rate (rounded) — same axis the sequence editor stores. Sample rate is pinned to 48000 (broadcast standard) so the AAC encode is consistent. Co-Authored-By: Claude Opus 4.7 --- services/worker/src/workers/conform.js | 35 ++++++++++++++++++-------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/services/worker/src/workers/conform.js b/services/worker/src/workers/conform.js index f899644..dc6f7c0 100644 --- a/services/worker/src/workers/conform.js +++ b/services/worker/src/workers/conform.js @@ -282,29 +282,44 @@ export const conformWorker = async (job) => { const inputArgs = []; concatList.forEach(p => { inputArgs.push('-i', p); }); - // Build the filter graph: scale each video stream to a consistent - // resolution + pixel format, then concat them. The audio leg only - // runs if audio is being kept. - const targetW = isProRes ? 1920 : 1920; + // Build the filter graph. 
The concat filter in ffmpeg 8.x requires + // identical resolution, pixel format, SAR, FRAME RATE and audio + // SAMPLE RATE / CHANNEL LAYOUT across all inputs. Different-spec + // sources (e.g. a 23.98 fps clip + a 60 fps clip) trip + // [fc#0] Error sending frames to consumers: Invalid argument + // even though our earlier scale+pad+format pass took care of the + // pixel side. Force the time-domain axes too: + // fps=<targetFps> — resample video to a constant rate + // setpts=PTS-STARTPTS — re-zero PTS so concat's per-input clock + // resets cleanly + // aresample=48000 — force a single audio sample rate + // asetpts=PTS-STARTPTS — same for audio + const targetW = 1920; const targetH = 1080; + const targetFps = Math.round(seqFps) || 30; + const targetSampleRate = 48000; + const vLabels = []; const aLabels = []; let normalize = ''; for (let i = 0; i < concatList.length; i++) { - // scale=W:H force_original_aspect_ratio=decrease + pad to box keeps - // mixed-aspect sources inside the frame without distortion. - normalize += `[${i}:v:0]scale=${targetW}:${targetH}:force_original_aspect_ratio=decrease,pad=${targetW}:${targetH}:(ow-iw)/2:(oh-ih)/2,setsar=1,format=yuv420p[v${i}];`; + normalize += + `[${i}:v:0]fps=${targetFps},` + + `scale=${targetW}:${targetH}:force_original_aspect_ratio=decrease,` + + `pad=${targetW}:${targetH}:(ow-iw)/2:(oh-ih)/2,` + + `setsar=1,format=yuv420p,setpts=PTS-STARTPTS[v${i}];`; vLabels.push(`[v${i}]`); if (wantAudio) { - // anullsrc as a fallback so missing audio doesn't blow up concat. 
- normalize += `[${i}:a:0]aresample=async=1:first_pts=0[a${i}];`; + normalize += + `[${i}:a:0]aresample=${targetSampleRate},` + + `aformat=channel_layouts=stereo:sample_fmts=fltp,` + + `asetpts=PTS-STARTPTS[a${i}];`; aLabels.push(`[a${i}]`); } } const n = concatList.length; let concatExpr; if (wantAudio) { - // interleaved [v0][a0][v1][a1]… const interleaved = []; for (let i = 0; i < n; i++) { interleaved.push(vLabels[i], aLabels[i]); } concatExpr = `${interleaved.join('')}concat=n=${n}:v=1:a=1[outv][outa]`;