fix(worker): conform — concat-filter for mixed source formats

The ffmpeg concat demuxer dies with "Error sending frames to consumers: Invalid argument" when input segments don't share codec / pixel format / framerate / resolution. Mixed-source timelines hit this every time — e.g. an AV1 clip + an H.264 clip going through the same concat. Switch to the concat *filter*. It re-encodes through a filter graph so disparate inputs are normalised inline. Each video input is scaled to fit 1920x1080 with letterbox padding, then set to square pixels and format=yuv420p; when audio is kept, each audio input is resampled. concat=n=N joins them into [outv] (plus [outa] when audio is kept).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-28 15:04:55 -04:00 · 2026-05-28 15:04:55 -04:00 · 94b6710e2d
commit 94b6710e2d
parent 6412b5c252
1 changed files with 53 additions and 4 deletions
--- a/services/worker/src/workers/conform.js
+++ b/services/worker/src/workers/conform.js
@ -267,14 +267,63 @@ export const conformWorker = async (job) => {
          '-crf',    quality === 'broadcast' ? '18' : quality === 'high' ? '23' : '28',
        ];

+    // Concat strategy: the demuxer is fast but requires identical specs
+    // (codec, pixel format, framerate, resolution) across all segments.
+    // When sources differ (AV1 + H.264, mixed framerates, etc.) it dies
+    // with "Error sending frames to consumers: Invalid argument". Use the
+    // concat *filter* instead — it normalises each input through a filter
+    // graph before joining, so disparate sources work.
+    //
+    // Normalise every input to the target resolution, square pixels
+    // (setsar=1) and yuv420p so every segment reaches concat with the same
+    // geometry. Output frame rate is left unset: mixed input rates yield
+    // variable-frame-rate output, which the target codec must accept.
+    const wantAudio = !(audio === 'none' || audio === 'off');
+    const inputArgs = [];
+    concatList.forEach(p => { inputArgs.push('-i', p); });
+
+    // Build the filter graph: scale each video stream to a consistent
+    // resolution + pixel format, then concat them. The audio leg only
+    // runs if audio is being kept.
+    const targetW = isProRes ? 1920 : 1920;
+    const targetH = 1080;
+    const vLabels = [];
+    const aLabels = [];
+    let normalize = '';
+    for (let i = 0; i < concatList.length; i++) {
+      // scale=W:H force_original_aspect_ratio=decrease + pad to box keeps
+      // mixed-aspect sources inside the frame without distortion.
+      normalize += `[${i}:v:0]scale=${targetW}:${targetH}:force_original_aspect_ratio=decrease,pad=${targetW}:${targetH}:(ow-iw)/2:(oh-ih)/2,setsar=1,format=yuv420p[v${i}];`;
+      vLabels.push(`[v${i}]`);
+      if (wantAudio) {
+        // Resample each input's first audio stream. NOTE: no anullsrc fallback is wired in yet, so an input without audio will fail here.
+        normalize += `[${i}:a:0]aresample=async=1:first_pts=0[a${i}];`;
+        aLabels.push(`[a${i}]`);
+      }
+    }
+    const n = concatList.length;
+    let concatExpr;
+    if (wantAudio) {
+      // interleaved [v0][a0][v1][a1]…
+      const interleaved = [];
+      for (let i = 0; i < n; i++) { interleaved.push(vLabels[i], aLabels[i]); }
+      concatExpr = `${interleaved.join('')}concat=n=${n}:v=1:a=1[outv][outa]`;
+    } else {
+      concatExpr = `${vLabels.join('')}concat=n=${n}:v=1:a=0[outv]`;
+    }
+    const filterComplex = normalize + concatExpr;
+
+    const mapArgs = wantAudio ? ['-map', '[outv]', '-map', '[outa]'] : ['-map', '[outv]'];
+    const encodeAudio = wantAudio ? ['-c:a', 'aac', '-b:a', '320k', '-ar', '48000'] : ['-an'];
+
    await runFFmpeg([
-      '-f', 'concat',
-      '-safe', '0',
-      '-i', segmentListPath,
+      ...inputArgs,
+      '-filter_complex', filterComplex,
+      ...mapArgs,
      '-c:v', videoCodec,
      ...profileFlag,
      ...qualityArgs,
-      ...audioFlag,
+      ...encodeAudio,
      '-y', outputPath,
    ]);