diff --git a/services/worker/src/workers/conform.js b/services/worker/src/workers/conform.js index 8748973..f899644 100644 --- a/services/worker/src/workers/conform.js +++ b/services/worker/src/workers/conform.js @@ -267,14 +267,63 @@ export const conformWorker = async (job) => { '-crf', quality === 'broadcast' ? '18' : quality === 'high' ? '23' : '28', ]; + // Concat strategy: the concat demuxer is fast but requires identical specs + // (codec, pixel format, framerate, resolution) across all segments. + // When sources differ (AV1 and H.264, mixed framerates, etc.) it dies + // with "Error sending frames to consumers: Invalid argument". Use the + // concat *filter* instead: each input is first normalised by the filter + // graph built below and then joined, so disparate sources work. + // + // Normalise every input to the target resolution, square pixels (setsar=1) + // and yuv420p so the encode is deterministic. No fps filter or output rate + // option is set here, so the output frame rate is whatever the filter graph + // emits (presumably acceptable to the target codec; confirm). + const wantAudio = !(audio === 'none' || audio === 'off'); + const inputArgs = []; + concatList.forEach(p => { inputArgs.push('-i', p); }); + + // Build the filter graph: scale each video stream to a consistent + // resolution and pixel format, then concat them. The audio leg is only built + // when wantAudio is true. NOTE(review): targetW is 1920 in both ternary branches, so isProRes has no effect on it. + const targetW = isProRes ? 1920 : 1920; + const targetH = 1080; + const vLabels = []; + const aLabels = []; + let normalize = ''; + for (let i = 0; i < concatList.length; i++) { + // scale with force_original_aspect_ratio=decrease, then pad to the target box, keeps + // mixed-aspect sources inside the frame without distortion; the pad offsets centre the picture. + normalize += `[${i}:v:0]scale=${targetW}:${targetH}:force_original_aspect_ratio=decrease,pad=${targetW}:${targetH}:(ow-iw)/2:(oh-ih)/2,setsar=1,format=yuv420p[v${i}];`; + vLabels.push(`[v${i}]`); + if (wantAudio) { + // NOTE(review): no anullsrc fallback is generated here; [i:a:0] is referenced directly, so an input without an audio stream will presumably fail the graph (confirm).
+ normalize += `[${i}:a:0]aresample=async=1:first_pts=0[a${i}];`; + aLabels.push(`[a${i}]`); + } + } + const n = concatList.length; + let concatExpr; + if (wantAudio) { + // concat takes its inputs interleaved per segment: [v0][a0][v1][a1]… + const interleaved = []; + for (let i = 0; i < n; i++) { interleaved.push(vLabels[i], aLabels[i]); } + concatExpr = `${interleaved.join('')}concat=n=${n}:v=1:a=1[outv][outa]`; + } else { + concatExpr = `${vLabels.join('')}concat=n=${n}:v=1:a=0[outv]`; + } + const filterComplex = normalize + concatExpr; + + const mapArgs = wantAudio ? ['-map', '[outv]', '-map', '[outa]'] : ['-map', '[outv]']; + const encodeAudio = wantAudio ? ['-c:a', 'aac', '-b:a', '320k', '-ar', '48000'] : ['-an']; + await runFFmpeg([ - '-f', 'concat', - '-safe', '0', - '-i', segmentListPath, + ...inputArgs, + '-filter_complex', filterComplex, + ...mapArgs, '-c:v', videoCodec, ...profileFlag, ...qualityArgs, - ...audioFlag, + ...encodeAudio, '-y', outputPath, ]);