fix(worker): conform — 2-pass strategy (normalise on trim, demux on concat)
ffmpeg 8.x's concat filter kept dying with the opaque "[fc#0] Error sending frames to consumers: Invalid argument" error even after we locked fps + sample rate + pixel format + SAR in the filter graph. Mixed sources (AV1+H.264, 23.98+60 fps, 44100+48000 Hz, tv-range+unspecified-range pixel format) just don't survive the concat filter cleanly in this build. Switch to the more reliable 2-pass pattern: 1. At the trim step, re-encode each segment to a uniform intermediate spec: libx264 ultrafast, 1920x1080 (letterboxed), yuv420p, seqFps target rate, 48kHz stereo AAC. One ffmpeg invocation per segment. 2. At the concat step, use the concat *demuxer*. Because every input now matches exactly, the demuxer is well-behaved. Transcode the concatenated stream to the final target codec (ProRes 422 HQ etc.). This costs an extra intermediate encode (libx264 ultrafast ≈ realtime on this hardware) but eliminates the filter-graph fragility on mixed-source timelines, which is the workload that actually matters. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
fcf4c8bbe7
commit
686b90294b
1 changed files with 41 additions and 68 deletions
|
|
@ -3,7 +3,7 @@ import { unlink, writeFile, mkdir, rm } from 'fs/promises';
|
|||
import { tmpdir } from 'os';
|
||||
import { query } from '../db/client.js';
|
||||
import { downloadFromS3, uploadToS3 } from '../s3/client.js';
|
||||
import { trimSegment, concatSegments, runFFmpeg } from '../ffmpeg/executor.js';
|
||||
import { trimSegment, concatSegments, runFFmpeg, getMediaInfo } from '../ffmpeg/executor.js';
|
||||
import { parseEDL } from '../edl/parser.js';
|
||||
import { XMLParser } from 'fast-xml-parser';
|
||||
|
||||
|
|
@ -217,8 +217,34 @@ export const conformWorker = async (job) => {
|
|||
console.log(`[conform] Downloading segment ${edit.editNumber} from S3 (${sourceKey})`);
|
||||
await downloadFromS3(S3_BUCKET, sourceKey, segmentInputPath);
|
||||
|
||||
console.log(`[conform] Trimming ${edit.editNumber}: ${edit.sourceIn} → ${edit.sourceOut}`);
|
||||
await trimSegment(segmentInputPath, segmentOutputPath, edit.sourceIn, edit.sourceOut);
|
||||
// Trim + normalise in a single ffmpeg pass per segment. We re-encode
|
||||
// here (libx264 ultrafast) so every segment lands at the same spec
|
||||
// — same fps, resolution, pixel format, sample rate, channel layout
|
||||
// — which lets the final concat-demuxer step run reliably even when
|
||||
// the source clips are wildly different (mixed codecs / fps / sample
|
||||
// rate). The double-encode (intermediate h264 → final ProRes) costs
|
||||
// some CPU but avoids the concat filter's opaque "Invalid argument"
|
||||
// failures with disparate sources.
|
||||
console.log(`[conform] Trim + normalise ${edit.editNumber}: ${edit.sourceIn} → ${edit.sourceOut}`);
|
||||
const segMs = await getMediaInfo(segmentInputPath);
|
||||
const segFps = segMs.fps || 30;
|
||||
const inSec = edit.sourceIn / segFps;
|
||||
const durSec = (edit.sourceOut - edit.sourceIn) / segFps;
|
||||
await runFFmpeg([
|
||||
'-ss', String(inSec),
|
||||
'-i', segmentInputPath,
|
||||
'-t', String(durSec),
|
||||
'-vf', `fps=${Math.round(seqFps) || 30},` +
|
||||
`scale=1920:1080:force_original_aspect_ratio=decrease,` +
|
||||
`pad=1920:1080:(ow-iw)/2:(oh-ih)/2,` +
|
||||
`setsar=1,format=yuv420p`,
|
||||
'-af', 'aresample=48000:ocl=stereo',
|
||||
'-c:v', 'libx264', '-preset', 'ultrafast', '-crf', '18',
|
||||
'-pix_fmt', 'yuv420p',
|
||||
'-c:a', 'aac', '-b:a', '320k', '-ar', '48000',
|
||||
'-shortest',
|
||||
'-y', segmentOutputPath,
|
||||
]);
|
||||
|
||||
concatList.push(segmentOutputPath);
|
||||
await unlink(segmentInputPath).catch(() => {});
|
||||
|
|
@ -267,74 +293,21 @@ export const conformWorker = async (job) => {
|
|||
'-crf', quality === 'broadcast' ? '18' : quality === 'high' ? '23' : '28',
|
||||
];
|
||||
|
||||
// Concat strategy: the demuxer is fast but requires identical specs
|
||||
// (codec, pixel format, framerate, resolution) across all segments.
|
||||
// When sources differ (AV1 + H.264, mixed framerates, etc.) it dies
|
||||
// with "Error sending frames to consumers: Invalid argument". Use the
|
||||
// concat *filter* instead — it normalises each input through a filter
|
||||
// graph before joining, so disparate sources work.
|
||||
//
|
||||
// Normalise every input to the target resolution + 1/SAR + yuv420p so
|
||||
// the encode is deterministic. Output frame rate is unset; ffmpeg
|
||||
// will use whatever the final filter graph emits, which the target
|
||||
// codec accepts.
|
||||
const wantAudio = !(audio === 'none' || audio === 'off');
|
||||
const inputArgs = [];
|
||||
concatList.forEach(p => { inputArgs.push('-i', p); });
|
||||
|
||||
// Build the filter graph. The concat filter in ffmpeg 8.x requires
|
||||
// identical resolution, pixel format, SAR, FRAME RATE and audio
|
||||
// SAMPLE RATE / CHANNEL LAYOUT across all inputs. Different-spec
|
||||
// sources (e.g. a 23.98 fps clip + a 60 fps clip) trip
|
||||
// Concat: every segment was normalised at trim time (uniform fps,
|
||||
// resolution, pixel format, sample rate, stereo). The demuxer can
|
||||
// stream-stitch them and we just need to transcode the result to the
|
||||
// final target codec. This bypasses ffmpeg 8.x's brittle concat-
|
||||
// filter path that was throwing
|
||||
// [fc#0] Error sending frames to consumers: Invalid argument
|
||||
// even though our earlier scale+pad+format pass took care of the
|
||||
// pixel side. Force the time-domain axes too:
|
||||
// fps=<target> — resample video to a constant rate
|
||||
// setpts=PTS-STARTPTS — re-zero PTS so concat's per-input clock
|
||||
// resets cleanly
|
||||
// aresample=48000 — force a single audio sample rate
|
||||
// asetpts=PTS-STARTPTS — same for audio
|
||||
const targetW = 1920;
|
||||
const targetH = 1080;
|
||||
const targetFps = Math.round(seqFps) || 30;
|
||||
const targetSampleRate = 48000;
|
||||
|
||||
const vLabels = [];
|
||||
const aLabels = [];
|
||||
let normalize = '';
|
||||
for (let i = 0; i < concatList.length; i++) {
|
||||
normalize +=
|
||||
`[${i}:v:0]fps=${targetFps},` +
|
||||
`scale=${targetW}:${targetH}:force_original_aspect_ratio=decrease,` +
|
||||
`pad=${targetW}:${targetH}:(ow-iw)/2:(oh-ih)/2,` +
|
||||
`setsar=1,format=yuv420p,setpts=PTS-STARTPTS[v${i}];`;
|
||||
vLabels.push(`[v${i}]`);
|
||||
if (wantAudio) {
|
||||
normalize +=
|
||||
`[${i}:a:0]aresample=${targetSampleRate},` +
|
||||
`aformat=channel_layouts=stereo:sample_fmts=fltp,` +
|
||||
`asetpts=PTS-STARTPTS[a${i}];`;
|
||||
aLabels.push(`[a${i}]`);
|
||||
}
|
||||
}
|
||||
const n = concatList.length;
|
||||
let concatExpr;
|
||||
if (wantAudio) {
|
||||
const interleaved = [];
|
||||
for (let i = 0; i < n; i++) { interleaved.push(vLabels[i], aLabels[i]); }
|
||||
concatExpr = `${interleaved.join('')}concat=n=${n}:v=1:a=1[outv][outa]`;
|
||||
} else {
|
||||
concatExpr = `${vLabels.join('')}concat=n=${n}:v=1:a=0[outv]`;
|
||||
}
|
||||
const filterComplex = normalize + concatExpr;
|
||||
|
||||
const mapArgs = wantAudio ? ['-map', '[outv]', '-map', '[outa]'] : ['-map', '[outv]'];
|
||||
const encodeAudio = wantAudio ? ['-c:a', 'aac', '-b:a', '320k', '-ar', '48000'] : ['-an'];
|
||||
// on mixed-source timelines.
|
||||
const encodeAudio = (audio === 'none' || audio === 'off')
|
||||
? ['-an']
|
||||
: ['-c:a', 'aac', '-b:a', '320k', '-ar', '48000'];
|
||||
|
||||
await runFFmpeg([
|
||||
...inputArgs,
|
||||
'-filter_complex', filterComplex,
|
||||
...mapArgs,
|
||||
'-f', 'concat',
|
||||
'-safe', '0',
|
||||
'-i', segmentListPath,
|
||||
'-c:v', videoCodec,
|
||||
...profileFlag,
|
||||
...qualityArgs,
|
||||
|
|
|
|||
Loading…
Reference in a new issue