dragonflight/services/worker/src/workers/conform.js

415 lines
17 KiB
JavaScript
Raw Normal View History

import { join } from 'path';
import { unlink, writeFile, mkdir, rm } from 'fs/promises';
import { tmpdir } from 'os';
import { Queue } from 'bullmq';
import { query } from '../db/client.js';
import { downloadFromS3, uploadToS3 } from '../s3/client.js';
import { trimSegment, concatSegments, runFFmpeg, getMediaInfo } from '../ffmpeg/executor.js';
import { parseEDL } from '../edl/parser.js';
import { XMLParser } from 'fast-xml-parser';
// Bucket used for every S3 download/upload in this worker. The S3_BUCKET
// environment variable overrides the built-in default when it is set to a
// non-empty value.
const DEFAULT_S3_BUCKET = 'wild-dragon';
const S3_BUCKET = process.env.S3_BUCKET || DEFAULT_S3_BUCKET;
// Used to queue a proxy build for the conformed output so the library /
// asset viewer has a browser-playable H.264 preview. Without this the
// browser hits MEDIA_ERR_SRC_NOT_SUPPORTED on ProRes / DNxHR outputs.
/**
 * Convert a Redis connection URL into a connection-options object.
 *
 * Besides host and port, any credentials, database index and TLS scheme
 * present in the URL are carried over so that a URL such as
 * `rediss://user:secret@cache:6380/2` is honoured in full rather than being
 * silently reduced to host + port.
 *
 * @param {string} url - Redis URL, e.g. `redis://queue:6379`.
 * @returns {{host: string, port: number, username?: string, password?: string, db?: number, tls?: object}}
 *   Connection options. Falls back to `{ host: 'localhost', port: 6379 }`
 *   when `url` cannot be parsed at all.
 */
const parseRedisUrl = (url) => {
  // URL keeps userinfo percent-encoded; decode it, but never let a stray '%'
  // in a password throw and push us onto the localhost fallback.
  const decodeUserInfo = (raw) => {
    try {
      return decodeURIComponent(raw);
    } catch {
      return raw;
    }
  };

  try {
    const parsed = new URL(url);
    const connection = {
      host: parsed.hostname,
      port: parseInt(parsed.port, 10) || 6379,
    };
    if (parsed.username) connection.username = decodeUserInfo(parsed.username);
    if (parsed.password) connection.password = decodeUserInfo(parsed.password);

    // A purely numeric path segment selects the database: redis://host/2
    const dbMatch = /^\/(\d+)$/.exec(parsed.pathname);
    if (dbMatch) connection.db = parseInt(dbMatch[1], 10);

    // rediss:// is the TLS variant of the scheme.
    if (parsed.protocol === 'rediss:') connection.tls = {};

    return connection;
  } catch {
    return { host: 'localhost', port: 6379 };
  }
};
// Handle on the 'proxy' BullMQ queue. The Redis endpoint comes from
// REDIS_URL, defaulting to the 'queue' host on port 6379.
const proxyRedisUrl = process.env.REDIS_URL || 'redis://queue:6379';
const proxyQueue = new Queue('proxy', { connection: parseRedisUrl(proxyRedisUrl) });

// Shared XML parser for FCP XML input. Attributes are kept (not ignored) and
// surface on parsed nodes under keys prefixed with '@_'.
const xmlParserOptions = {
  attributeNamePrefix: '@_',
  ignoreAttributes: false,
};
const xmlParser = new XMLParser(xmlParserOptions);
/**
 * Parse an FCP XML (xmeml) document into sequence metadata plus a flat list
 * of video clip items.
 *
 * Only `xmeml > sequence > media > video > track > clipitem` is read. Clip
 * items whose source or timeline range is empty or inverted are skipped.
 *
 * @param {string} xmlContent - Raw XML text.
 * @returns {{name: string, frameRate: number, width: number, height: number, clips: Array<object>}}
 *   `clips` entries carry `trackIndex`, `fileName`, `fileUrl`,
 *   `sourceInFrames`, `sourceOutFrames`, `timelineInFrames`,
 *   `timelineOutFrames` and `duration`.
 * @throws {Error} When the document has no `xmeml.sequence` element.
 */
function parseFcpXml(xmlContent) {
  const doc = xmlParser.parse(xmlContent);
  const sequence = doc?.xmeml?.sequence;
  if (!sequence) throw new Error('Invalid FCP XML: no sequence element');

  // The XML parser may hand back numeric-looking tag values as numbers;
  // always expose the sequence name as a string.
  const name = String(sequence.name || 'Untitled');
  const rate = sequence?.rate?.timebase ? parseInt(sequence.rate.timebase, 10) : 29.97;
  const sampleCharacteristics = sequence?.media?.video?.format?.samplecharacteristics;
  const width = parseInt(sampleCharacteristics?.width || 1920, 10);
  const height = parseInt(sampleCharacteristics?.height || 1080, 10);

  // A single child element parses to an object, several to an array —
  // normalise both shapes to an array.
  const toArray = (value) => (Array.isArray(value) ? value : [value]);

  const clips = [];
  const tracks = toArray(sequence?.media?.video?.track || []);
  for (const track of tracks) {
    const trackNum = parseInt(track?.['@_currentExplodedTrackIndex'] || 0, 10);
    const items = toArray(track?.clipitem || []);
    for (const item of items) {
      if (!item) continue;

      const fileUrl = String(item?.file?.name || item?.file?.pathurl || '');
      // Take the last path component, accepting both '/' and '\' separators.
      // (Splitting on '/' first and falling back to '\' never strips a
      // Windows path, because the first split returns the whole string.)
      const fileName = fileUrl.split(/[\\/]/).filter(Boolean).pop() || 'unknown';

      const srcIn = parseFrame(item?.in?.toString() || '0', rate);
      const srcOut = parseFrame(item?.out?.toString() || '0', rate);
      const recIn = parseFrame(item?.start?.toString() || '0', rate);
      const recOut = parseFrame(item?.end?.toString() || '0', rate);
      const duration = parseFrame(item?.duration?.toString() || '0', rate);

      // Nothing usable to cut from / place on the timeline.
      if (srcOut <= srcIn || recOut <= recIn) continue;

      clips.push({
        trackIndex: trackNum,
        fileName,
        fileUrl,
        sourceInFrames: srcIn,
        sourceOutFrames: srcOut,
        timelineInFrames: recIn,
        timelineOutFrames: recOut,
        duration,
      });
    }
  }
  return { name, frameRate: rate, width, height, clips };
}
/**
 * Convert a frame value that is either a plain frame count or a timecode
 * string into an integer frame number.
 *
 * Timecode fields count frames at the nominal (rounded) rate — 30 for 29.97,
 * 24 for 23.976 — so the rate is rounded before multiplying; this keeps the
 * result an integer even when `fps` is fractional.
 *
 * NOTE(review): a ';' separator is accepted but no drop-frame compensation
 * is applied; such timecodes are counted exactly like ':'-separated ones.
 *
 * @param {string|number} value - Frame count ("120") or "HH:MM:SS:FF" /
 *   "HH:MM:SS;FF" timecode.
 * @param {number} fps - Frame rate used to expand a timecode.
 * @returns {number} Frame number, or 0 when `value` is in neither format or
 *   contains non-numeric timecode fields.
 */
function parseFrame(value, fps) {
  const trimmed = String(value ?? '').trim();

  // A run of digits is already a frame count.
  if (/^\d+$/.test(trimmed)) return parseInt(trimmed, 10);

  // HH:MM:SS:FF or HH:MM:SS;FF
  const parts = trimmed.split(/[:;]/);
  if (parts.length !== 4) return 0;

  const [hh, mm, ss, ff] = parts.map((part) => parseInt(part, 10));
  const nominalFps = Math.round(fps);
  const frames = (hh * 3600 + mm * 60 + ss) * nominalFps + ff;

  // Non-numeric fields (or an unusable fps) produce NaN; report "no frame".
  return Number.isFinite(frames) ? frames : 0;
}
/**
 * Map a requested codec id onto everything the conform needs to know about it.
 *
 * The panel sends 'prores_hq' / 'prores_4444' / 'h264' / 'h265' / 'dnxhr_hq';
 * old EDL callers send 'prores' / 'h265' / 'h264'. Anything unrecognised
 * falls back to H.264. prores_ks profiles: 0=proxy 1=lt 2=std 3=hq 4=4444.
 *
 * Container per codec — ProRes / DNxHR live in QuickTime (MOV); MP4 only
 * accepts H.264/H.265 and a handful of others. An earlier .mp4 hard-code
 * tripped ffmpeg with:
 *   [mp4] Could not find tag for codec prores in stream #0,
 *   codec not currently supported in container
 *
 * @param {string} [codec] - Codec id from the job payload.
 * @returns {{videoCodec: string, profileFlag: string[], outputExt: string, storedCodec: string}}
 *   ffmpeg encoder name, extra profile arguments, output file extension and
 *   the canonical codec name stored on the asset row.
 */
function resolveCodecSettings(codec) {
  if (codec === 'prores_hq' || codec === 'prores') {
    return { videoCodec: 'prores_ks', profileFlag: ['-profile:v', '3'], outputExt: 'mov', storedCodec: 'prores' };
  }
  if (codec === 'prores_4444') {
    return { videoCodec: 'prores_ks', profileFlag: ['-profile:v', '4'], outputExt: 'mov', storedCodec: 'prores' };
  }
  if (codec === 'h265' || codec === 'hevc') {
    return { videoCodec: 'libx265', profileFlag: [], outputExt: 'mp4', storedCodec: 'hevc' };
  }
  if (codec === 'dnxhr_hq') {
    return { videoCodec: 'dnxhd', profileFlag: ['-profile:v', 'dnxhr_hq'], outputExt: 'mov', storedCodec: 'dnxhd' };
  }
  return { videoCodec: 'libx264', profileFlag: [], outputExt: 'mp4', storedCodec: 'h264' };
}

/**
 * Conform job processor: resolve a list of edits, cut and normalise one
 * segment per edit, concatenate them into a single master, upload it,
 * register it as a new asset and queue a proxy build for it.
 *
 * Job payload (`job.data`) fields read here: `edl`, `fcpXml`, `projectId`,
 * `sequenceId`, `sequenceName`, `frameRate`, `codec`, `quality`,
 * `resolution`, `audio`.
 *
 * @param {object} job - Queue job; `job.id`, `job.data` and
 *   `job.updateProgress()` are used.
 * @returns {Promise<{jobId: *, outputKey: string, assetId: *}>}
 * @throws {Error} When no input is supplied, the timeline resolves to zero
 *   edits, a reel cannot be matched to an asset, or any download / ffmpeg /
 *   upload / DB step fails. Temporary files are removed in every case.
 */
export const conformWorker = async (job) => {
  const {
    edl, fcpXml, projectId, sequenceId, sequenceName, frameRate, codec, quality, resolution, audio,
  } = job.data;
  const jobId = job.id;
  const tmpDir = tmpdir();
  const segmentsDir = join(tmpDir, `segments-${jobId}`);
  const segmentListPath = join(tmpDir, `segments-${jobId}.txt`);

  // One lookup drives the encoder, the container and the stored codec name,
  // so the three can no longer drift apart.
  const { videoCodec, profileFlag, outputExt, storedCodec } = resolveCodecSettings(codec);
  const outputPath = join(tmpDir, `output-${jobId}.${outputExt}`);
  // Computed up front so the error handler targets the same key as the upload.
  const outputKey = `jobs/${jobId}/conformed.${outputExt}`;

  try {
    let edits = [];
    let seqName = sequenceName || 'Conformed';
    let seqFps = parseFloat(frameRate) || 29.97;

    // ── Resolve edits ────────────────────────────────────────────────
    //
    // Preference order:
    //   1) sequenceId — read sequence_clips, which the Premiere panel
    //      populated with authoritative asset_id mappings on push. This
    //      avoids any filename matching, which is brittle because the
    //      panel's local Premiere file paths (e.g. "dragonflight-<name>"
    //      with sanitised characters) do not match the original MAM
    //      filenames in the assets table.
    //   2) edl — legacy EDL input, filename-resolved.
    //   3) fcpXml — parse the XML for clipitems, filename-resolved.
    //
    // The XML is still parsed when sequenceId is also provided, because
    // we want its sequence name + frame rate metadata even when the
    // authoritative clip list comes from the DB.
    if (sequenceId) {
      await job.updateProgress(5);
      console.log(`[conform] Resolving edits from sequence_clips for sequence ${sequenceId}`);
      const clipRows = await query(
        `SELECT sc.asset_id, sc.source_in_frames, sc.source_out_frames,
                sc.timeline_in_frames, sc.timeline_out_frames, sc.track,
                a.original_s3_key, a.filename
         FROM sequence_clips sc
         JOIN assets a ON a.id = sc.asset_id
         WHERE sc.sequence_id = $1
         ORDER BY sc.timeline_in_frames ASC, sc.track ASC`,
        [sequenceId]
      );
      if (!clipRows.rows.length) {
        throw new Error('Sequence has no clips. Push the timeline from Premiere first.');
      }
      edits = clipRows.rows.map((r, i) => ({
        editNumber: i + 1,
        reelName: r.filename,
        asset_id: r.asset_id,
        original_s3_key: r.original_s3_key,
        sourceIn: r.source_in_frames,
        sourceOut: r.source_out_frames,
      }));
      // Parse XML for sequence-level metadata if it's also provided.
      if (fcpXml) {
        try {
          const parsed = parseFcpXml(fcpXml);
          seqName = parsed.name || seqName;
          seqFps = parsed.frameRate || seqFps;
        } catch (e) {
          console.warn(`[conform] XML metadata parse skipped: ${e.message}`);
        }
      }
    } else if (edl) {
      await job.updateProgress(5);
      console.log(`[conform] Parsing EDL for job ${jobId}`);
      edits = parseEDL(edl).map((e, i) => ({
        editNumber: e.editNumber || i + 1,
        reelName: e.reelName,
        sourceIn: e.sourceIn,
        sourceOut: e.sourceOut,
      }));
    } else if (fcpXml) {
      await job.updateProgress(5);
      console.log(`[conform] Parsing FCP XML for job ${jobId}`);
      const parsed = parseFcpXml(fcpXml);
      seqName = parsed.name || seqName;
      seqFps = parsed.frameRate || seqFps;
      edits = parsed.clips.map((c, i) => ({
        editNumber: i + 1,
        reelName: c.fileName,
        sourceIn: c.sourceInFrames,
        sourceOut: c.sourceOutFrames,
      }));
    } else {
      throw new Error('No input provided — expected edl or fcpXml in job data');
    }

    // An EDL / XML that yields no usable events would otherwise reach ffmpeg
    // with an empty concat list and fail with an unhelpful message.
    if (!edits.length) {
      throw new Error('No edits found in the supplied timeline — nothing to conform');
    }

    await mkdir(segmentsDir, { recursive: true });
    let processedEdits = 0;
    const concatList = [];
    for (const edit of edits) {
      await job.updateProgress(Math.min(5 + (processedEdits / edits.length) * 50, 55));
      console.log(`[conform] Processing edit ${edit.editNumber}: ${edit.reelName}`);

      // If the edit was resolved from sequence_clips above, the asset's
      // original_s3_key is already attached — skip the filename lookup
      // entirely (it would 0-match anyway because the panel's reelName
      // is the local Premiere file path with "dragonflight-" prefix).
      let sourceKey = edit.original_s3_key || null;
      if (!sourceKey) {
        // Legacy path (EDL or fcpXml without sequenceId): match by filename,
        // preferring same-project assets to avoid cross-project collisions.
        let assetRes;
        if (projectId) {
          assetRes = await query(
            `SELECT id, original_s3_key FROM assets
             WHERE filename = $1 AND project_id = $2
             LIMIT 1`,
            [edit.reelName, projectId]
          );
          if (assetRes.rows.length === 0) {
            assetRes = await query(
              'SELECT id, original_s3_key FROM assets WHERE filename = $1 LIMIT 1',
              [edit.reelName]
            );
          }
        } else {
          assetRes = await query(
            'SELECT id, original_s3_key FROM assets WHERE filename = $1 LIMIT 1',
            [edit.reelName]
          );
        }
        if (assetRes.rows.length === 0) {
          throw new Error(`Asset not found for reel: ${edit.reelName}`);
        }
        sourceKey = assetRes.rows[0].original_s3_key;
      }

      // Temp files are keyed by position in the edit list rather than by
      // editNumber: EDL-supplied edit numbers are not guaranteed unique, and
      // a repeat would overwrite an earlier segment.
      const segmentIndex = processedEdits + 1;
      const segmentInputPath = join(segmentsDir, `segment-${segmentIndex}-src`);
      const segmentOutputPath = join(segmentsDir, `segment-${segmentIndex}.mov`);
      console.log(`[conform] Downloading segment ${edit.editNumber} from S3 (${sourceKey})`);
      await downloadFromS3(S3_BUCKET, sourceKey, segmentInputPath);

      // Trim + normalise in a single ffmpeg pass per segment. We re-encode
      // here (libx264 ultrafast) so every segment lands at the same spec
      // — same fps, resolution, pixel format, sample rate, channel layout
      // — which lets the final concat-demuxer step run reliably even when
      // the source clips are wildly different (mixed codecs / fps / sample
      // rate). The double-encode (intermediate h264 → final ProRes) costs
      // some CPU but avoids the concat filter's opaque "Invalid argument"
      // failures with disparate sources.
      console.log(`[conform] Trim + normalise ${edit.editNumber}: ${edit.sourceIn}→${edit.sourceOut}`);
      const segMs = await getMediaInfo(segmentInputPath);
      // Frame offsets are converted to seconds at the source clip's own rate.
      const segFps = segMs.fps || 30;
      const inSec = edit.sourceIn / segFps;
      const durSec = (edit.sourceOut - edit.sourceIn) / segFps;
      await runFFmpeg([
        '-ss', String(inSec),
        '-i', segmentInputPath,
        '-t', String(durSec),
        '-vf', `fps=${Math.round(seqFps) || 30},` +
               `scale=1920:1080:force_original_aspect_ratio=decrease,` +
               `pad=1920:1080:(ow-iw)/2:(oh-ih)/2,` +
               `setsar=1,format=yuv420p`,
        // ffmpeg 8.x dropped the `ocl=` shortcut on aresample. Use aformat
        // for the channel layout assertion + auto-conversion; aresample
        // just sets the rate.
        '-af', 'aresample=48000,aformat=channel_layouts=stereo:sample_fmts=fltp',
        '-c:v', 'libx264', '-preset', 'ultrafast', '-crf', '18',
        '-pix_fmt', 'yuv420p',
        '-c:a', 'aac', '-b:a', '320k', '-ar', '48000',
        '-shortest',
        '-y', segmentOutputPath,
      ]);
      concatList.push(segmentOutputPath);
      await unlink(segmentInputPath).catch(() => {});
      processedEdits++;
    }

    await job.updateProgress(60);
    console.log(`[conform] Writing concat list for ${concatList.length} segments`);
    // The concat demuxer reads single-quoted paths; a literal quote inside a
    // path (e.g. from the temp directory name) must be closed, escaped and
    // reopened or the list will not parse.
    const concatContent = concatList
      .map((p) => `file '${p.replace(/'/g, "'\\''")}'`)
      .join('\n');
    await writeFile(segmentListPath, concatContent, 'utf-8');

    await job.updateProgress(70);
    console.log(`[conform] Concatenating segments for job ${jobId}`);

    // prores_ks ignores -crf and uses -preset differently; libx264/x265 use
    // crf-based quality. Branch the encode args.
    const isProRes = videoCodec === 'prores_ks';
    const qualityArgs = isProRes
      ? [] // ProRes profile already encodes the quality target
      : [
          '-preset', quality === 'high' ? 'slow' : quality === 'broadcast' ? 'veryslow' : 'fast',
          '-crf', quality === 'broadcast' ? '18' : quality === 'high' ? '23' : '28',
        ];

    // Audio: be permissive. Anything that isn't an explicit 'none' should
    // get encoded — the panel sends 'broadcast' (default), 'include' is the
    // legacy value, and there's no reason to silently drop audio for any
    // other label. 320k AAC is a safe broadcast-quality default in mp4.
    const encodeAudio = (audio === 'none' || audio === 'off')
      ? ['-an']
      : ['-c:a', 'aac', '-b:a', '320k', '-ar', '48000'];

    // Concat: every segment was normalised at trim time (uniform fps,
    // resolution, pixel format, sample rate, stereo). The demuxer can
    // stream-stitch them and we just need to transcode the result to the
    // final target codec. This bypasses ffmpeg 8.x's brittle concat-
    // filter path that was throwing
    //   [fc#0] Error sending frames to consumers: Invalid argument
    // on mixed-source timelines.
    await runFFmpeg([
      '-f', 'concat',
      '-safe', '0',
      '-i', segmentListPath,
      '-c:v', videoCodec,
      ...profileFlag,
      ...qualityArgs,
      ...encodeAudio,
      '-y', outputPath,
    ]);

    await job.updateProgress(85);
    console.log(`[conform] Uploading output to ${outputKey}`);
    await uploadToS3(S3_BUCKET, outputKey, outputPath);

    // Register the conformed output as a new asset.
    // NOTE(review): the stored resolution echoes the requested value, while
    // the segment filter above always scales/pads to 1920x1080 — confirm
    // which one the asset row is meant to reflect.
    const assetRes = await query(
      `INSERT INTO assets (project_id, filename, display_name, media_type, status, original_s3_key, codec, resolution, fps, duration_ms, conform_source_sequence_id)
       VALUES ($1, $2, $3, 'video', 'ready', $4, $5, $6, $7, $8, $9) RETURNING id`,
      [
        projectId || null,
        `conformed-${String(seqName).replace(/[^a-z0-9]/gi, '_')}.${outputExt}`,
        `Conformed: ${seqName}`,
        outputKey,
        storedCodec,
        resolution !== 'match' ? resolution : '1920x1080',
        seqFps,
        null,
        sequenceId || null,
      ]
    );
    const newAssetId = assetRes.rows[0].id;

    // Queue a proxy build so the library has a browser-playable H.264 file.
    // ProRes / DNxHR masters don't decode in HTML5 video; without this step
    // the asset shows MEDIA_ERR_SRC_NOT_SUPPORTED in the player. Mirror the
    // ingest pipeline's pattern (services/mam-api/src/routes/assets.js).
    try {
      const generatedProxyKey = `proxies/${newAssetId}.mp4`;
      await proxyQueue.add('generate', {
        assetId: newAssetId,
        inputKey: outputKey,
        outputKey: generatedProxyKey,
      });
      console.log(`[conform] queued proxy build for ${newAssetId}`);
    } catch (e) {
      // Don't fail the conform job if the proxy queue is unreachable —
      // the asset still exists, an operator can retrigger the proxy.
      console.warn(`[conform] failed to queue proxy for ${newAssetId}: ${e.message}`);
    }

    await job.updateProgress(100);
    console.log(`[conform] Job ${jobId} complete → asset ${newAssetId}`);
    return { jobId, outputKey, assetId: newAssetId };
  } catch (error) {
    console.error(`[conform] Error in job ${jobId}:`, error);
    // BUG FIX #1: Mark the output asset (if any) as 'error' so the UI doesn't
    // show a perpetually-spinning 'processing' state when the conform fails.
    // We don't have an assetId until the INSERT succeeds, so target by job key.
    // The key must carry the real container extension: a hard-coded ".mp4"
    // never matches ProRes / DNxHR outputs, which are stored as ".mov".
    await query(
      `UPDATE assets
       SET status = 'error', updated_at = NOW()
       WHERE original_s3_key = $1`,
      [outputKey]
    ).catch(e => console.error('[conform] Failed to mark asset error:', e.message));
    throw error;
  } finally {
    // Best-effort cleanup of every temp artefact, whether or not it exists.
    await Promise.all([
      unlink(segmentListPath).catch(() => {}),
      unlink(outputPath).catch(() => {}),
      rm(segmentsDir, { recursive: true, force: true }).catch(() => {}),
    ]);
  }
};