// YouTube importer worker — shells out to yt-dlp, lands the resulting MP4 in
// S3 at the same originals/{assetId}/{title}.mp4 path uploads use, then hands
// off to the existing proxy queue. From that point an imported asset is
// indistinguishable from an uploaded one.
import { spawn } from 'node:child_process';
import { join } from 'node:path';
import { mkdtemp, rm, stat, readdir } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { Queue } from 'bullmq';
import { query } from '../db/client.js';
import { uploadToS3 } from '../s3/client.js';
import { getMediaInfo } from '../ffmpeg/executor.js';
// Bucket that receives imported originals. A non-empty S3_BUCKET environment
// variable takes precedence over the built-in default.
const DEFAULT_S3_BUCKET = 'wild-dragon';
const S3_BUCKET = process.env.S3_BUCKET || DEFAULT_S3_BUCKET;
/**
 * Convert a Redis connection URL into a connection options object.
 *
 * `host` and `port` are always present; `username`, `password` and `db` are
 * only set when the URL actually carries them, so a bare `redis://host:port`
 * URL yields exactly `{ host, port }`.
 *
 * @param {string} url - e.g. `redis://queue:6379` or
 *   `redis://user:secret@cache:6380/2`.
 * @returns {{host: string, port: number, username?: string, password?: string, db?: number}}
 * @throws {TypeError} If `url` cannot be parsed as a URL.
 */
const parseRedisUrl = (url) => {
  const parsed = new URL(url);

  // `parsed.port` is '' when the URL omits the port; fall back to the Redis
  // default instead of handing NaN to the client.
  const port = Number.parseInt(parsed.port, 10);
  const connection = {
    host: parsed.hostname,
    port: Number.isNaN(port) ? 6379 : port,
  };

  // Credentials are percent-encoded inside a URL; decode before use.
  if (parsed.username) connection.username = decodeURIComponent(parsed.username);
  if (parsed.password) connection.password = decodeURIComponent(parsed.password);

  // A purely numeric path segment (`/2`) selects the database index.
  const dbMatch = /^\/(\d+)$/.exec(parsed.pathname);
  if (dbMatch) connection.db = Number.parseInt(dbMatch[1], 10);

  return connection;
};
// BUG FIX #7: proxyQueue is a module-level singleton, constructed once when
// this module is imported and exported so shutdown code can close it on
// SIGTERM. The worker used to build a fresh Queue inside every job; each
// BullMQ Queue holds an open Redis connection, and those per-job connections
// prevented a clean shutdown.
const proxyRedisUrl = process.env.REDIS_URL || 'redis://queue:6379';
export const proxyQueue = new Queue('proxy', {
  connection: parseRedisUrl(proxyRedisUrl),
});
/**
 * Translate yt-dlp stderr output into a short message suitable for operators.
 *
 * Known failure signatures are checked in order and the first hit wins. When
 * nothing matches, the text after the first `ERROR:` marker (or the whole
 * stderr if there is none) is returned, trimmed and capped at 300 characters.
 *
 * @param {string|null|undefined} stderr - Raw stderr captured from yt-dlp.
 * @returns {string} A non-empty, human-readable failure description.
 */
function friendlyError(stderr) {
  const text = stderr || '';

  // Order matters: earlier rules take precedence over later ones.
  const knownFailures = [
    [/Private video/i, 'Private video — not supported.'],
    [/Sign in to confirm your age/i, 'Age-restricted video — not supported.'],
    [/members[- ]only/i, 'Members-only video — not supported.'],
    [/Video unavailable/i, 'Video unavailable or removed.'],
    [/not available in your country|geo[- ]?restricted/i, 'Video is geo-blocked from this region.'],
    [/HTTP Error 429/i, 'YouTube rate-limited the importer — try again later.'],
    [/Unable to extract|Unsupported URL/i, 'YouTube changed its API — worker image needs a rebuild.'],
  ];
  for (const [pattern, message] of knownFailures) {
    if (pattern.test(text)) return message;
  }

  // Last resort: surface the first ERROR: line, or the raw text, capped.
  const errorLine = text.match(/ERROR:\s*([^\n]+)/i);
  const detail = errorLine ? errorLine[1] : text;
  const capped = detail.trim().slice(0, 300);
  if (capped) return capped;
  return 'yt-dlp failed with no error message';
}
/**
 * Turn a video title into a safe base filename (the caller appends `.mp4`).
 *
 * Every run of characters outside `[A-Za-z0-9_ .-]` is treated as a
 * separator, then each run of dashes and/or whitespace is collapsed into a
 * single space. The result is trimmed and capped at 120 characters.
 *
 * @param {unknown} title - Title reported for the video; may be missing.
 * @param {string} videoId - Used to build the `youtube-<id>` fallback name.
 * @returns {string} The sanitized title, or `youtube-<videoId>` when the
 *   title is missing, not a string, or sanitizes down to nothing.
 */
function sanitizeTitle(title, videoId) {
  const fallback = `youtube-${videoId}`;
  if (!title || typeof title !== 'string') return fallback;

  const cleaned = title
    .replace(/[^\w .\-]+/g, '-')
    .replace(/[-\s]+/g, ' ')
    .trim()
    .slice(0, 120)
    // The cap can cut right after a separator; trim again so the filename
    // never ends in a space before the extension.
    .trimEnd();

  return cleaned || fallback;
}
/**
 * Run yt-dlp for a single URL, streaming download progress to the caller.
 *
 * @param {object} options
 * @param {string} options.url - Video URL to download.
 * @param {string} options.outputTemplate - yt-dlp `-o` output template.
 * @param {(pct: number) => (void|Promise<void>)} [options.onProgress] -
 *   Called with the percentage from each `[download] NN.N%` line seen on
 *   stdout. Treated as best-effort: failures inside it are ignored.
 * @returns {Promise<object>} The last JSON object printed on stdout by
 *   `--print-json`, or `{}` if none was seen.
 * @throws {Error} If yt-dlp cannot be spawned, or exits non-zero. In the
 *   latter case the message comes from `friendlyError`, and the error carries
 *   `stderr` (raw text) and `code` (exit code).
 */
async function runYtDlp({ url, outputTemplate, onProgress }) {
  return new Promise((resolve, reject) => {
    const args = [
      '--no-playlist',
      '--no-warnings',
      '--restrict-filenames',
      '-f', "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4]/b",
      '--merge-output-format', 'mp4',
      '--print-json',
      '--newline',
      '-o', outputTemplate,
      // End-of-options marker: the URL comes from job data, so make sure a
      // value starting with '-' can never be interpreted as a yt-dlp flag.
      '--',
      url,
    ];
    const proc = spawn('yt-dlp', args);

    // Decode at the stream level so a multi-byte character split across two
    // chunks is reassembled instead of being turned into U+FFFD.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');

    let stdoutBuf = '';
    let stderrBuf = '';
    let lastJsonLine = null;

    // Progress reporting must never take the download down with it, whether
    // the callback throws synchronously or returns a rejecting promise.
    const reportProgress = (pct) => {
      if (!onProgress) return;
      try {
        const pending = onProgress(pct);
        if (pending && typeof pending.catch === 'function') {
          pending.catch(() => { /* best-effort progress */ });
        }
      } catch { /* best-effort progress */ }
    };

    const handleLine = (rawLine) => {
      const line = rawLine.trimEnd();
      // [download] 42.3% of 53.21MiB at 4.21MiB/s ETA 00:07
      const m = line.match(/\[download\]\s+(\d+(?:\.\d+)?)%/);
      if (m) reportProgress(Number.parseFloat(m[1]));
      // --print-json emits one JSON line at the end of a successful download.
      if (line.startsWith('{') && line.endsWith('}')) {
        try { lastJsonLine = JSON.parse(line); } catch { /* not the json line */ }
      }
    };

    proc.stdout.on('data', (chunk) => {
      stdoutBuf += chunk;
      let nl;
      while ((nl = stdoutBuf.indexOf('\n')) !== -1) {
        handleLine(stdoutBuf.slice(0, nl));
        stdoutBuf = stdoutBuf.slice(nl + 1);
      }
    });
    proc.stderr.on('data', (chunk) => { stderrBuf += chunk; });
    proc.on('error', (err) => reject(new Error(`Failed to spawn yt-dlp: ${err.message}`)));
    proc.on('close', (code) => {
      // Flush a final line that arrived without a trailing newline; otherwise
      // the JSON metadata could be silently dropped.
      if (stdoutBuf) {
        handleLine(stdoutBuf);
        stdoutBuf = '';
      }
      if (code === 0) {
        resolve(lastJsonLine || {});
      } else {
        const err = new Error(friendlyError(stderrBuf));
        err.stderr = stderrBuf;
        err.code = code;
        reject(err);
      }
    });
  });
}
/**
 * BullMQ processor for YouTube import jobs.
 *
 * Downloads the video with yt-dlp into a per-job temp directory, uploads the
 * resulting MP4 to S3 under `originals/{assetId}/`, backfills the asset row
 * and enqueues a proxy-generation job. On any failure the asset is marked
 * `error` and the original failure is rethrown. The temp directory is removed
 * in every case.
 *
 * @param {object} job - Job whose `data` carries `{ assetId, url }`; must
 *   also provide `id` and `updateProgress()`.
 * @returns {Promise<{assetId: *, originalKey: string}>} The asset id and the
 *   S3 key the original was stored under.
 * @throws {Error} Whatever caused the import to fail.
 */
export const youtubeImportWorker = async (job) => {
  const { assetId, url } = job.data;
  let workDir;
  try {
    // Each job gets its own temp directory so concurrent jobs (if we ever bump
    // concurrency above 1) can't clobber each other's intermediate files.
    // Created inside the try so a failure here still marks the asset errored.
    workDir = await mkdtemp(join(tmpdir(), `yt-${job.id}-`));
    const outputTemplate = join(workDir, `${assetId}.%(ext)s`);

    console.log(`[youtube] Asset ${assetId}: importing ${url}`);
    await job.updateProgress(2);

    // yt-dlp does the work; progress 5..60 covers the download. yt-dlp emits
    // many progress lines per second, so only write to the queue when the
    // mapped integer actually changes.
    let lastMapped = null;
    const meta = await runYtDlp({
      url,
      outputTemplate,
      onProgress: async (pct) => {
        const mapped = 5 + Math.floor(pct * 0.55);
        if (mapped === lastMapped) return;
        lastMapped = mapped;
        try { await job.updateProgress(mapped); } catch { /* ignore */ }
      },
    });
    await job.updateProgress(62);

    // Find the resulting MP4 — yt-dlp's --merge-output-format ensures .mp4
    // but we scan the dir defensively in case the format string changes.
    const files = await readdir(workDir);
    const mp4 = files.find((f) => f.endsWith('.mp4'));
    if (!mp4) {
      throw new Error(`yt-dlp produced no .mp4 in ${workDir} (got ${files.join(', ') || 'nothing'})`);
    }
    const localPath = join(workDir, mp4);
    const { size: fileSize } = await stat(localPath);
    if (fileSize < 4096) {
      throw new Error(`Downloaded file is suspiciously small (${fileSize} bytes) — aborting before upload.`);
    }

    // ffprobe the file ourselves — yt-dlp's metadata is sometimes missing or
    // wrong (especially fps), and we already trust ffprobe everywhere else.
    // A probe failure is non-fatal: the COALESCEs below keep existing values.
    let mediaInfo = {};
    try {
      mediaInfo = await getMediaInfo(localPath);
    } catch (err) {
      console.warn(`[youtube] getMediaInfo failed for ${assetId}: ${err.message}`);
    }

    // Without an id from yt-dlp, derive one from the downloaded file's name.
    const videoId = meta.id || mp4.replace(/\..+$/, '').replace(/^.*-/, '');
    const sanitized = sanitizeTitle(meta.title || meta.fulltitle, videoId);
    const filename = `${sanitized}.mp4`;
    const originalKey = `originals/${assetId}/${filename}`;

    await job.updateProgress(70);
    console.log(`[youtube] Uploading ${localPath} → s3://${S3_BUCKET}/${originalKey} (${fileSize} bytes)`);
    await uploadToS3(S3_BUCKET, originalKey, localPath);
    await job.updateProgress(90);

    // Backfill the asset row with the real title + S3 key + ffprobe metadata,
    // then flip to 'processing' so the rest of the UI treats it like any
    // freshly-uploaded asset.
    await query(
      `UPDATE assets
SET filename = $1,
display_name = $2,
original_s3_key = $3,
codec = COALESCE($4, codec),
resolution = COALESCE($5, resolution),
fps = COALESCE($6, fps),
duration_ms = COALESCE($7, duration_ms),
file_size = COALESCE($8, file_size),
status = 'processing',
updated_at = NOW()
WHERE id = $9`,
      [
        filename,
        meta.title || meta.fulltitle || filename,
        originalKey,
        mediaInfo.codec ?? null,
        mediaInfo.resolution ?? null,
        mediaInfo.fps ?? null,
        mediaInfo.durationMs ?? null,
        mediaInfo.fileSizeBytes ?? fileSize,
        assetId,
      ]
    );

    // Hand off to the proxy queue — identical payload shape to upload.js so
    // the proxy worker doesn't need to know this came from an import.
    await proxyQueue.add('generate', {
      assetId,
      inputKey: originalKey,
      outputKey: `proxies/${assetId}.mp4`,
    });

    console.log(`[youtube] Asset ${assetId} imported (${meta.title || 'untitled'}); proxy job queued`);
    await job.updateProgress(100);
    return { assetId, originalKey };
  } catch (error) {
    console.error(`[youtube] Import failed for asset ${assetId}:`, error.message);
    // Marking the asset is itself fallible; never let a DB hiccup here replace
    // the real import failure that the queue needs to see.
    try {
      await query(
        `UPDATE assets SET status = 'error', updated_at = NOW() WHERE id = $1`,
        [assetId]
      );
    } catch (statusError) {
      console.error(`[youtube] Could not mark asset ${assetId} as errored:`, statusError.message);
    }
    throw error;
  } finally {
    // Cleanup is best-effort; workDir is unset if mkdtemp itself failed.
    if (workDir) {
      await rm(workDir, { recursive: true, force: true }).catch(() => {});
    }
  }
};