// YouTube importer worker — shells out to yt-dlp, lands the resulting MP4 in
// S3 at the same originals/{assetId}/{filename}.mp4 path uploads use, then
// hands off to the existing proxy queue. From that point an imported asset is
// indistinguishable from an uploaded one.
import { spawn } from 'node:child_process';
import { join } from 'node:path';
import { mkdtemp, rm, stat, readdir } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { Queue } from 'bullmq';
import { query } from '../db/client.js';
import { uploadToS3 } from '../s3/client.js';
import { getMediaInfo } from '../ffmpeg/executor.js';

// Destination bucket for imported originals; overridable via the environment.
const S3_BUCKET = process.env.S3_BUCKET || 'wild-dragon';

/**
 * Turn a redis:// URL into the connection options object handed to BullMQ.
 *
 * @param {string} url - e.g. `redis://queue:6379` or `redis://user:pass@host`.
 * @returns {{host: string, port: number, username?: string, password?: string}}
 *   `port` falls back to 6379 when the URL does not spell one out;
 *   `username` / `password` are only present when the URL carries them.
 * @throws {TypeError} if `url` cannot be parsed by the WHATWG URL parser.
 */
const parseRedisUrl = (url) => {
  const parsed = new URL(url);

  // URL userinfo is percent-encoded; fall back to the raw text if it contains
  // a stray '%' that decodeURIComponent would reject.
  const decode = (value) => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  // `parsed.port` is '' when the URL omits the port, which parseInt turns
  // into NaN — use the conventional Redis port in that case.
  const port = Number.parseInt(parsed.port, 10);
  const connection = {
    host: parsed.hostname,
    port: Number.isNaN(port) ? 6379 : port,
  };

  // Forward credentials instead of silently dropping them.
  if (parsed.username) connection.username = decode(parsed.username);
  if (parsed.password) connection.password = decode(parsed.password);

  return connection;
};

// BUG FIX #7: Keep proxyQueue as a module-level singleton so it is only
// opened once and the exported instance can be closed on shutdown (SIGTERM).
// Previously the worker created a new Queue on every job invocation; each
// BullMQ Queue holds an open Redis connection that prevented clean shutdown.
export const proxyQueue = new Queue('proxy', {
  connection: parseRedisUrl(process.env.REDIS_URL || 'redis://queue:6379'),
});

// Map yt-dlp stderr lines to short, operator-friendly messages. Anything that
// doesn't match here falls back to the raw stderr (truncated).
// Known yt-dlp failure signatures, checked in order; the first match wins.
const YTDLP_ERROR_HINTS = [
  [/Private video/i, 'Private video — not supported.'],
  [/Sign in to confirm your age/i, 'Age-restricted video — not supported.'],
  [/members[- ]only/i, 'Members-only video — not supported.'],
  [/Video unavailable/i, 'Video unavailable or removed.'],
  [/not available in your country|geo[- ]?restricted/i, 'Video is geo-blocked from this region.'],
  [/HTTP Error 429/i, 'YouTube rate-limited the importer — try again later.'],
  [/Unable to extract|Unsupported URL/i, 'YouTube changed its API — worker image needs a rebuild.'],
];

/**
 * Condense yt-dlp's stderr into a single short message.
 *
 * @param {string} [stderr] - Everything yt-dlp wrote to stderr.
 * @returns {string} The canned message for the first known signature found,
 *   otherwise the text of the first `ERROR:` line (or the whole stderr when
 *   there is none) capped at 300 characters, otherwise a fixed placeholder
 *   when stderr is empty.
 */
function friendlyError(stderr) {
  const s = stderr || '';

  const known = YTDLP_ERROR_HINTS.find(([pattern]) => pattern.test(s));
  if (known) return known[1];

  // Last-resort: the first ERROR: line from stderr, capped.
  const m = s.match(/ERROR:\s*([^\n]+)/i);
  const raw = (m ? m[1] : s).trim().slice(0, 300);
  return raw || 'yt-dlp failed with no error message';
}

/**
 * Derive a safe base filename (no extension) from a video title.
 *
 * Every run of characters outside `[A-Za-z0-9_ .-]` becomes a dash, then every
 * run of dashes and/or whitespace is collapsed into ONE SPACE (so dashes do
 * not survive into the result). The value is trimmed, capped at 120
 * characters and trimmed at the end again so that truncation can never leave
 * a dangling space in front of the extension the caller appends.
 *
 * @param {string} title - Title reported by yt-dlp; may be missing.
 * @param {string} videoId - Used to build the `youtube-{videoId}` fallback.
 * @returns {string} The sanitized title, or `youtube-{videoId}` when the title
 *   is absent, not a string, or sanitizes down to nothing.
 */
function sanitizeTitle(title, videoId) {
  const fallback = `youtube-${videoId}`;
  if (!title || typeof title !== 'string') return fallback;

  const cleaned = title
    .replace(/[^\w .\-]+/g, '-')
    .replace(/[-\s]+/g, ' ')
    .trim()
    .slice(0, 120)
    // The cut may land right after a word boundary; drop the leftover space.
    .trimEnd();

  return cleaned || fallback;
}

// Run yt-dlp with progress streaming. Returns the parsed --print-json line.
// Throws if yt-dlp exits non-zero — the caller maps stderr to a friendly msg.
/**
 * Spawn yt-dlp for a single URL and stream its output.
 *
 * @param {object} opts
 * @param {string} opts.url - Video URL; always passed after `--` so it can
 *   never be interpreted as a yt-dlp option, even if it starts with a dash.
 * @param {string} opts.outputTemplate - Value for yt-dlp's `-o` option.
 * @param {(percent: number) => void} [opts.onProgress] - Called with the
 *   percentage parsed from every `[download] NN.N%` line seen on stdout.
 * @returns {Promise<object>} The last JSON object printed on stdout
 *   (from `--print-json`), or `{}` if none was seen.
 * @throws {Error} If the process cannot be spawned, or exits non-zero. In the
 *   latter case the message comes from `friendlyError` and the error carries
 *   `stderr` (full text) and `code` (exit code).
 */
async function runYtDlp({ url, outputTemplate, onProgress }) {
  return new Promise((resolve, reject) => {
    const args = [
      '--no-playlist',
      '--no-warnings',
      '--restrict-filenames',
      '-f', 'bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4]/b',
      '--merge-output-format', 'mp4',
      '--print-json',
      '--newline',
      '-o', outputTemplate,
      // End of options: whatever follows is treated as a URL, not a flag.
      '--',
      url,
    ];

    const proc = spawn('yt-dlp', args);

    // Let the streams decode UTF-8 statefully. Decoding each chunk on its own
    // mangles any multi-byte character that straddles a chunk boundary, which
    // corrupts non-ASCII titles inside the JSON line.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');

    let stdoutBuf = '';
    let stderrBuf = '';
    let lastJsonLine = null;

    const handleLine = (rawLine) => {
      const line = rawLine.replace(/\r$/, '');

      // [download]  42.3% of 53.21MiB at 4.21MiB/s ETA 00:07
      const m = line.match(/\[download\]\s+(\d+(?:\.\d+)?)%/);
      if (m && onProgress) onProgress(parseFloat(m[1]));

      // --print-json emits one JSON line at the end of a successful download.
      if (line.startsWith('{') && line.endsWith('}')) {
        try {
          lastJsonLine = JSON.parse(line);
        } catch {
          /* not the json line */
        }
      }
    };

    proc.stdout.on('data', (chunk) => {
      stdoutBuf += chunk;
      let nl;
      while ((nl = stdoutBuf.indexOf('\n')) !== -1) {
        handleLine(stdoutBuf.slice(0, nl));
        stdoutBuf = stdoutBuf.slice(nl + 1);
      }
    });

    proc.stderr.on('data', (chunk) => {
      stderrBuf += chunk;
    });

    proc.on('error', (err) => reject(new Error(`Failed to spawn yt-dlp: ${err.message}`)));

    proc.on('close', (code) => {
      if (code === 0) {
        // Flush a final line that was not newline-terminated.
        if (stdoutBuf) {
          handleLine(stdoutBuf);
          stdoutBuf = '';
        }
        resolve(lastJsonLine || {});
        return;
      }
      const err = new Error(friendlyError(stderrBuf));
      err.stderr = stderrBuf;
      err.code = code;
      reject(err);
    });
  });
}

/**
 * Queue processor for YouTube import jobs.
 *
 * Downloads `job.data.url` with yt-dlp into a per-job temp directory, uploads
 * the resulting MP4 to `originals/{assetId}/{title}.mp4` in S3, backfills the
 * asset row, and enqueues a `generate` job on the proxy queue. Job progress is
 * reported along the way (2 → 5..60 during download → 62 → 70 → 90 → 100).
 *
 * @param {object} job - Queue job whose `data` holds `{ assetId, url }`.
 * @returns {Promise<{assetId: *, originalKey: string}>}
 * @throws Re-throws whatever made the import fail, after attempting to set the
 *   asset's status to 'error'. The temp directory is removed in every case.
 */
export const youtubeImportWorker = async (job) => {
  const { assetId, url } = job.data;

  // Each job gets its own temp directory so concurrent jobs (if we ever bump
  // concurrency above 1) can't clobber each other's intermediate files.
  const workDir = await mkdtemp(join(tmpdir(), `yt-${job.id}-`));
  const outputTemplate = join(workDir, `${assetId}.%(ext)s`);

  try {
    console.log(`[youtube] Asset ${assetId}: importing ${url}`);
    await job.updateProgress(2);

    // yt-dlp does the work; progress 5..60 covers the download.
    const meta = await runYtDlp({
      url,
      outputTemplate,
      onProgress: async (pct) => {
        const mapped = 5 + Math.floor(pct * 0.55);
        // Progress reporting is best-effort; a failed update must not abort
        // the download.
        try {
          await job.updateProgress(mapped);
        } catch {
          /* ignore */
        }
      },
    });
    await job.updateProgress(62);

    // Find the resulting MP4 — yt-dlp's --merge-output-format ensures .mp4
    // but we scan the dir defensively in case the format string changes.
    const files = await readdir(workDir);
    const mp4 = files.find((f) => f.endsWith('.mp4'));
    if (!mp4) {
      throw new Error(`yt-dlp produced no .mp4 in ${workDir} (got ${files.join(', ') || 'nothing'})`);
    }

    const localPath = join(workDir, mp4);
    const { size: fileSize } = await stat(localPath);
    if (fileSize < 4096) {
      throw new Error(`Downloaded file is suspiciously small (${fileSize} bytes) — aborting before upload.`);
    }

    // ffprobe the file ourselves — yt-dlp's metadata is sometimes missing or
    // wrong (especially fps), and we already trust ffprobe everywhere else.
    // A probe failure is tolerated: the COALESCEs below then keep whatever
    // the row already holds.
    let mediaInfo = {};
    try {
      mediaInfo = await getMediaInfo(localPath);
    } catch (err) {
      console.warn(`[youtube] getMediaInfo failed for ${assetId}: ${err.message}`);
    }

    // When yt-dlp reported no id, fall back to the local file's base name
    // with everything up to its last dash removed.
    const videoId = meta.id || mp4.replace(/\..+$/, '').replace(/^.*-/, '');
    const sanitized = sanitizeTitle(meta.title || meta.fulltitle, videoId);
    const filename = `${sanitized}.mp4`;
    const originalKey = `originals/${assetId}/${filename}`;

    await job.updateProgress(70);
    console.log(`[youtube] Uploading ${localPath} → s3://${S3_BUCKET}/${originalKey} (${fileSize} bytes)`);
    await uploadToS3(S3_BUCKET, originalKey, localPath);
    await job.updateProgress(90);

    // Backfill the asset row with the real title + S3 key + ffprobe metadata,
    // then flip to 'processing' so the rest of the UI treats it like any
    // freshly-uploaded asset.
    await query(
      `UPDATE assets
          SET filename = $1,
              display_name = $2,
              original_s3_key = $3,
              codec = COALESCE($4, codec),
              resolution = COALESCE($5, resolution),
              fps = COALESCE($6, fps),
              duration_ms = COALESCE($7, duration_ms),
              file_size = COALESCE($8, file_size),
              status = 'processing',
              updated_at = NOW()
        WHERE id = $9`,
      [
        filename,
        meta.title || meta.fulltitle || filename,
        originalKey,
        mediaInfo.codec ?? null,
        mediaInfo.resolution ?? null,
        mediaInfo.fps ?? null,
        mediaInfo.durationMs ?? null,
        mediaInfo.fileSizeBytes ?? fileSize,
        assetId,
      ]
    );

    // Hand off to the proxy queue — identical payload shape to upload.js so
    // the proxy worker doesn't need to know this came from an import.
    await proxyQueue.add('generate', {
      assetId,
      inputKey: originalKey,
      outputKey: `proxies/${assetId}.mp4`,
    });

    console.log(`[youtube] Asset ${assetId} imported (${meta.title || 'untitled'}); proxy job queued`);
    await job.updateProgress(100);
    return { assetId, originalKey };
  } catch (error) {
    console.error(`[youtube] Import failed for asset ${assetId}:`, error.message);
    // Marking the row is bookkeeping: if it fails too, log it but still
    // surface the ORIGINAL import error to the queue rather than the DB one.
    try {
      await query(
        `UPDATE assets SET status = 'error', updated_at = NOW() WHERE id = $1`,
        [assetId]
      );
    } catch (dbError) {
      console.error(`[youtube] Could not mark asset ${assetId} as errored:`, dbError.message);
    }
    throw error;
  } finally {
    // Best-effort cleanup; a leftover temp dir is not worth failing the job.
    await rm(workDir, { recursive: true, force: true }).catch(() => {});
  }
};