// YouTube importer worker — shells out to yt-dlp, lands the resulting MP4 in
// S3 at the same originals/{assetId}/<title>.mp4 path uploads use, then hands
// off to the existing proxy queue. From that point an imported asset is
// indistinguishable from an uploaded one.
|
|
|
|
import { spawn } from 'node:child_process';
|
|
import { join } from 'node:path';
|
|
import { mkdtemp, rm, stat, readdir } from 'node:fs/promises';
|
|
import { tmpdir } from 'node:os';
|
|
import { Queue } from 'bullmq';
|
|
import { query } from '../db/client.js';
|
|
import { uploadToS3 } from '../s3/client.js';
|
|
import { getMediaInfo } from '../ffmpeg/executor.js';
|
|
|
|
// Bucket that imported originals are uploaded to. Taken from the S3_BUCKET
// environment variable; falls back to 'wild-dragon' when it is unset or empty.
const S3_BUCKET = process.env.S3_BUCKET || 'wild-dragon';
|
|
|
|
/**
 * Convert a redis:// URL into the connection options object handed to BullMQ.
 *
 * @param {string} url - e.g. `redis://queue:6379` or `redis://user:pass@host`.
 * @returns {{host: string, port: number, username?: string, password?: string}}
 *   `username` / `password` are only present when the URL carries them.
 * @throws {TypeError} If `url` is not parseable by the WHATWG URL parser.
 */
const parseRedisUrl = (url) => {
  const parsed = new URL(url);

  const connection = {
    host: parsed.hostname,
    // URL#port is '' when the URL omits it, and parseInt('') is NaN — fall
    // back to the standard Redis port instead of handing NaN to the client.
    port: parsed.port ? Number.parseInt(parsed.port, 10) : 6379,
  };

  // Credentials are percent-encoded inside a URL; decode before forwarding.
  if (parsed.username) connection.username = decodeURIComponent(parsed.username);
  if (parsed.password) connection.password = decodeURIComponent(parsed.password);

  return connection;
};
|
|
|
|
// Fix #7: one shared handle to the 'proxy' queue, created at module load.
// Building a new Queue inside every job invocation left one Redis connection
// open per job, which prevented clean shutdown. Keeping a single exported
// instance means it is opened once and can be closed on SIGTERM.
const proxyQueueRedisUrl = process.env.REDIS_URL || 'redis://queue:6379';

export const proxyQueue = new Queue('proxy', {
  connection: parseRedisUrl(proxyQueueRedisUrl),
});
|
|
|
|
// Known yt-dlp failure signatures paired with the short message shown to
// operators. Order matters: the first matching pattern wins.
const YT_DLP_KNOWN_FAILURES = [
  [/Private video/i, 'Private video — not supported.'],
  [/Sign in to confirm your age/i, 'Age-restricted video — not supported.'],
  [/members[- ]only/i, 'Members-only video — not supported.'],
  [/Video unavailable/i, 'Video unavailable or removed.'],
  [/not available in your country|geo[- ]?restricted/i, 'Video is geo-blocked from this region.'],
  [/HTTP Error 429/i, 'YouTube rate-limited the importer — try again later.'],
  [/Unable to extract|Unsupported URL/i, 'YouTube changed its API — worker image needs a rebuild.'],
];

/**
 * Translate yt-dlp stderr output into a short, operator-friendly message.
 *
 * @param {string} [stderr] - Captured stderr text; falsy values are treated as ''.
 * @returns {string} The message for the first known failure signature found,
 *   otherwise the text after the first `ERROR:` marker (or the whole stderr),
 *   trimmed and capped at 300 characters, otherwise a fixed placeholder.
 */
function friendlyError(stderr) {
  const text = stderr || '';

  for (const [pattern, message] of YT_DLP_KNOWN_FAILURES) {
    if (pattern.test(text)) {
      return message;
    }
  }

  // Nothing recognised: surface the first ERROR: line, or failing that the
  // raw stderr, truncated so it stays readable.
  const errorLine = text.match(/ERROR:\s*([^\n]+)/i);
  const detail = errorLine ? errorLine[1] : text;
  const capped = detail.trim().slice(0, 300);

  if (capped) {
    return capped;
  }
  return 'yt-dlp failed with no error message';
}
|
|
|
|
/**
 * Turn a video title into a safe base filename (no extension — the caller
 * appends `.mp4`).
 *
 * Every run of characters outside `[A-Za-z0-9_ .-]` becomes a dash, then every
 * run of dashes and/or whitespace collapses to a single space. The result is
 * trimmed and capped at 120 characters.
 *
 * @param {string} title - Raw title; non-strings and empty values use the fallback.
 * @param {string} videoId - Used to build the `youtube-<videoId>` fallback name.
 * @returns {string} Sanitized title, or `youtube-<videoId>` if nothing usable remains.
 */
function sanitizeTitle(title, videoId) {
  const fallback = `youtube-${videoId}`;
  if (!title || typeof title !== 'string') return fallback;

  const cleaned = title
    .replace(/[^\w .\-]+/g, '-')
    .replace(/[-\s]+/g, ' ')
    .trim()
    .slice(0, 120)
    // Truncation can land right after a word and leave a trailing space,
    // which would otherwise end up in front of the ".mp4" extension.
    .trimEnd();

  return cleaned || fallback;
}
|
|
|
|
/**
 * Run yt-dlp for a single URL, streaming download progress as it goes.
 *
 * @param {object} options
 * @param {string} options.url - Video URL handed to yt-dlp.
 * @param {string} options.outputTemplate - Value for yt-dlp's `-o` option.
 * @param {(percent: number) => unknown} [options.onProgress] - Called with the
 *   percentage parsed from each `[download] NN.N%` line. Best-effort: errors it
 *   throws (or rejects with) are ignored so they cannot abort the download.
 * @returns {Promise<object>} The parsed `--print-json` object, or `{}` when
 *   yt-dlp exited 0 without printing one.
 * @throws {Error} On spawn failure, or on a non-zero exit — in which case the
 *   message comes from `friendlyError(stderr)` and the error carries `stderr`,
 *   `code` and `signal`.
 */
async function runYtDlp({ url, outputTemplate, onProgress }) {
  return new Promise((resolve, reject) => {
    const args = [
      '--no-playlist',
      '--no-warnings',
      '--restrict-filenames',
      '-f', "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4]/b",
      '--merge-output-format', 'mp4',
      '--print-json',
      '--newline',
      '-o', outputTemplate,
      // End of options: without this a url starting with '-' would be parsed
      // by yt-dlp as a command-line flag rather than as the thing to download.
      '--',
      url,
    ];

    const proc = spawn('yt-dlp', args);

    // Decode at the stream level so a multi-byte UTF-8 character split across
    // two chunks is reassembled instead of being turned into U+FFFD.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');

    let stdoutBuf = '';
    let stderrBuf = '';
    let lastJsonLine = null;

    // Progress reporting must never take the download down with it.
    const reportProgress = (percent) => {
      if (!onProgress) return;
      try {
        Promise.resolve(onProgress(percent)).catch(() => { /* best-effort */ });
      } catch { /* best-effort */ }
    };

    const handleLine = (rawLine) => {
      const line = rawLine.trimEnd();

      // [download]  42.3% of 53.21MiB at 4.21MiB/s ETA 00:07
      const m = line.match(/\[download\]\s+(\d+(?:\.\d+)?)%/);
      if (m) reportProgress(parseFloat(m[1]));

      // --print-json emits one JSON line at the end of a successful download.
      if (line.startsWith('{') && line.endsWith('}')) {
        try { lastJsonLine = JSON.parse(line); } catch { /* not the json line */ }
      }
    };

    proc.stdout.on('data', (chunk) => {
      stdoutBuf += chunk;
      let nl;
      while ((nl = stdoutBuf.indexOf('\n')) !== -1) {
        handleLine(stdoutBuf.slice(0, nl));
        stdoutBuf = stdoutBuf.slice(nl + 1);
      }
    });

    proc.stderr.on('data', (chunk) => { stderrBuf += chunk; });

    proc.on('error', (err) => reject(new Error(`Failed to spawn yt-dlp: ${err.message}`)));

    proc.on('close', (code, signal) => {
      // Flush a final line that arrived without a trailing newline.
      if (stdoutBuf) {
        handleLine(stdoutBuf);
        stdoutBuf = '';
      }

      if (code === 0) {
        resolve(lastJsonLine || {});
      } else {
        const err = new Error(friendlyError(stderrBuf));
        err.stderr = stderrBuf;
        err.code = code;
        err.signal = signal;
        reject(err);
      }
    });
  });
}
|
|
|
|
/**
 * Queue processor that imports one YouTube video as an asset.
 *
 * Steps: download with yt-dlp into a per-job temp directory, sanity-check the
 * resulting MP4, probe it, upload it to `originals/{assetId}/<title>.mp4`,
 * update the asset row to `processing`, and enqueue a proxy-generation job.
 * The temp directory is always removed afterwards.
 *
 * @param {object} job - Queue job; this function reads `job.id`,
 *   `job.data.assetId`, `job.data.url` and calls `job.updateProgress()`.
 * @returns {Promise<{assetId: *, originalKey: string}>}
 * @throws Rethrows whatever made the import fail, after attempting to set the
 *   asset's status to `error`.
 */
export const youtubeImportWorker = async (job) => {
  const { assetId, url } = job.data;

  // Each job gets its own temp directory so concurrent jobs (if we ever bump
  // concurrency above 1) can't clobber each other's intermediate files.
  const workDir = await mkdtemp(join(tmpdir(), `yt-${job.id}-`));
  const outputTemplate = join(workDir, `${assetId}.%(ext)s`);

  try {
    console.log(`[youtube] Asset ${assetId}: importing ${url}`);
    await job.updateProgress(2);

    // yt-dlp does the work; progress 5..60 covers the download. yt-dlp prints
    // many progress lines per integer step, so only forward a value when it
    // differs from the previous one instead of writing duplicates.
    let lastReported = null;
    const meta = await runYtDlp({
      url,
      outputTemplate,
      onProgress: async (pct) => {
        const mapped = 5 + Math.floor(pct * 0.55);
        if (mapped === lastReported) return;
        lastReported = mapped;
        try { await job.updateProgress(mapped); } catch { /* ignore */ }
      },
    });

    await job.updateProgress(62);

    // Find the resulting MP4 — yt-dlp's --merge-output-format ensures .mp4
    // but we scan the dir defensively in case the format string changes.
    const files = await readdir(workDir);
    const mp4 = files.find((f) => f.endsWith('.mp4'));
    if (!mp4) {
      throw new Error(`yt-dlp produced no .mp4 in ${workDir} (got ${files.join(', ') || 'nothing'})`);
    }
    const localPath = join(workDir, mp4);

    const { size: fileSize } = await stat(localPath);
    if (fileSize < 4096) {
      throw new Error(`Downloaded file is suspiciously small (${fileSize} bytes) — aborting before upload.`);
    }

    // ffprobe the file ourselves — yt-dlp's metadata is sometimes missing or
    // wrong (especially fps), and we already trust ffprobe everywhere else.
    // A probe failure is non-fatal: the COALESCE()s below keep existing values.
    let mediaInfo = {};
    try {
      mediaInfo = await getMediaInfo(localPath);
    } catch (err) {
      console.warn(`[youtube] getMediaInfo failed for ${assetId}: ${err.message}`);
    }

    // NOTE(review): the download is named `${assetId}.mp4`, so when yt-dlp
    // reported no id this fallback is the part of the asset id after its last
    // '-', not a YouTube id. It only feeds the `youtube-<id>` fallback title.
    const videoId = meta.id || mp4.replace(/\..+$/, '').replace(/^.*-/, '');
    const sanitized = sanitizeTitle(meta.title || meta.fulltitle, videoId);
    const filename = `${sanitized}.mp4`;
    const originalKey = `originals/${assetId}/${filename}`;

    await job.updateProgress(70);
    console.log(`[youtube] Uploading ${localPath} → s3://${S3_BUCKET}/${originalKey} (${fileSize} bytes)`);
    await uploadToS3(S3_BUCKET, originalKey, localPath);

    await job.updateProgress(90);

    // Backfill the asset row with the real title + S3 key + ffprobe metadata,
    // then flip to 'processing' so the rest of the UI treats it like any
    // freshly-uploaded asset.
    await query(
      `UPDATE assets
          SET filename = $1,
              display_name = $2,
              original_s3_key = $3,
              codec = COALESCE($4, codec),
              resolution = COALESCE($5, resolution),
              fps = COALESCE($6, fps),
              duration_ms = COALESCE($7, duration_ms),
              file_size = COALESCE($8, file_size),
              status = 'processing',
              updated_at = NOW()
        WHERE id = $9`,
      [
        filename,
        meta.title || meta.fulltitle || filename,
        originalKey,
        mediaInfo.codec ?? null,
        mediaInfo.resolution ?? null,
        mediaInfo.fps ?? null,
        mediaInfo.durationMs ?? null,
        mediaInfo.fileSizeBytes ?? fileSize,
        assetId,
      ]
    );

    // Hand off to the proxy queue — identical payload shape to upload.js so
    // the proxy worker doesn't need to know this came from an import.
    await proxyQueue.add('generate', {
      assetId,
      inputKey: originalKey,
      outputKey: `proxies/${assetId}.mp4`,
    });

    console.log(`[youtube] Asset ${assetId} imported (${meta.title || 'untitled'}); proxy job queued`);
    await job.updateProgress(100);
    return { assetId, originalKey };
  } catch (error) {
    console.error(`[youtube] Import failed for asset ${assetId}:`, error.message);

    // Marking the row is best-effort: if the database is what broke, a second
    // failure here must not replace the error that actually caused the abort.
    try {
      await query(
        `UPDATE assets SET status = 'error', updated_at = NOW() WHERE id = $1`,
        [assetId]
      );
    } catch (dbError) {
      console.error(`[youtube] Could not mark asset ${assetId} as errored:`, dbError.message);
    }

    throw error;
  } finally {
    await rm(workDir, { recursive: true, force: true }).catch(() => {});
  }
};
|