dragonflight/services/mam-api/src/routes/jobs.js
Zac Gaetano e51cf1aa9c feat(jobs): surface playout-stage queue in Jobs screen
- jobs.js: add playout-stage BullMQ queue to QUEUES; asset_id from
  job data is already resolved to a name by attachAssetNames
- screens-jobs.jsx: map type 'playout-stage' -> kind 'Stage' with
  monitor icon

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-31 12:06:08 -04:00

346 lines
13 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import express from 'express';
import pool from '../db/pool.js';
import { Queue } from 'bullmq';
import { assertProjectAccess } from '../auth/authz.js';
const router = express.Router();
// Note: jobs use BullMQ id format "<queueType>:<bullId>" (e.g. "conform:42"),
// NOT UUIDs. The GET/:id, POST/:id/retry, and DELETE/:id handlers below split
// on the colon themselves and look up the queue. Adding a UUID validator
// here would 400 every BullMQ poll the panel makes (which is exactly what
// caused Export Timeline to stall "Rendering Hi-Res" forever — fixed 2026-05-28).
// ── Redis connection ──────────────────────────────────────────────────────────
const parseRedisUrl = (url) => {
try {
const parsed = new URL(url);
return { host: parsed.hostname, port: parseInt(parsed.port, 10) || 6379 };
} catch {
return { host: 'localhost', port: 6379 };
}
};
const redisConn = parseRedisUrl(process.env.REDIS_URL || 'redis://queue:6379');
const proxyQueue = new Queue('proxy', { connection: redisConn });
const thumbnailQueue = new Queue('thumbnail', { connection: redisConn });
const filmstripQueue = new Queue('filmstrip', { connection: redisConn });
const conformQueue = new Queue('conform', { connection: redisConn });
const importQueue = new Queue('import', { connection: redisConn });
const trimQueue = new Queue('trim', { connection: redisConn });
const playoutStageQueue = new Queue('playout-stage', { connection: redisConn });
const QUEUES = [
{ queue: proxyQueue, type: 'proxy' },
{ queue: thumbnailQueue, type: 'thumbnail' },
{ queue: filmstripQueue, type: 'filmstrip' },
{ queue: conformQueue, type: 'conform' },
{ queue: importQueue, type: 'import' },
{ queue: trimQueue, type: 'trim' },
{ queue: playoutStageQueue, type: 'playout-stage' },
];
// BullMQ state → API status mapping
const STATE_MAP = {
waiting: 'waiting',
active: 'active',
completed: 'completed',
failed: 'failed',
delayed: 'waiting',
paused: 'waiting',
};
// Ordered state buckets used for bulk fetch — avoids N+1 getState() calls.
const STATE_BUCKETS = ['active', 'waiting', 'completed', 'failed', 'delayed', 'paused'];
function normalizeJob(bullJob, type, apiStatus) {
const isCompleted = apiStatus === 'completed';
const isFailed = apiStatus === 'failed';
return {
id: `${type}:${bullJob.id}`,
type,
status: apiStatus,
progress: typeof bullJob.progress === 'number' ? bullJob.progress : 0,
asset_id: bullJob.data?.assetId || null,
asset_name: bullJob.data?.assetName || null,
created_at: bullJob.timestamp ? new Date(bullJob.timestamp).toISOString() : null,
started_at: bullJob.processedOn ? new Date(bullJob.processedOn).toISOString() : null,
completed_at: isCompleted && bullJob.finishedOn ? new Date(bullJob.finishedOn).toISOString() : null,
failed_at: isFailed && bullJob.finishedOn ? new Date(bullJob.finishedOn).toISOString() : null,
error: bullJob.failedReason || null,
metadata: bullJob.data || {},
};
}
// Fetch all jobs from all queues in bulk by state bucket (no per-job getState() calls).
async function getAllBullMQJobs() {
const results = [];
for (const { queue, type } of QUEUES) {
for (const bucket of STATE_BUCKETS) {
try {
const apiStatus = STATE_MAP[bucket] || bucket;
const jobs = await queue.getJobs([bucket], 0, 200);
for (const job of jobs) {
results.push(normalizeJob(job, type, apiStatus));
}
} catch {
// queue or bucket unavailable — skip
}
}
}
return results;
}
// Mutate `jobs` in place to fill in asset_name from the assets table for any
// job that has an assetId but no inline assetName in its payload. One bulk
// SQL query per refresh — cheap, and means we don't have to remember to pass
// assetName at every enqueue site (upload.js, capture stop, scheduler, etc.).
async function attachAssetNames(jobs) {
const idsNeedingLookup = [...new Set(
jobs.filter(j => j.asset_id && !j.asset_name).map(j => j.asset_id)
)];
if (idsNeedingLookup.length === 0) return;
let rows = [];
try {
const result = await pool.query(
'SELECT id, display_name, filename FROM assets WHERE id = ANY($1::uuid[])',
[idsNeedingLookup]
);
rows = result.rows;
} catch {
// If the lookup fails (DB down, bad UUID in a stale BullMQ payload), keep
// serving jobs without names rather than 500-ing the whole list.
return;
}
const byId = new Map(rows.map(r => [r.id, r.display_name || r.filename]));
for (const j of jobs) {
if (j.asset_id && !j.asset_name) {
const name = byId.get(j.asset_id);
if (name) j.asset_name = name;
}
}
}
// ── GET /events Server-Sent Events stream of live job updates ───────────────
router.get('/events', async (req, res) => {
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
res.setHeader('X-Accel-Buffering', 'no');
res.flushHeaders();
let closed = false;
req.on('close', () => { closed = true; });
const push = async () => {
if (closed) return;
try {
const jobs = await getAllBullMQJobs();
await attachAssetNames(jobs);
if (!closed) res.write(`data: ${JSON.stringify({ type: 'jobs', jobs })}\n\n`);
} catch (err) {
if (!closed) res.write(`data: ${JSON.stringify({ type: 'error', message: err.message })}\n\n`);
}
if (!closed) setTimeout(push, 2000);
};
await push();
});
// Fetch DB-tracked jobs (e.g. trim) and normalize to the same shape as BullMQ jobs.
// Only returns non-expired rows.
async function getDbJobs() {
try {
const result = await pool.query(
`SELECT j.id, j.type, j.status, j.payload, j.created_at, j.updated_at,
ts.asset_id
FROM jobs j
LEFT JOIN temp_segments ts ON ts.job_id = j.id
WHERE (j.expires_at IS NULL OR j.expires_at > NOW())
ORDER BY j.created_at DESC
LIMIT 200`
);
// Dedupe — multiple temp_segments per job, take first asset_id found
const seen = new Map();
for (const row of result.rows) {
if (!seen.has(row.id)) {
seen.set(row.id, {
id: `trim:${row.id}`,
type: row.type,
status: row.status === 'completed' ? 'completed' : row.status,
progress: row.status === 'completed' ? 100 : (row.status === 'failed' ? 0 : 50),
asset_id: row.asset_id || null,
asset_name: null,
created_at: row.created_at ? new Date(row.created_at).toISOString() : null,
started_at: null,
completed_at: row.status === 'completed' && row.updated_at ? new Date(row.updated_at).toISOString() : null,
failed_at: row.status === 'failed' && row.updated_at ? new Date(row.updated_at).toISOString() : null,
error: null,
metadata: row.payload || {},
});
}
}
return [...seen.values()];
} catch {
return [];
}
}
// ── GET / - List jobs (BullMQ queues + DB trim jobs) ─────────────────────────
router.get('/', async (req, res, next) => {
try {
const { type, status, asset_id } = req.query;
let jobs = await getAllBullMQJobs();
const dbJobs = await getDbJobs();
jobs = jobs.concat(dbJobs);
await attachAssetNames(jobs);
if (type) jobs = jobs.filter(j => j.type === type);
if (status) jobs = jobs.filter(j => j.status === status);
if (asset_id) jobs = jobs.filter(j => j.asset_id === asset_id);
jobs.sort((a, b) => new Date(b.created_at || 0) - new Date(a.created_at || 0));
res.json(jobs);
} catch (err) {
next(err);
}
});
// ── GET /:id - Single job ─────────────────────────────────────────────────────
router.get('/:id', async (req, res, next) => {
try {
const { id } = req.params;
const colonIdx = id.indexOf(':');
const qType = colonIdx > -1 ? id.slice(0, colonIdx) : null;
const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;
for (const { queue, type } of QUEUES) {
if (qType && type !== qType) continue;
try {
const job = await queue.getJob(bullId);
if (job) {
const state = await job.getState();
const apiStatus = STATE_MAP[state] || state;
const normalized = normalizeJob(job, type, apiStatus);
await attachAssetNames([normalized]);
return res.json(normalized);
}
} catch { /* try next queue */ }
}
res.status(404).json({ error: 'Job not found' });
} catch (err) {
next(err);
}
});
// ── POST /:id/retry - Retry a failed job ──────────────────────────────────────
router.post('/:id/retry', async (req, res, next) => {
try {
const { id } = req.params;
const colonIdx = id.indexOf(':');
const qType = colonIdx > -1 ? id.slice(0, colonIdx) : null;
const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;
for (const { queue, type } of QUEUES) {
if (qType && type !== qType) continue;
try {
const job = await queue.getJob(bullId);
if (job) {
await job.retry();
return res.json({ id, status: 'queued' });
}
} catch { /* try next queue */ }
}
res.status(404).json({ error: 'Job not found' });
} catch (err) {
next(err);
}
});
// ── DELETE /:id - Remove a job (also handles cancel for active jobs) ─────────
// BullMQ refuses job.remove() while a job is in the 'active' state. Before this
// fix the route caught that error and fell through to a misleading 404, so
// operators couldn't kill a stalled-active job from the UI. Now we detect the
// active state explicitly: moveToFailed with the magic '0' token bypasses the
// per-worker lock check and transitions active → failed (freeing the queue's
// concurrency slot), then remove() drops the row.
router.delete('/:id', async (req, res, next) => {
try {
const { id } = req.params;
const colonIdx = id.indexOf(':');
const qType = colonIdx > -1 ? id.slice(0, colonIdx) : null;
const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;
let lastErr = null;
for (const { queue, type } of QUEUES) {
if (qType && type !== qType) continue;
let job;
try {
job = await queue.getJob(bullId);
} catch (err) {
// Queue-level lookup error: remember it so we don't mask it with 404.
lastErr = err;
continue;
}
if (!job) continue;
const state = await job.getState();
if (state === 'active') {
// Token '0' tells BullMQ to skip the worker-lock check — necessary
// because the operator-side cancel doesn't hold the worker's lock.
try {
await job.moveToFailed(new Error('Cancelled by operator'), '0', false);
} catch (err) {
// Lock owned by a still-living worker; fall back to discard + remove
// so at least the result is thrown away and the row is gone.
try { await job.discard(); } catch (_) {}
}
}
try {
await job.remove();
} catch (err) {
// Last-resort obliteration of the job row via raw Redis. This is
// the path stalled jobs hit when moveToFailed couldn't transition
// them either.
const client = await queue.client;
const prefix = queue.toKey(bullId);
await client.del(prefix);
}
return res.json({ success: true, cancelled: state === 'active' });
}
if (lastErr) return next(lastErr);
res.status(404).json({ error: 'Job not found' });
} catch (err) {
next(err);
}
});
// ── POST /conform - Submit a conform (EDL export) job ────────────────────────
router.post('/conform', async (req, res, next) => {
try {
const { edl, project_id, output_format } = req.body;
if (!edl || !project_id || !output_format) {
return res.status(400).json({
error: 'edl, project_id, and output_format are required',
});
}
// Conform writes back into a project — require edit on that project. Without
// this, any logged-in user could enqueue conform jobs targeting any project.
await assertProjectAccess(req.user, project_id, 'edit');
const bullJob = await conformQueue.add('conform-task', {
edl,
projectId: project_id,
outputFormat: output_format,
});
res.status(202).json({ id: `conform:${bullJob.id}`, status: 'queued' });
} catch (err) {
next(err);
}
});
export default router;