dragonflight/services/mam-api/src/routes/jobs.js
opencode 04ce096e67 chore: 1.2 ship-prep sweep — close 38 issues
Frontend / UX / a11y
- Sidebar collapse/expand toggle with localStorage persistence (#142)
- Settings sections wrap inputs in <form> with Enter-to-submit + native
  validation; password autocomplete=new-password (#141, #138)
- Asset thumbnails get descriptive alt text (#140)
- Production deploy now precompiles JSX via esbuild and loads the
  production React UMD instead of dev builds + in-browser Babel (#139,
  #122)
- Search wrapper gets role=search; global search input gets aria-label,
  role=combobox, aria-controls/aria-expanded/aria-activedescendant
  wiring (#137, #135)
- Dashboard and Library no longer share the same nav icon (#136)
- Sidebar collapses off-canvas with a topbar menu button below 768 px;
  mobile default is collapsed (#134)
- --text-3 bumped to #8B92A0 for WCAG AA contrast on --bg-0 (#133)
- Schedule and Library routes were rendering empty inside the .main
  flex container — switched to flex:1 + min-height:0 (#131, #132,
  editor + asset detail get the same fix)
- Jobs nav badge now polls /jobs?status=active every 10 s and reflects
  the live count (#130, #113)
- aria-label sweep on every icon-only button (#126)
- Premiere panel release list moved to window.PREMIERE_RELEASES in
  data.jsx; Editor + Settings read from the same source (#125)
- Typo setPgMclips → setPgmClips (#124)
- Stray console.error / console.warn calls gated behind
  window.DF_LOG.{warn,error} (#123)
- Hardcoded /api/v1 paths route through window.ZAMPP_API_PREFIX (#115)
- Schedule rows no longer crash on null recorder_id (#117)
- EditorKeyboard guards against document.activeElement === null (#116)
- Unmount-safe timers for PasswordResetModal, Containers, Editor (#111)
- Player seek clamps below totalMs, server-side range clamping +
  uncached 416 on EOF, client-side EOF-stall watchdog (#143)
- Duration badge overlap fix on narrow asset cards (#52)

Backend / security / reliability
- GET /recorders fixed N+1: single LATERAL JOIN for live_asset_id;
  Docker inspects bounded to actually-recording rows (#121)
- Upload disk-storage (multer.diskStorage) streams parts to S3 instead
  of buffering 500 MB in RAM (#120)
- /assets list clamps limit to MAX_LIMIT=500 to prevent OOM (#119)
- SDK upload archive listing + post-extract sanitize block zip-slip /
  tar-slip and symlink escapes (#118)
- Migrations track applied state in schema_migrations, run in a
  transaction, and exit non-zero on failure (#107)
- node-agent BMD_COUNT override uses BMD_DEVICE_PREFIX; filesystem
  detection wins (#109, #127)
- GPU_COUNT override now merges with nvidia-smi enrichment (#108)
- /cluster/heartbeat requires a node-bound token or admin user;
  tokens carry bound_hostname (#106)
- /recorders/:id/start error responses no longer echo the Docker
  create payload — env vars stay out of client responses (#105)
- /recorders/probe restricts schemes (srt/rtmp/rtsp/udp/rtp), blocks
  private + loopback hosts for non-admins, denies common service
  ports (#104)
- Scheduler tick guarded by a Postgres advisory lock; pending/running
  rows claimed via UPDATE...RETURNING + FOR UPDATE SKIP LOCKED to
  survive multi-node deploys (#103)
- UUID validateUuid('id') param middleware on every /:id route (#102)
- Error handler scrubs Postgres error messages and 5xx detail (#101)
- Graceful SIGTERM/SIGINT shutdown — stops scheduler, drains the HTTP
  server, ends the pool, 25 s force-exit watchdog (#100)
- AMPP sync moved from fire-and-forget to a persisted retry queue
  (ampp_sync_status / attempts / next_attempt_at + scheduler retry
  loop with exponential backoff) (#77)

Migrations
- 019: api_tokens.bound_hostname (#106)
- 020: assets.ampp_sync_status + retry bookkeeping (#77)

Other
- Defer #92 Growing-files per-upload toggle, #80 Audio tab, #57
  Dashboard redesign, #56 Editor SPA polish phase 3, #114 S3
  migration tool to v1.3
2026-05-27 02:06:14 +00:00

338 lines
12 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import express from 'express';
import pool from '../db/pool.js';
import { requireAuth } from '../middleware/auth.js';
import { validateUuid } from '../middleware/errors.js';
import { Queue } from 'bullmq';
const router = express.Router();
router.use(requireAuth);
router.param('id', (req, res, next) => validateUuid('id')(req, res, next));
// ── Redis connection ──────────────────────────────────────────────────────────
const parseRedisUrl = (url) => {
try {
const parsed = new URL(url);
return { host: parsed.hostname, port: parseInt(parsed.port, 10) || 6379 };
} catch {
return { host: 'localhost', port: 6379 };
}
};
const redisConn = parseRedisUrl(process.env.REDIS_URL || 'redis://queue:6379');
const proxyQueue = new Queue('proxy', { connection: redisConn });
const thumbnailQueue = new Queue('thumbnail', { connection: redisConn });
const filmstripQueue = new Queue('filmstrip', { connection: redisConn });
const conformQueue = new Queue('conform', { connection: redisConn });
const importQueue = new Queue('import', { connection: redisConn });
const trimQueue = new Queue('trim', { connection: redisConn });
const QUEUES = [
{ queue: proxyQueue, type: 'proxy' },
{ queue: thumbnailQueue, type: 'thumbnail' },
{ queue: filmstripQueue, type: 'filmstrip' },
{ queue: conformQueue, type: 'conform' },
{ queue: importQueue, type: 'import' },
{ queue: trimQueue, type: 'trim' },
];
// BullMQ state → API status mapping
const STATE_MAP = {
waiting: 'waiting',
active: 'active',
completed: 'completed',
failed: 'failed',
delayed: 'waiting',
paused: 'waiting',
};
// Ordered state buckets used for bulk fetch — avoids N+1 getState() calls.
const STATE_BUCKETS = ['active', 'waiting', 'completed', 'failed', 'delayed', 'paused'];
function normalizeJob(bullJob, type, apiStatus) {
const isCompleted = apiStatus === 'completed';
const isFailed = apiStatus === 'failed';
return {
id: `${type}:${bullJob.id}`,
type,
status: apiStatus,
progress: typeof bullJob.progress === 'number' ? bullJob.progress : 0,
asset_id: bullJob.data?.assetId || null,
asset_name: bullJob.data?.assetName || null,
created_at: bullJob.timestamp ? new Date(bullJob.timestamp).toISOString() : null,
started_at: bullJob.processedOn ? new Date(bullJob.processedOn).toISOString() : null,
completed_at: isCompleted && bullJob.finishedOn ? new Date(bullJob.finishedOn).toISOString() : null,
failed_at: isFailed && bullJob.finishedOn ? new Date(bullJob.finishedOn).toISOString() : null,
error: bullJob.failedReason || null,
metadata: bullJob.data || {},
};
}
// Fetch all jobs from all queues in bulk by state bucket (no per-job getState() calls).
async function getAllBullMQJobs() {
const results = [];
for (const { queue, type } of QUEUES) {
for (const bucket of STATE_BUCKETS) {
try {
const apiStatus = STATE_MAP[bucket] || bucket;
const jobs = await queue.getJobs([bucket], 0, 200);
for (const job of jobs) {
results.push(normalizeJob(job, type, apiStatus));
}
} catch {
// queue or bucket unavailable — skip
}
}
}
return results;
}
// Mutate `jobs` in place to fill in asset_name from the assets table for any
// job that has an assetId but no inline assetName in its payload. One bulk
// SQL query per refresh — cheap, and means we don't have to remember to pass
// assetName at every enqueue site (upload.js, capture stop, scheduler, etc.).
async function attachAssetNames(jobs) {
const idsNeedingLookup = [...new Set(
jobs.filter(j => j.asset_id && !j.asset_name).map(j => j.asset_id)
)];
if (idsNeedingLookup.length === 0) return;
let rows = [];
try {
const result = await pool.query(
'SELECT id, display_name, filename FROM assets WHERE id = ANY($1::uuid[])',
[idsNeedingLookup]
);
rows = result.rows;
} catch {
// If the lookup fails (DB down, bad UUID in a stale BullMQ payload), keep
// serving jobs without names rather than 500-ing the whole list.
return;
}
const byId = new Map(rows.map(r => [r.id, r.display_name || r.filename]));
for (const j of jobs) {
if (j.asset_id && !j.asset_name) {
const name = byId.get(j.asset_id);
if (name) j.asset_name = name;
}
}
}
// ── GET /events Server-Sent Events stream of live job updates ───────────────
router.get('/events', async (req, res) => {
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
res.setHeader('X-Accel-Buffering', 'no');
res.flushHeaders();
let closed = false;
req.on('close', () => { closed = true; });
const push = async () => {
if (closed) return;
try {
const jobs = await getAllBullMQJobs();
await attachAssetNames(jobs);
if (!closed) res.write(`data: ${JSON.stringify({ type: 'jobs', jobs })}\n\n`);
} catch (err) {
if (!closed) res.write(`data: ${JSON.stringify({ type: 'error', message: err.message })}\n\n`);
}
if (!closed) setTimeout(push, 2000);
};
await push();
});
// Fetch DB-tracked jobs (e.g. trim) and normalize to the same shape as BullMQ jobs.
// Only returns non-expired rows.
async function getDbJobs() {
try {
const result = await pool.query(
`SELECT j.id, j.type, j.status, j.payload, j.created_at, j.updated_at,
ts.asset_id
FROM jobs j
LEFT JOIN temp_segments ts ON ts.job_id = j.id
WHERE (j.expires_at IS NULL OR j.expires_at > NOW())
ORDER BY j.created_at DESC
LIMIT 200`
);
// Dedupe — multiple temp_segments per job, take first asset_id found
const seen = new Map();
for (const row of result.rows) {
if (!seen.has(row.id)) {
seen.set(row.id, {
id: `trim:${row.id}`,
type: row.type,
status: row.status === 'completed' ? 'completed' : row.status,
progress: row.status === 'completed' ? 100 : (row.status === 'failed' ? 0 : 50),
asset_id: row.asset_id || null,
asset_name: null,
created_at: row.created_at ? new Date(row.created_at).toISOString() : null,
started_at: null,
completed_at: row.status === 'completed' && row.updated_at ? new Date(row.updated_at).toISOString() : null,
failed_at: row.status === 'failed' && row.updated_at ? new Date(row.updated_at).toISOString() : null,
error: null,
metadata: row.payload || {},
});
}
}
return [...seen.values()];
} catch {
return [];
}
}
// ── GET / - List jobs (BullMQ queues + DB trim jobs) ─────────────────────────
router.get('/', async (req, res, next) => {
try {
const { type, status, asset_id } = req.query;
let jobs = await getAllBullMQJobs();
const dbJobs = await getDbJobs();
jobs = jobs.concat(dbJobs);
await attachAssetNames(jobs);
if (type) jobs = jobs.filter(j => j.type === type);
if (status) jobs = jobs.filter(j => j.status === status);
if (asset_id) jobs = jobs.filter(j => j.asset_id === asset_id);
jobs.sort((a, b) => new Date(b.created_at || 0) - new Date(a.created_at || 0));
res.json(jobs);
} catch (err) {
next(err);
}
});
// ── GET /:id - Single job ─────────────────────────────────────────────────────
router.get('/:id', async (req, res, next) => {
try {
const { id } = req.params;
const colonIdx = id.indexOf(':');
const qType = colonIdx > -1 ? id.slice(0, colonIdx) : null;
const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;
for (const { queue, type } of QUEUES) {
if (qType && type !== qType) continue;
try {
const job = await queue.getJob(bullId);
if (job) {
const state = await job.getState();
const apiStatus = STATE_MAP[state] || state;
const normalized = normalizeJob(job, type, apiStatus);
await attachAssetNames([normalized]);
return res.json(normalized);
}
} catch { /* try next queue */ }
}
res.status(404).json({ error: 'Job not found' });
} catch (err) {
next(err);
}
});
// ── POST /:id/retry - Retry a failed job ──────────────────────────────────────
router.post('/:id/retry', async (req, res, next) => {
try {
const { id } = req.params;
const colonIdx = id.indexOf(':');
const qType = colonIdx > -1 ? id.slice(0, colonIdx) : null;
const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;
for (const { queue, type } of QUEUES) {
if (qType && type !== qType) continue;
try {
const job = await queue.getJob(bullId);
if (job) {
await job.retry();
return res.json({ id, status: 'queued' });
}
} catch { /* try next queue */ }
}
res.status(404).json({ error: 'Job not found' });
} catch (err) {
next(err);
}
});
// ── DELETE /:id - Remove a job (also handles cancel for active jobs) ─────────
// BullMQ refuses job.remove() while a job is in the 'active' state. Before this
// fix the route caught that error and fell through to a misleading 404, so
// operators couldn't kill a stalled-active job from the UI. Now we detect the
// active state explicitly: moveToFailed with the magic '0' token bypasses the
// per-worker lock check and transitions active → failed (freeing the queue's
// concurrency slot), then remove() drops the row.
router.delete('/:id', async (req, res, next) => {
try {
const { id } = req.params;
const colonIdx = id.indexOf(':');
const qType = colonIdx > -1 ? id.slice(0, colonIdx) : null;
const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;
let lastErr = null;
for (const { queue, type } of QUEUES) {
if (qType && type !== qType) continue;
let job;
try {
job = await queue.getJob(bullId);
} catch (err) {
// Queue-level lookup error: remember it so we don't mask it with 404.
lastErr = err;
continue;
}
if (!job) continue;
const state = await job.getState();
if (state === 'active') {
// Token '0' tells BullMQ to skip the worker-lock check — necessary
// because the operator-side cancel doesn't hold the worker's lock.
try {
await job.moveToFailed(new Error('Cancelled by operator'), '0', false);
} catch (err) {
// Lock owned by a still-living worker; fall back to discard + remove
// so at least the result is thrown away and the row is gone.
try { await job.discard(); } catch (_) {}
}
}
try {
await job.remove();
} catch (err) {
// Last-resort obliteration of the job row via raw Redis. This is
// the path stalled jobs hit when moveToFailed couldn't transition
// them either.
const client = await queue.client;
const prefix = queue.toKey(bullId);
await client.del(prefix);
}
return res.json({ success: true, cancelled: state === 'active' });
}
if (lastErr) return next(lastErr);
res.status(404).json({ error: 'Job not found' });
} catch (err) {
next(err);
}
});
// ── POST /conform - Submit a conform (EDL export) job ────────────────────────
router.post('/conform', async (req, res, next) => {
try {
const { edl, project_id, output_format } = req.body;
if (!edl || !project_id || !output_format) {
return res.status(400).json({
error: 'edl, project_id, and output_format are required',
});
}
const bullJob = await conformQueue.add('conform-task', {
edl,
projectId: project_id,
outputFormat: output_format,
});
res.status(202).json({ id: `conform:${bullJob.id}`, status: 'queued' });
} catch (err) {
next(err);
}
});
export default router;