dragonflight/services/mam-api/src/routes/jobs.js

import express from 'express';
import pool from '../db/pool.js';
import { Queue } from 'bullmq';

const router = express.Router();
// Note: jobs use BullMQ id format "<queueType>:<bullId>" (e.g. "conform:42"),
// NOT UUIDs. The GET/:id, POST/:id/retry, and DELETE/:id handlers below split
// on the colon themselves and look up the queue. Adding a UUID validator
// here would 400 every BullMQ poll the panel makes (which is exactly what
// caused Export Timeline to stall "Rendering Hi-Res" forever — fixed 2026-05-28).

// ── Redis connection ──────────────────────────────────────────────────────────
/**
 * Convert a Redis connection URL into the plain options object handed to
 * BullMQ as `connection`.
 *
 * `host` and `port` are always present (port falls back to 6379). The
 * remaining keys are only added when the URL actually carries them, so a bare
 * `redis://host:port` URL still yields exactly `{ host, port }`:
 *   - `username` / `password` from the userinfo part (percent-decoded)
 *   - `db` from a numeric path such as `/2`
 *   - `tls: {}` for the `rediss:` scheme
 *
 * @param {string} url - e.g. `redis://:secret@queue:6379/0`
 * @returns {{host: string, port: number, username?: string, password?: string, db?: number, tls?: object}}
 *   Connection options; `{ host: 'localhost', port: 6379 }` when `url` cannot be parsed.
 */
const parseRedisUrl = (url) => {
  // URL keeps userinfo percent-encoded; a malformed escape must not discard
  // the whole URL, so fall back to the raw text in that case.
  const decode = (text) => {
    try {
      return decodeURIComponent(text);
    } catch {
      return text;
    }
  };

  try {
    const parsed = new URL(url);
    const conn = { host: parsed.hostname, port: parseInt(parsed.port, 10) || 6379 };

    if (parsed.username) conn.username = decode(parsed.username);
    if (parsed.password) conn.password = decode(parsed.password);

    const dbMatch = /^\/(\d+)$/.exec(parsed.pathname);
    if (dbMatch) conn.db = Number.parseInt(dbMatch[1], 10);

    if (parsed.protocol === 'rediss:') conn.tls = {};

    return conn;
  } catch {
    return { host: 'localhost', port: 6379 };
  }
};

const redisConn = parseRedisUrl(process.env.REDIS_URL || 'redis://queue:6379');

// Open a BullMQ queue handle by name; every handle uses the same Redis target.
const openQueue = (name) => new Queue(name, { connection: redisConn });

const proxyQueue = openQueue('proxy');
const thumbnailQueue = openQueue('thumbnail');
const filmstripQueue = openQueue('filmstrip');
const conformQueue = openQueue('conform');
const importQueue = openQueue('import');
const trimQueue = openQueue('trim');

// Registry walked by the list and lookup handlers. `type` is the prefix used
// in API job ids ("<type>:<bullId>") and matches the queue name.
const QUEUES = Object.entries({
  proxy: proxyQueue,
  thumbnail: thumbnailQueue,
  filmstrip: filmstripQueue,
  conform: conformQueue,
  import: importQueue,
  trim: trimQueue,
}).map(([type, queue]) => ({ queue, type }));

// BullMQ state → API status mapping.
// `delayed` and `paused` are reported to API clients as `waiting`; the other
// four states keep their own name. Lookups in this file are written as
// `STATE_MAP[state] || state`, so a state missing from this table is passed
// through to the client unchanged.
const STATE_MAP = {
  waiting:   'waiting',
  active:    'active',
  completed: 'completed',
  failed:    'failed',
  delayed:   'waiting',
  paused:    'waiting',
};

// Ordered state buckets used for bulk fetch — avoids N+1 getState() calls.
// Each bucket is requested from each queue separately and every job found in
// it is labelled with the status STATE_MAP gives for that bucket.
// NOTE(review): newer BullMQ releases appear to have additional states (e.g.
// `prioritized`, `waiting-children`) — confirm against the installed version
// whether jobs in those states should be listed too.
const STATE_BUCKETS = ['active', 'waiting', 'completed', 'failed', 'delayed', 'paused'];

/**
 * Flatten a BullMQ job into the JSON shape served by the jobs API.
 *
 * @param {object} bullJob - Job-like object; reads `id`, `progress`, `data`,
 *   `timestamp`, `processedOn`, `finishedOn` and `failedReason`.
 * @param {string} type - Queue type, used as the id prefix.
 * @param {string} apiStatus - Status already mapped for API consumers.
 * @returns {object} Plain object with id, type, status, progress, asset and
 *   timestamp fields; absent values are reported as `null`.
 */
function normalizeJob(bullJob, type, apiStatus) {
  // Epoch milliseconds → ISO-8601 string; any falsy input becomes null.
  const toIso = (ms) => (ms ? new Date(ms).toISOString() : null);

  const { data, progress, finishedOn } = bullJob;

  // finishedOn is shared by both terminal states; surface it only under the
  // field that matches the reported status.
  const completedOn = apiStatus === 'completed' ? finishedOn : null;
  const failedOn = apiStatus === 'failed' ? finishedOn : null;

  return {
    id: `${type}:${bullJob.id}`,
    type,
    status: apiStatus,
    // Non-numeric progress payloads are reported as 0.
    progress: typeof progress === 'number' ? progress : 0,
    asset_id: data?.assetId || null,
    asset_name: data?.assetName || null,
    created_at: toIso(bullJob.timestamp),
    started_at: toIso(bullJob.processedOn),
    completed_at: toIso(completedOn),
    failed_at: toIso(failedOn),
    error: bullJob.failedReason || null,
    metadata: data || {},
  };
}

/**
 * Fetch all jobs from all queues in bulk by state bucket (no per-job
 * getState() calls) and normalize them for the API.
 *
 * Every (queue, bucket) pair is requested concurrently; the result keeps the
 * same order as a nested walk over QUEUES then STATE_BUCKETS. A pair whose
 * lookup fails contributes nothing instead of failing the whole list.
 *
 * @returns {Promise<object[]>} Normalized jobs from every queue and bucket.
 */
async function getAllBullMQJobs() {
  const lookups = QUEUES.flatMap(({ queue, type }) =>
    STATE_BUCKETS.map(async (bucket) => {
      const found = [];
      try {
        const apiStatus = STATE_MAP[bucket] || bucket;
        const jobs = await queue.getJobs([bucket], 0, 200);
        for (const job of jobs) {
          // An id can still sit in a state list after its job data was
          // deleted; such entries come back empty and must not take the rest
          // of the bucket down with them.
          if (!job) continue;
          found.push(normalizeJob(job, type, apiStatus));
        }
      } catch {
        // queue or bucket unavailable — keep whatever was collected so far
      }
      return found;
    })
  );
  return (await Promise.all(lookups)).flat();
}

// Canonical hyphenated UUID text, any letter case.
const ASSET_UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

/**
 * Mutate `jobs` in place to fill in asset_name from the assets table for any
 * job that has an asset_id but no inline asset name. One bulk SQL query per
 * call, so enqueue sites do not each have to pass assetName.
 *
 * Ids that are not UUID-shaped are left out of the query: the `::uuid[]` cast
 * rejects the whole array on a single bad element, which would otherwise blank
 * the names of every job in the list because of one stale payload. Ids are
 * compared case-insensitively.
 *
 * @param {object[]} jobs - Normalized jobs; reads `asset_id` / `asset_name`,
 *   writes `asset_name`.
 * @returns {Promise<void>} Resolves once names are attached. Never rejects on
 *   a failed lookup — jobs are then simply left without names.
 */
async function attachAssetNames(jobs) {
  const needsName = (job) => Boolean(job.asset_id) && !job.asset_name;
  const keyOf = (id) => String(id).toLowerCase();

  const idsNeedingLookup = [...new Set(
    jobs
      .filter(needsName)
      .map((job) => keyOf(job.asset_id))
      .filter((id) => ASSET_UUID_RE.test(id))
  )];
  if (idsNeedingLookup.length === 0) return;

  let rows = [];
  try {
    const result = await pool.query(
      'SELECT id, display_name, filename FROM assets WHERE id = ANY($1::uuid[])',
      [idsNeedingLookup]
    );
    rows = result.rows;
  } catch {
    // If the lookup fails (e.g. DB down), keep serving jobs without names
    // rather than 500-ing the whole list.
    return;
  }

  const byId = new Map(rows.map((r) => [keyOf(r.id), r.display_name || r.filename]));
  for (const job of jobs) {
    if (!needsName(job)) continue;
    const name = byId.get(keyOf(job.asset_id));
    if (name) job.asset_name = name;
  }
}

// ── GET /events – Server-Sent Events stream of live job updates ───────────────
// Sends a `{ type: 'jobs', jobs }` frame immediately and then again 2 s after
// each previous frame finishes, until the client disconnects. A failed refresh
// is reported in-band as `{ type: 'error', message }` and polling continues.
// NOTE(review): unlike GET /, this stream carries only BullMQ jobs (no DB-tracked
// rows) and does not sort them — confirm that is intentional.
router.get('/events', async (req, res) => {
  res.setHeader('Content-Type',      'text/event-stream');
  res.setHeader('Cache-Control',     'no-cache');
  res.setHeader('Connection',        'keep-alive');
  res.setHeader('X-Accel-Buffering', 'no');
  res.flushHeaders();

  let closed = false;
  let timer = null;
  req.on('close', () => {
    closed = true;
    // Cancel the pending refresh so nothing lingers after the client is gone.
    if (timer !== null) {
      clearTimeout(timer);
      timer = null;
    }
  });

  const push = async () => {
    timer = null;
    if (closed) return;
    try {
      const jobs = await getAllBullMQJobs();
      await attachAssetNames(jobs);
      if (!closed) res.write(`data: ${JSON.stringify({ type: 'jobs', jobs })}\n\n`);
    } catch (err) {
      if (!closed) res.write(`data: ${JSON.stringify({ type: 'error', message: err.message })}\n\n`);
    }
    // Schedule only after this refresh finished, so refreshes never overlap.
    if (!closed) timer = setTimeout(push, 2000);
  };

  await push();
});

/**
 * Fetch DB-tracked jobs (e.g. trim) and normalize them to the same shape as
 * BullMQ jobs. Only non-expired rows are returned, newest first, capped at
 * 200 jobs.
 *
 * The asset id comes from a correlated sub-select rather than a join, so the
 * 200-row limit counts jobs (not job × segment rows) and a segment without an
 * asset_id cannot hide one that has it.
 *
 * Progress is synthetic: 100 for completed, 0 for failed, 50 for anything else.
 *
 * @returns {Promise<object[]>} Normalized jobs; an empty array if the query or
 *   the row conversion fails.
 */
async function getDbJobs() {
  const toIso = (value) => (value ? new Date(value).toISOString() : null);

  try {
    const result = await pool.query(
      `SELECT j.id, j.type, j.status, j.payload, j.created_at, j.updated_at,
              (SELECT ts.asset_id
                 FROM temp_segments ts
                WHERE ts.job_id = j.id AND ts.asset_id IS NOT NULL
                LIMIT 1) AS asset_id
       FROM jobs j
       WHERE (j.expires_at IS NULL OR j.expires_at > NOW())
       ORDER BY j.created_at DESC
       LIMIT 200`
    );

    // One entry per job id; the first row seen for an id wins.
    const seen = new Map();
    for (const row of result.rows) {
      if (seen.has(row.id)) continue;

      const isCompleted = row.status === 'completed';
      const isFailed = row.status === 'failed';

      let progress = 50;
      if (isCompleted) progress = 100;
      else if (isFailed) progress = 0;

      seen.set(row.id, {
        id:           `trim:${row.id}`,
        type:         row.type,
        status:       row.status,
        progress,
        asset_id:     row.asset_id || null,
        asset_name:   null,
        created_at:   toIso(row.created_at),
        started_at:   null,
        // The table has no separate finish column; updated_at stands in for it.
        completed_at: isCompleted ? toIso(row.updated_at) : null,
        failed_at:    isFailed    ? toIso(row.updated_at) : null,
        error:        null,
        metadata:     row.payload || {},
      });
    }
    return [...seen.values()];
  } catch {
    // DB unavailable — the list falls back to BullMQ jobs only.
    return [];
  }
}

// ── GET / - List jobs (BullMQ queues + DB trim jobs) ─────────────────────────
// Optional query filters: `type`, `status`, `asset_id` (exact match each).
// Responds with a JSON array sorted newest-first by created_at; jobs without a
// creation time sort as if created at the epoch.
router.get('/', async (req, res, next) => {
  try {
    const { type, status, asset_id } = req.query;

    // The two sources are independent, so fetch them concurrently.
    const [bullJobs, dbJobs] = await Promise.all([getAllBullMQJobs(), getDbJobs()]);
    let jobs = [...bullJobs, ...dbJobs];
    await attachAssetNames(jobs);

    if (type)     jobs = jobs.filter(j => j.type === type);
    if (status)   jobs = jobs.filter(j => j.status === status);
    if (asset_id) jobs = jobs.filter(j => j.asset_id === asset_id);

    jobs.sort((a, b) => new Date(b.created_at || 0) - new Date(a.created_at || 0));
    res.json(jobs);
  } catch (err) {
    next(err);
  }
});

// ── GET /:id - Single job ─────────────────────────────────────────────────────
// `:id` is "<queueType>:<bullId>"; a bare id with no colon is looked up in every
// queue and the first hit wins. Responds 404 only when no queue has the job;
// a queue lookup that errors is passed to the error handler instead of being
// reported as "not found", so pollers can tell an outage from a missing job.
router.get('/:id', async (req, res, next) => {
  try {
    const { id } = req.params;
    const colonIdx = id.indexOf(':');
    const qType  = colonIdx > -1 ? id.slice(0, colonIdx) : null;
    const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;

    let lastErr = null;
    for (const { queue, type } of QUEUES) {
      if (qType && type !== qType) continue;

      let job;
      try {
        job = await queue.getJob(bullId);
      } catch (err) {
        // Remember the failure but still give the remaining queues a chance.
        lastErr = err;
        continue;
      }
      if (!job) continue;

      const state = await job.getState();
      const apiStatus = STATE_MAP[state] || state;
      const normalized = normalizeJob(job, type, apiStatus);
      await attachAssetNames([normalized]);
      return res.json(normalized);
    }

    if (lastErr) return next(lastErr);
    res.status(404).json({ error: 'Job not found' });
  } catch (err) {
    next(err);
  }
});

// ── POST /:id/retry - Retry a failed job ──────────────────────────────────────
// `:id` is "<queueType>:<bullId>"; a bare id is tried against every queue and
// the first failed job with that id is retried.
// Responses:
//   200 { id, status: 'queued' }  – job was handed back to its queue
//   409                           – the job exists but is not in the failed state
//   404                           – no queue has a job with that id
// Queue lookup and retry errors go to the error handler rather than being
// reported as 404.
router.post('/:id/retry', async (req, res, next) => {
  try {
    const { id } = req.params;
    const colonIdx = id.indexOf(':');
    const qType  = colonIdx > -1 ? id.slice(0, colonIdx) : null;
    const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;

    let lastErr = null;
    let blockingState = null;
    for (const { queue, type } of QUEUES) {
      if (qType && type !== qType) continue;

      let job;
      try {
        job = await queue.getJob(bullId);
      } catch (err) {
        lastErr = err;
        continue;
      }
      if (!job) continue;

      const state = await job.getState();
      if (state !== 'failed') {
        // Bull ids are only unique per queue, so with a bare id another queue
        // may still hold a failed job under the same number — keep looking.
        blockingState = state;
        continue;
      }

      await job.retry();
      return res.json({ id, status: 'queued' });
    }

    if (lastErr) return next(lastErr);
    if (blockingState) {
      return res.status(409).json({
        error: `Job is ${blockingState}; only failed jobs can be retried`,
      });
    }
    res.status(404).json({ error: 'Job not found' });
  } catch (err) {
    next(err);
  }
});

// ── DELETE /:id - Remove a job (also handles cancel for active jobs) ─────────
// BullMQ refuses job.remove() while a job is in the 'active' state. Before this
// fix the route caught that error and fell through to a misleading 404, so
// operators couldn't kill a stalled-active job from the UI. Now we detect the
// active state explicitly: moveToFailed with the magic '0' token bypasses the
// per-worker lock check and transitions active → failed (freeing the queue's
// concurrency slot), then remove() drops the row.
//
// `:id` is "<queueType>:<bullId>"; a bare id is tried against every queue.
// Responds { success: true, cancelled } where `cancelled` is true when the job
// was active at the time of the request, or 404 when no queue has the job.
router.delete('/:id', async (req, res, next) => {
  try {
    const { id } = req.params;
    const colonIdx = id.indexOf(':');
    const qType  = colonIdx > -1 ? id.slice(0, colonIdx) : null;
    const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;

    let lastErr = null;
    for (const { queue, type } of QUEUES) {
      if (qType && type !== qType) continue;
      let job;
      try {
        job = await queue.getJob(bullId);
      } catch (err) {
        // Queue-level lookup error: remember it so we don't mask it with 404.
        lastErr = err;
        continue;
      }
      if (!job) continue;

      const state = await job.getState();
      if (state === 'active') {
        // Token '0' tells BullMQ to skip the worker-lock check — necessary
        // because the operator-side cancel doesn't hold the worker's lock.
        try {
          await job.moveToFailed(new Error('Cancelled by operator'), '0', false);
        } catch {
          // Lock owned by a still-living worker; fall back to discard + remove
          // so at least the result is thrown away and the row is gone.
          try {
            await job.discard();
          } catch {
            // best effort — removal below is what matters
          }
        }
      }
      try {
        await job.remove();
      } catch {
        // Last-resort obliteration of the job row via raw Redis. This is
        // the path stalled jobs hit when moveToFailed couldn't transition
        // them either.
        const client = await queue.client;
        const jobKey = queue.toKey(bullId);
        await client.del(jobKey);

        // Deleting only the hash leaves the id behind in the queue's
        // bookkeeping, where later bulk fetches find an id with no data.
        // Best-effort cleanup of what the job owned while active.
        // NOTE(review): key names (`<job>:lock`, `<job>:logs`, the `active`
        // list) follow BullMQ's Redis layout — confirm against the installed
        // version.
        try {
          await client.del(`${jobKey}:lock`, `${jobKey}:logs`);
          await client.lrem(queue.toKey('active'), 0, bullId);
        } catch {
          // The job hash is already gone; leftover bookkeeping is not fatal.
        }
      }
      return res.json({ success: true, cancelled: state === 'active' });
    }
    if (lastErr) return next(lastErr);
    res.status(404).json({ error: 'Job not found' });
  } catch (err) {
    next(err);
  }
});

// ── POST /conform - Submit a conform (EDL export) job ────────────────────────
// Body: { edl, project_id, output_format } — all three must be truthy, otherwise
// the request is rejected with 400. On success the job is added to the conform
// queue and the reply is 202 with the API-style id "conform:<bullId>".
router.post('/conform', async (req, res, next) => {
  try {
    const { edl, project_id: projectId, output_format: outputFormat } = req.body;

    const hasAllFields = Boolean(edl && projectId && outputFormat);
    if (!hasAllFields) {
      res.status(400).json({
        error: 'edl, project_id, and output_format are required',
      });
      return;
    }

    const payload = { edl, projectId, outputFormat };
    const queued = await conformQueue.add('conform-task', payload);

    res.status(202).json({ id: `conform:${queued.id}`, status: 'queued' });
  } catch (err) {
    next(err);
  }
});

export default router;
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
+								import express from 'express';
 								import pool from '../db/pool.js';
 								import { Queue } from 'bullmq';
 								const router = express.Router();
-												fix(uxp+mam-api): Export Timeline render — xmeml schema + BullMQ job poll

Two cooperating bugs left Export Timeline stuck at "Rendering Hi-Res"
forever:

A. worker emitted "Invalid FCP XML: no sequence element" because
   Timeline.generateFcpXml produced fcpxml (FCP X schema:
   <fcpxml><resources>/<library>/...) while the worker's parseFcpXml
   expects xmeml (FCP 7 schema: <xmeml><sequence>...). Two completely
   different formats.

   Rewrite generateFcpXml to emit xmeml v5 with the structure the
   parser walks:
     xmeml/sequence/{name,duration,rate{timebase,ntsc},
                     media/video/{format/samplecharacteristics,
                                  track[@currentExplodedTrackIndex]
                                  /clipitem/{name,duration,rate,in,out,
                                             start,end,file/{name,pathurl}}}}
   Clipitem in/out are SOURCE frames (the underlying media in/out);
   start/end are TIMELINE frames (the cut position). The worker uses
   the rate timebase to parse them.

B. /api/v1/jobs/:id rejected the panel's polls with
   "Invalid id — must be a UUID". The handlers below correctly parse
   BullMQ-prefixed ids ("conform:42"), but router.param('id',
   validateUuid('id')) ran first and 400'd everything that wasn't a
   UUID. The panel's pollConform swallows the resulting fetch error
   silently and polls forever.

   Drop the validator. Comment in the file explains why.

Bumps panel to v2.2.2.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-05-28 13:58:13 -04:00
+								// Note: jobs use BullMQ id format "<queueType>:<bullId>" (e.g. "conform:42"),
 								// NOT UUIDs. The GET/:id, POST/:id/retry, and DELETE/:id handlers below split
 								// on the colon themselves and look up the queue. Adding a UUID validator
 								// here would 400 every BullMQ poll the panel makes (which is exactly what
 								// caused Export Timeline to stall "Rendering Hi-Res" forever — fixed 2026-05-28).
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								// ── Redis connection ──────────────────────────────────────────────────────────
-												fix(routes+ui): capture route bin optional, jobs Redis URL, recorders S3_REGION+stop codes, api.js full rewrite, upload.html multipart fix, capture.html bin guard: jobs.js

											
										
										
											2026-05-16 00:30:26 -04:00
+								const parseRedisUrl = (url) => {
 								  try {
 								    const parsed = new URL(url);
 								    return { host: parsed.hostname, port: parseInt(parsed.port, 10) || 6379 };
 								  } catch {
 								    return { host: 'localhost', port: 6379 };
 								  }
 								};
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								const redisConn = parseRedisUrl(process.env.REDIS_URL || 'redis://queue:6379');
-												feat: server-side filmstrip worker + fix scheduler crash + fix clip freeze

Root causes found:
1. Scheduler crashing every 15s: assets table has no error_message column.
   Fix: remove error_message from UPDATE in scheduler.js (#66 regression).

2. Clip freezing: client-side filmstrip seek loop runs on main thread,
   seeks same proxy the player is streaming → both stall → freeze.
   Fix: replace browser seek loop entirely with server-side FFmpeg worker.

3. No dedicated filmstrip worker: filmstrip was never pre-built server-side.

Changes:
- services/mam-api/src/db/migrations/018-add-filmstrip-s3-key.sql
  Add filmstrip_s3_key TEXT column to assets table

- services/worker/src/workers/filmstrip.js (new)
  BullMQ worker: downloads proxy, runs FFmpeg fps filter to extract
  28 evenly-spaced JPEG frames, base64-encodes them, uploads JSON
  array to S3 at filmstrips/<assetId>.json, stores key in DB

- services/worker/src/workers/thumbnail.js
  Queue filmstrip job automatically after thumbnail completes

- services/worker/src/index.js
  Register filmstrip worker (concurrency=2), export filmstripQueue
  singleton, close it on SIGTERM

- services/mam-api/src/routes/assets.js
  - filmstripQueue added
  - POST /reprocess?type=filmstrip now supported
  - GET /:id/filmstrip returns signed S3 URL for JSON frames

- services/mam-api/src/routes/jobs.js
  filmstrip queue visible in Jobs UI

- services/web-ui/public/screens-asset.jsx
  Replace browser seek loop with fetch of /assets/:id/filmstrip
  → fetch S3 JSON → render frames. Zero browser-side video seeking.
  Right-click and Files tab re-generate via API endpoint.

											
										
										
											2026-05-26 12:39:44 -04:00
+								const proxyQueue      = new Queue('proxy',     { connection: redisConn });
 								const thumbnailQueue  = new Queue('thumbnail', { connection: redisConn });
 								const filmstripQueue  = new Queue('filmstrip', { connection: redisConn });
 								const conformQueue    = new Queue('conform',   { connection: redisConn });
 								const importQueue     = new Queue('import',    { connection: redisConn });
 								const trimQueue       = new Queue('trim',      { connection: redisConn });
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
 								const QUEUES = [
-												feat: server-side filmstrip worker + fix scheduler crash + fix clip freeze

Root causes found:
1. Scheduler crashing every 15s: assets table has no error_message column.
   Fix: remove error_message from UPDATE in scheduler.js (#66 regression).

2. Clip freezing: client-side filmstrip seek loop runs on main thread,
   seeks same proxy the player is streaming → both stall → freeze.
   Fix: replace browser seek loop entirely with server-side FFmpeg worker.

3. No dedicated filmstrip worker: filmstrip was never pre-built server-side.

Changes:
- services/mam-api/src/db/migrations/018-add-filmstrip-s3-key.sql
  Add filmstrip_s3_key TEXT column to assets table

- services/worker/src/workers/filmstrip.js (new)
  BullMQ worker: downloads proxy, runs FFmpeg fps filter to extract
  28 evenly-spaced JPEG frames, base64-encodes them, uploads JSON
  array to S3 at filmstrips/<assetId>.json, stores key in DB

- services/worker/src/workers/thumbnail.js
  Queue filmstrip job automatically after thumbnail completes

- services/worker/src/index.js
  Register filmstrip worker (concurrency=2), export filmstripQueue
  singleton, close it on SIGTERM

- services/mam-api/src/routes/assets.js
  - filmstripQueue added
  - POST /reprocess?type=filmstrip now supported
  - GET /:id/filmstrip returns signed S3 URL for JSON frames

- services/mam-api/src/routes/jobs.js
  filmstrip queue visible in Jobs UI

- services/web-ui/public/screens-asset.jsx
  Replace browser seek loop with fetch of /assets/:id/filmstrip
  → fetch S3 JSON → render frames. Zero browser-side video seeking.
  Right-click and Files tab re-generate via API endpoint.

											
										
										
											2026-05-26 12:39:44 -04:00
+								  { queue: proxyQueue,      type: 'proxy'     },
 								  { queue: thumbnailQueue,  type: 'thumbnail' },
 								  { queue: filmstripQueue,  type: 'filmstrip' },
 								  { queue: conformQueue,    type: 'conform'   },
 								  { queue: importQueue,     type: 'import'    },
 								  { queue: trimQueue,       type: 'trim'      },
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								];
 								// BullMQ state → API status mapping
 								const STATE_MAP = {
-												fix: bulk-fetch jobs by state (no N+1 getState()); add GET /events SSE stream

											
										
										
											2026-05-19 23:09:47 -04:00
+								  waiting:   'waiting',
 								  active:    'active',
 								  completed: 'completed',
 								  failed:    'failed',
 								  delayed:   'waiting',
 								  paused:    'waiting',
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								};
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
-												fix: bulk-fetch jobs by state (no N+1 getState()); add GET /events SSE stream

											
										
										
											2026-05-19 23:09:47 -04:00
+								// Ordered state buckets used for bulk fetch — avoids N+1 getState() calls.
 								const STATE_BUCKETS = ['active', 'waiting', 'completed', 'failed', 'delayed', 'paused'];
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								function normalizeJob(bullJob, type, apiStatus) {
 								  const isCompleted = apiStatus === 'completed';
 								  const isFailed    = apiStatus === 'failed';
 								  return {
 								    id:           `${type}:${bullJob.id}`,
 								    type,
 								    status:       apiStatus,
 								    progress:     typeof bullJob.progress === 'number' ? bullJob.progress : 0,
 								    asset_id:     bullJob.data?.assetId    || null,
 								    asset_name:   bullJob.data?.assetName  || null,
 								    created_at:   bullJob.timestamp   ? new Date(bullJob.timestamp).toISOString()   : null,
 								    started_at:   bullJob.processedOn ? new Date(bullJob.processedOn).toISOString() : null,
 								    completed_at: isCompleted && bullJob.finishedOn ? new Date(bullJob.finishedOn).toISOString() : null,
 								    failed_at:    isFailed    && bullJob.finishedOn ? new Date(bullJob.finishedOn).toISOString() : null,
 								    error:        bullJob.failedReason || null,
 								    metadata:     bullJob.data || {},
 								  };
 								}
-												fix: bulk-fetch jobs by state (no N+1 getState()); add GET /events SSE stream

											
										
										
											2026-05-19 23:09:47 -04:00
+								// Fetch all jobs from all queues in bulk by state bucket (no per-job getState() calls).
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								async function getAllBullMQJobs() {
 								  const results = [];
 								  for (const { queue, type } of QUEUES) {
-												fix: bulk-fetch jobs by state (no N+1 getState()); add GET /events SSE stream

											
										
										
											2026-05-19 23:09:47 -04:00
+								    for (const bucket of STATE_BUCKETS) {
 								      try {
 								        const apiStatus = STATE_MAP[bucket] || bucket;
 								        const jobs = await queue.getJobs([bucket], 0, 200);
 								        for (const job of jobs) {
 								          results.push(normalizeJob(job, type, apiStatus));
 								        }
 								      } catch {
 								        // queue or bucket unavailable — skip
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								      }
 								    }
 								  }
 								  return results;
 								}
-												fix(jobs): backfill asset_name from DB so non-YouTube jobs show their asset

The Jobs screen only displayed an asset name when the enqueueing code
stuffed assetName into the BullMQ job data. YouTube imports did that;
upload-triggered proxy/thumbnail jobs didn't — so everything except
YouTube showed em dashes in the Asset column.

Fix it centrally: after we collect jobs from BullMQ, look up names
in one bulk SELECT against the assets table for any job that has an
assetId but no asset_name. Applies to /jobs, /jobs/:id, and the SSE
events stream. Lookup failures fall through silently rather than
500-ing the whole list.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-05-23 16:23:23 -04:00
+								// Mutate `jobs` in place to fill in asset_name from the assets table for any
 								// job that has an assetId but no inline assetName in its payload. One bulk
 								// SQL query per refresh — cheap, and means we don't have to remember to pass
 								// assetName at every enqueue site (upload.js, capture stop, scheduler, etc.).
 								async function attachAssetNames(jobs) {
 								  const idsNeedingLookup = [...new Set(
 								    jobs.filter(j => j.asset_id && !j.asset_name).map(j => j.asset_id)
 								  )];
 								  if (idsNeedingLookup.length === 0) return;
 								  let rows = [];
 								  try {
 								    const result = await pool.query(
 								      'SELECT id, display_name, filename FROM assets WHERE id = ANY($1::uuid[])',
 								      [idsNeedingLookup]
 								    );
 								    rows = result.rows;
 								  } catch {
 								    // If the lookup fails (DB down, bad UUID in a stale BullMQ payload), keep
 								    // serving jobs without names rather than 500-ing the whole list.
 								    return;
 								  }
 								  const byId = new Map(rows.map(r => [r.id, r.display_name || r.filename]));
 								  for (const j of jobs) {
 								    if (j.asset_id && !j.asset_name) {
 								      const name = byId.get(j.asset_id);
 								      if (name) j.asset_name = name;
 								    }
 								  }
 								}
-												fix: bulk-fetch jobs by state (no N+1 getState()); add GET /events SSE stream

											
										
										
											2026-05-19 23:09:47 -04:00
+								// ── GET /events – Server-Sent Events stream of live job updates ───────────────
 								router.get('/events', async (req, res) => {
 								  res.setHeader('Content-Type',      'text/event-stream');
 								  res.setHeader('Cache-Control',     'no-cache');
 								  res.setHeader('Connection',        'keep-alive');
-												feat: add POST /jobs/:id/retry endpoint for re-queuing failed BullMQ jobs

											
										
										
											2026-05-22 12:18:53 -04:00
+								  res.setHeader('X-Accel-Buffering', 'no');
-												fix: bulk-fetch jobs by state (no N+1 getState()); add GET /events SSE stream

											
										
										
											2026-05-19 23:09:47 -04:00
+								  res.flushHeaders();
 								  let closed = false;
 								  req.on('close', () => { closed = true; });
 								  const push = async () => {
 								    if (closed) return;
 								    try {
 								      const jobs = await getAllBullMQJobs();
-												fix(jobs): backfill asset_name from DB so non-YouTube jobs show their asset

The Jobs screen only displayed an asset name when the enqueueing code
stuffed assetName into the BullMQ job data. YouTube imports did that;
upload-triggered proxy/thumbnail jobs didn't — so everything except
YouTube showed em dashes in the Asset column.

Fix it centrally: after we collect jobs from BullMQ, look up names
in one bulk SELECT against the assets table for any job that has an
assetId but no asset_name. Applies to /jobs, /jobs/:id, and the SSE
events stream. Lookup failures fall through silently rather than
500-ing the whole list.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-05-23 16:23:23 -04:00
+								      await attachAssetNames(jobs);
-												fix: bulk-fetch jobs by state (no N+1 getState()); add GET /events SSE stream

											
										
										
											2026-05-19 23:09:47 -04:00
+								      if (!closed) res.write(`data: ${JSON.stringify({ type: 'jobs', jobs })}\n\n`);
 								    } catch (err) {
 								      if (!closed) res.write(`data: ${JSON.stringify({ type: 'error', message: err.message })}\n\n`);
 								    }
 								    if (!closed) setTimeout(push, 2000);
 								  };
 								  await push();
 								});
-												fix: close all 24 open issues (#40–#94)

Bug fixes:
- #91: dockerApi() 10s socket timeout (Docker daemon hang)
- #77: await syncToAmpp() with .catch() — no longer fire-and-forget
- #75: migration 016 — add 'proxy','import' to job_type enum; add 'completed' to job_status
- #73: BullMQ orphan job cleanup on hard asset delete
- #70: batch-trim jobs table gets expires_at; trim-status auto-expires stale rows
- #66: scheduler tick marks stale live assets (>2h) as error
- #63: migration 017 — partial unique index prevents concurrent live asset overwrite
- #61: recorders.js uses getS3Bucket() not stale process.env.S3_BUCKET
- #60: already fixed (copy nulls proxy/thumbnail keys, requeues proxy)
- #40: already fixed (All projects clears openProject)
- #64: already fixed (sourceType/needsProxy handled)
- #90: GET /jobs now includes DB jobs table (trim jobs visible in UI)
- #74: nginx Content-Type header preserved; multer 500MB file size limit
- #68: GET /upload returns in-progress ingesting assets
- #58: /stream and /video endpoints fall back to original file for all video types
- #55: recorder poll .catch() logs auth errors cleanly; redirect stops interval
- #52: thumb-status and thumb-duration moved inside position:relative wrapper
- #50: ProjectCard gets onContextMenu handler with rename/delete menu
- #49: project context menu dismisses on contextmenu + scroll events

Features:
- #93: POST /assets/:id/reprocess?type=proxy|thumbnail — force re-queue any asset
  Asset ⋯ menu now shows 'Re-generate proxy' and 'Re-generate thumbnail' buttons

UI:
- Logo: brightness(0) invert(1) filter applied consistently in sidebar, launcher,
  and login — white logo pops on dark UI; inline style removed from login.html

											
										
										
											2026-05-26 10:10:44 -04:00
+								// Fetch DB-tracked jobs (e.g. trim) and normalize to the same shape as BullMQ jobs.
 								// Only returns non-expired rows.
 								// Returns the jobs stored in the `jobs` table, normalized to the same object
 								// shape the BullMQ-backed jobs use. Rows whose expires_at is in the past are
 								// excluded by the query; results are ordered newest first.
 								//
 								// The LEFT JOIN produces one row per temp_segment, so a job may appear several
 								// times. Rows are collapsed to one entry per job id, keeping the first non-null
 								// asset_id seen for that job. Note that LIMIT 200 applies to the joined rows,
 								// so fewer than 200 distinct jobs may come back.
 								//
 								// Resolves to [] (after logging) when the query fails, so a DB problem never
 								// breaks the whole job list.
 								async function getDbJobs() {
 								  const toIso = (value) => (value ? new Date(value).toISOString() : null);
 								  try {
 								    const result = await pool.query(
 								      `SELECT j.id, j.type, j.status, j.payload, j.created_at, j.updated_at,
 								              ts.asset_id
 								       FROM jobs j
 								       LEFT JOIN temp_segments ts ON ts.job_id = j.id
 								       WHERE (j.expires_at IS NULL OR j.expires_at > NOW())
 								       ORDER BY j.created_at DESC
 								       LIMIT 200`
 								    );
 								    const seen = new Map();
 								    for (const row of result.rows) {
 								      const existing = seen.get(row.id);
 								      if (existing) {
 								        // A later segment row only matters when the earlier ones carried
 								        // no asset_id (e.g. the first joined row had a NULL asset_id).
 								        if (existing.asset_id === null && row.asset_id) {
 								          existing.asset_id = row.asset_id;
 								        }
 								        continue;
 								      }
 								      const isCompleted = row.status === 'completed';
 								      const isFailed    = row.status === 'failed';
 								      seen.set(row.id, {
 								        // DB-tracked jobs are always exposed under the "trim:" id prefix.
 								        id:           `trim:${row.id}`,
 								        type:         row.type,
 								        status:       row.status,
 								        // No progress value is selected, so report a coarse placeholder:
 								        // 100 when completed, 0 when failed, 50 for anything else.
 								        progress:     isCompleted ? 100 : (isFailed ? 0 : 50),
 								        asset_id:     row.asset_id || null,
 								        asset_name:   null,
 								        created_at:   toIso(row.created_at),
 								        started_at:   null,
 								        // updated_at doubles as the finish timestamp for terminal states.
 								        completed_at: isCompleted ? toIso(row.updated_at) : null,
 								        failed_at:    isFailed    ? toIso(row.updated_at) : null,
 								        error:        null,
 								        metadata:     row.payload || {},
 								      });
 								    }
 								    return [...seen.values()];
 								  } catch (err) {
 								    // Best-effort: keep the list endpoint alive, but leave a trace in the logs.
 								    console.error('[jobs] getDbJobs failed:', err);
 								    return [];
 								  }
 								}
 								// ── GET / - List jobs (BullMQ queues + DB trim jobs) ─────────────────────────
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
+								router.get('/', async (req, res, next) => {
 								  try {
 								    const { type, status, asset_id } = req.query;
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								    let jobs = await getAllBullMQJobs();
-												fix: close all 24 open issues (#40–#94)

Bug fixes:
- #91: dockerApi() 10s socket timeout (Docker daemon hang)
- #77: await syncToAmpp() with .catch() — no longer fire-and-forget
- #75: migration 016 — add 'proxy','import' to job_type enum; add 'completed' to job_status
- #73: BullMQ orphan job cleanup on hard asset delete
- #70: batch-trim jobs table gets expires_at; trim-status auto-expires stale rows
- #66: scheduler tick marks stale live assets (>2h) as error
- #63: migration 017 — partial unique index prevents concurrent live asset overwrite
- #61: recorders.js uses getS3Bucket() not stale process.env.S3_BUCKET
- #60: already fixed (copy nulls proxy/thumbnail keys, requeues proxy)
- #40: already fixed (All projects clears openProject)
- #64: already fixed (sourceType/needsProxy handled)
- #90: GET /jobs now includes DB jobs table (trim jobs visible in UI)
- #74: nginx Content-Type header preserved; multer 500MB file size limit
- #68: GET /upload returns in-progress ingesting assets
- #58: /stream and /video endpoints fall back to original file for all video types
- #55: recorder poll .catch() logs auth errors cleanly; redirect stops interval
- #52: thumb-status and thumb-duration moved inside position:relative wrapper
- #50: ProjectCard gets onContextMenu handler with rename/delete menu
- #49: project context menu dismisses on contextmenu + scroll events

Features:
- #93: POST /assets/:id/reprocess?type=proxy|thumbnail — force re-queue any asset
  Asset ⋯ menu now shows 'Re-generate proxy' and 'Re-generate thumbnail' buttons

UI:
- Logo: brightness(0) invert(1) filter applied consistently in sidebar, launcher,
  and login — white logo pops on dark UI; inline style removed from login.html

											
										
										
											2026-05-26 10:10:44 -04:00
+								    const dbJobs = await getDbJobs();
 								    jobs = jobs.concat(dbJobs);
-												fix(jobs): backfill asset_name from DB so non-YouTube jobs show their asset

The Jobs screen only displayed an asset name when the enqueueing code
stuffed assetName into the BullMQ job data. YouTube imports did that;
upload-triggered proxy/thumbnail jobs didn't — so everything except
YouTube showed em dashes in the Asset column.

Fix it centrally: after we collect jobs from BullMQ, look up names
in one bulk SELECT against the assets table for any job that has an
assetId but no asset_name. Applies to /jobs, /jobs/:id, and the SSE
events stream. Lookup failures fall through silently rather than
500-ing the whole list.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-05-23 16:23:23 -04:00
+								    await attachAssetNames(jobs);
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								    if (type)     jobs = jobs.filter(j => j.type === type);
 								    if (status)   jobs = jobs.filter(j => j.status === status);
 								    if (asset_id) jobs = jobs.filter(j => j.asset_id === asset_id);
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								    jobs.sort((a, b) => new Date(b.created_at || 0) - new Date(a.created_at || 0));
 								    res.json(jobs);
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
+								  } catch (err) {
 								    next(err);
 								  }
 								});
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								// ── GET /:id - Single job ─────────────────────────────────────────────────────
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
+								router.get('/:id', async (req, res, next) => {
 								  try {
 								    const { id } = req.params;
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								    const colonIdx = id.indexOf(':');
 								    const qType  = colonIdx > -1 ? id.slice(0, colonIdx) : null;
 								    const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;
 								    for (const { queue, type } of QUEUES) {
 								      if (qType && type !== qType) continue;
 								      try {
 								        const job = await queue.getJob(bullId);
 								        if (job) {
 								          const state = await job.getState();
 								          const apiStatus = STATE_MAP[state] || state;
-												fix(jobs): backfill asset_name from DB so non-YouTube jobs show their asset

The Jobs screen only displayed an asset name when the enqueueing code
stuffed assetName into the BullMQ job data. YouTube imports did that;
upload-triggered proxy/thumbnail jobs didn't — so everything except
YouTube showed em dashes in the Asset column.

Fix it centrally: after we collect jobs from BullMQ, look up names
in one bulk SELECT against the assets table for any job that has an
assetId but no asset_name. Applies to /jobs, /jobs/:id, and the SSE
events stream. Lookup failures fall through silently rather than
500-ing the whole list.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

											
										
										
											2026-05-23 16:23:23 -04:00
+								          const normalized = normalizeJob(job, type, apiStatus);
 								          await attachAssetNames([normalized]);
 								          return res.json(normalized);
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								        }
 								      } catch { /* try next queue */ }
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
+								    }
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								    res.status(404).json({ error: 'Job not found' });
 								  } catch (err) {
 								    next(err);
 								  }
 								});
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
-												feat: add POST /jobs/:id/retry endpoint for re-queuing failed BullMQ jobs

											
										
										
											2026-05-22 12:18:53 -04:00
+								// ── POST /:id/retry - Retry a failed job ──────────────────────────────────────
 								// Re-queues a failed BullMQ job. The id is "<queueType>:<bullId>"; a bare id
 								// (no colon) is looked up in every queue in QUEUES order.
 								//
 								// Responses:
 								//   200 { id, status: 'queued' }  the job was failed and has been retried
 								//   409 { error }                 the job exists but is not in the failed state
 								//   404 { error }                 no queue holds a job with that id
 								// Queue/Redis errors are forwarded to next() instead of being reported as 404.
 								router.post('/:id/retry', async (req, res, next) => {
 								  try {
 								    const { id } = req.params;
 								    const colonIdx = id.indexOf(':');
 								    const qType  = colonIdx > -1 ? id.slice(0, colonIdx) : null;
 								    const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;
 								    let lastErr = null;      // most recent lookup/retry failure, if any
 								    let blockedState = null; // state of a matching job that is not retryable
 								    for (const { queue, type } of QUEUES) {
 								      if (qType && type !== qType) continue;
 								      try {
 								        const job = await queue.getJob(bullId);
 								        if (!job) continue;
 								        const state = await job.getState();
 								        if (state !== 'failed') {
 								          // Bull ids are per-queue, so a bare id may still match a failed
 								          // job in a later queue; remember this one and keep looking.
 								          blockedState = state;
 								          continue;
 								        }
 								        await job.retry();
 								        return res.json({ id, status: 'queued' });
 								      } catch (err) {
 								        // Remember the failure so it is not masked by a 404 below.
 								        lastErr = err;
 								      }
 								    }
 								    if (blockedState) {
 								      return res.status(409).json({
 								        error: `Job is ${blockedState}; only failed jobs can be retried`,
 								      });
 								    }
 								    if (lastErr) return next(lastErr);
 								    res.status(404).json({ error: 'Job not found' });
 								  } catch (err) {
 								    next(err);
 								  }
 								});
-												fix(jobs): real cancel for active jobs + multi-threaded thumbnail worker

DELETE /jobs/:id was throwing "404 not found" when the operator tried to
cancel a running job. BullMQ refuses job.remove() while a job is in the
active state; the route caught that error and fell through to the
404 branch, which was misleading because the job actually exists — the
queue was just refusing to drop it from under the worker.

Fix:
- Detect 'active' state explicitly and call moveToFailed(err, '0', false)
  first. Token '0' bypasses the per-worker lock check (the operator-side
  cancel doesn't hold the worker lock). That transitions active -> failed
  and frees the queue's concurrency slot.
- If moveToFailed itself fails (lock owned by a live worker), fall back
  to job.discard() so at least the result is thrown away.
- If remove() then fails (stalled, broken state), drop the job's Redis
  key directly via queue.client. Last-resort obliteration.
- Stop swallowing getJob() errors — if Redis is sad, surface it via
  next(err) instead of returning a misleading 404.
- Return { cancelled: true } when the job was active, so the client
  can show "Cancelled" rather than "Removed" in any future toast.

While here: thumbnail jobs now run with concurrency 4 by default
(proxy 2, conform 1, import 1 unchanged). Every queue defaulted to
concurrency 1 before, so a single stalled job blocked the entire queue.
All three are overridable via PROXY_CONCURRENCY / THUMBNAIL_CONCURRENCY
/ CONFORM_CONCURRENCY env vars for nodes with more headroom.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 17:23:07 -04:00
+								// ── DELETE /:id - Remove a job (also handles cancel for active jobs) ─────────
 								// BullMQ refuses job.remove() while a job is in the 'active' state. Before this
 								// fix the route caught that error and fell through to a misleading 404, so
 								// operators couldn't kill a stalled-active job from the UI. Now we detect the
 								// active state explicitly: moveToFailed with the magic '0' token bypasses the
 								// per-worker lock check and transitions active → failed (freeing the queue's
 								// concurrency slot), then remove() drops the row.
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								router.delete('/:id', async (req, res, next) => {
 								  try {
 								    const { id } = req.params;
 								    const colonIdx = id.indexOf(':');
 								    const qType  = colonIdx > -1 ? id.slice(0, colonIdx) : null;
 								    const bullId = colonIdx > -1 ? id.slice(colonIdx + 1) : id;
-												fix(jobs): real cancel for active jobs + multi-threaded thumbnail worker

DELETE /jobs/:id was throwing "404 not found" when the operator tried to
cancel a running job. BullMQ refuses job.remove() while a job is in the
active state; the route caught that error and fell through to the
404 branch, which was misleading because the job actually exists — the
queue was just refusing to drop it from under the worker.

Fix:
- Detect 'active' state explicitly and call moveToFailed(err, '0', false)
  first. Token '0' bypasses the per-worker lock check (the operator-side
  cancel doesn't hold the worker lock). That transitions active -> failed
  and frees the queue's concurrency slot.
- If moveToFailed itself fails (lock owned by a live worker), fall back
  to job.discard() so at least the result is thrown away.
- If remove() then fails (stalled, broken state), drop the job's Redis
  key directly via queue.client. Last-resort obliteration.
- Stop swallowing getJob() errors — if Redis is sad, surface it via
  next(err) instead of returning a misleading 404.
- Return { cancelled: true } when the job was active, so the client
  can show "Cancelled" rather than "Removed" in any future toast.

While here: thumbnail jobs now run with concurrency 4 by default
(proxy 2, conform 1, import 1 unchanged). Every queue defaulted to
concurrency 1 before, so a single stalled job blocked the entire queue.
All three are overridable via PROXY_CONCURRENCY / THUMBNAIL_CONCURRENCY
/ CONFORM_CONCURRENCY env vars for nodes with more headroom.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 17:23:07 -04:00
+								    let lastErr = null;
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								    for (const { queue, type } of QUEUES) {
 								      if (qType && type !== qType) continue;
-												fix(jobs): real cancel for active jobs + multi-threaded thumbnail worker

DELETE /jobs/:id was throwing "404 not found" when the operator tried to
cancel a running job. BullMQ refuses job.remove() while a job is in the
active state; the route caught that error and fell through to the
404 branch, which was misleading because the job actually exists — the
queue was just refusing to drop it from under the worker.

Fix:
- Detect 'active' state explicitly and call moveToFailed(err, '0', false)
  first. Token '0' bypasses the per-worker lock check (the operator-side
  cancel doesn't hold the worker lock). That transitions active -> failed
  and frees the queue's concurrency slot.
- If moveToFailed itself fails (lock owned by a live worker), fall back
  to job.discard() so at least the result is thrown away.
- If remove() then fails (stalled, broken state), drop the job's Redis
  key directly via queue.client. Last-resort obliteration.
- Stop swallowing getJob() errors — if Redis is sad, surface it via
  next(err) instead of returning a misleading 404.
- Return { cancelled: true } when the job was active, so the client
  can show "Cancelled" rather than "Removed" in any future toast.

While here: thumbnail jobs now run with concurrency 4 by default
(proxy 2, conform 1, import 1 unchanged). Every queue defaulted to
concurrency 1 before, so a single stalled job blocked the entire queue.
All three are overridable via PROXY_CONCURRENCY / THUMBNAIL_CONCURRENCY
/ CONFORM_CONCURRENCY env vars for nodes with more headroom.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 17:23:07 -04:00
+								      let job;
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								      try {
-												fix(jobs): real cancel for active jobs + multi-threaded thumbnail worker

DELETE /jobs/:id was throwing "404 not found" when the operator tried to
cancel a running job. BullMQ refuses job.remove() while a job is in the
active state; the route caught that error and fell through to the
404 branch, which was misleading because the job actually exists — the
queue was just refusing to drop it from under the worker.

Fix:
- Detect 'active' state explicitly and call moveToFailed(err, '0', false)
  first. Token '0' bypasses the per-worker lock check (the operator-side
  cancel doesn't hold the worker lock). That transitions active -> failed
  and frees the queue's concurrency slot.
- If moveToFailed itself fails (lock owned by a live worker), fall back
  to job.discard() so at least the result is thrown away.
- If remove() then fails (stalled, broken state), drop the job's Redis
  key directly via queue.client. Last-resort obliteration.
- Stop swallowing getJob() errors — if Redis is sad, surface it via
  next(err) instead of returning a misleading 404.
- Return { cancelled: true } when the job was active, so the client
  can show "Cancelled" rather than "Removed" in any future toast.

While here: thumbnail jobs now run with concurrency 4 by default
(proxy 2, conform 1, import 1 unchanged). Every queue defaulted to
concurrency 1 before, so a single stalled job blocked the entire queue.
All three are overridable via PROXY_CONCURRENCY / THUMBNAIL_CONCURRENCY
/ CONFORM_CONCURRENCY env vars for nodes with more headroom.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 17:23:07 -04:00
+								        job = await queue.getJob(bullId);
 								      } catch (err) {
 								        // Queue-level lookup error: remember it so we don't mask it with 404.
 								        lastErr = err;
 								        continue;
 								      }
 								      if (!job) continue;
 								      const state = await job.getState();
 								      if (state === 'active') {
 								        // Token '0' tells BullMQ to skip the worker-lock check — necessary
 								        // because the operator-side cancel doesn't hold the worker's lock.
 								        try {
 								          await job.moveToFailed(new Error('Cancelled by operator'), '0', false);
 								        } catch (err) {
 								          // Lock owned by a still-living worker; fall back to discard + remove
 								          // so at least the result is thrown away and the row is gone.
 								          try { await job.discard(); } catch (_) {}
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								        }
-												fix(jobs): real cancel for active jobs + multi-threaded thumbnail worker

DELETE /jobs/:id was throwing "404 not found" when the operator tried to
cancel a running job. BullMQ refuses job.remove() while a job is in the
active state; the route caught that error and fell through to the
404 branch, which was misleading because the job actually exists — the
queue was just refusing to drop it from under the worker.

Fix:
- Detect 'active' state explicitly and call moveToFailed(err, '0', false)
  first. Token '0' bypasses the per-worker lock check (the operator-side
  cancel doesn't hold the worker lock). That transitions active -> failed
  and frees the queue's concurrency slot.
- If moveToFailed itself fails (lock owned by a live worker), fall back
  to job.discard() so at least the result is thrown away.
- If remove() then fails (stalled, broken state), drop the job's Redis
  key directly via queue.client. Last-resort obliteration.
- Stop swallowing getJob() errors — if Redis is sad, surface it via
  next(err) instead of returning a misleading 404.
- Return { cancelled: true } when the job was active, so the client
  can show "Cancelled" rather than "Removed" in any future toast.

While here: thumbnail jobs now run with concurrency 4 by default
(proxy 2, conform 1, import 1 unchanged). Every queue defaulted to
concurrency 1 before, so a single stalled job blocked the entire queue.
All three are overridable via PROXY_CONCURRENCY / THUMBNAIL_CONCURRENCY
/ CONFORM_CONCURRENCY env vars for nodes with more headroom.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 17:23:07 -04:00
+								      }
 								      try {
 								        await job.remove();
 								      } catch (err) {
 								        // Last-resort obliteration of the job row via raw Redis. This is
 								        // the path stalled jobs hit when moveToFailed couldn't transition
 								        // them either.
 								        const client = await queue.client;
 								        const prefix = queue.toKey(bullId);
 								        await client.del(prefix);
 								      }
 								      return res.json({ success: true, cancelled: state === 'active' });
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								    }
-												fix(jobs): real cancel for active jobs + multi-threaded thumbnail worker

DELETE /jobs/:id was throwing "404 not found" when the operator tried to
cancel a running job. BullMQ refuses job.remove() while a job is in the
active state; the route caught that error and fell through to the
404 branch, which was misleading because the job actually exists — the
queue was just refusing to drop it from under the worker.

Fix:
- Detect 'active' state explicitly and call moveToFailed(err, '0', false)
  first. Token '0' bypasses the per-worker lock check (the operator-side
  cancel doesn't hold the worker lock). That transitions active -> failed
  and frees the queue's concurrency slot.
- If moveToFailed itself fails (lock owned by a live worker), fall back
  to job.discard() so at least the result is thrown away.
- If remove() then fails (stalled, broken state), drop the job's Redis
  key directly via queue.client. Last-resort obliteration.
- Stop swallowing getJob() errors — if Redis is sad, surface it via
  next(err) instead of returning a misleading 404.
- Return { cancelled: true } when the job was active, so the client
  can show "Cancelled" rather than "Removed" in any future toast.

While here: thumbnail jobs now run with concurrency 4 by default
(proxy 2, conform 1, import 1 unchanged). Every queue defaulted to
concurrency 1 before, so a single stalled job blocked the entire queue.
All three are overridable via PROXY_CONCURRENCY / THUMBNAIL_CONCURRENCY
/ CONFORM_CONCURRENCY env vars for nodes with more headroom.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 17:23:07 -04:00
+								    if (lastErr) return next(lastErr);
-												fix(jobs): read from BullMQ queues instead of empty DB table

GET /api/v1/jobs now queries the proxy, thumbnail, and conform BullMQ
queues directly and returns normalized job objects with id, type,
status, progress, asset_id, timestamps, and error fields.

Also adds DELETE /:id to remove completed/failed jobs from the queue,
supporting the clearCompleted action in jobs.html.

The PostgreSQL jobs table is still used only for conform job creation
(POST /conform) to preserve that workflow.

											
										
										
											2026-05-16 17:38:53 -04:00
+								    res.status(404).json({ error: 'Job not found' });
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
+								  } catch (err) {
 								    next(err);
 								  }
 								});
-												fix: conform route broken SQL — remove dead DB insert, use BullMQ directly

The POST /conform route was inserting into the jobs table with non-existent
columns (project_id, metadata) and an invalid enum value ('pending'). Since
GET /jobs reads entirely from BullMQ, the DB insert was both incorrect and
redundant. Now we just enqueue the BullMQ job and return its ID.

											
										
										
											2026-05-18 23:22:14 -04:00
+								// ── POST /conform - Submit a conform (EDL export) job ────────────────────────
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
+								router.post('/conform', async (req, res, next) => {
 								  try {
 								    const { edl, project_id, output_format } = req.body;
 								    if (!edl || !project_id || !output_format) {
 								      return res.status(400).json({
 								        error: 'edl, project_id, and output_format are required',
 								      });
 								    }
-												fix: conform route broken SQL — remove dead DB insert, use BullMQ directly

The POST /conform route was inserting into the jobs table with non-existent
columns (project_id, metadata) and an invalid enum value ('pending'). Since
GET /jobs reads entirely from BullMQ, the DB insert was both incorrect and
redundant. Now we just enqueue the BullMQ job and return its ID.

											
										
										
											2026-05-18 23:22:14 -04:00
+								    const bullJob = await conformQueue.add('conform-task', {
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
+								      edl,
-												fix: conform route broken SQL — remove dead DB insert, use BullMQ directly

The POST /conform route was inserting into the jobs table with non-existent
columns (project_id, metadata) and an invalid enum value ('pending'). Since
GET /jobs reads entirely from BullMQ, the DB insert was both incorrect and
redundant. Now we just enqueue the BullMQ job and return its ID.

											
										
										
											2026-05-18 23:22:14 -04:00
+								      projectId:    project_id,
-												Fix jobs.js: send camelCase fields to conform worker (projectId/outputFormat)

											
										
										
											2026-05-16 00:46:45 -04:00
+								      outputFormat: output_format,
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
+								    });
-												fix: conform route broken SQL — remove dead DB insert, use BullMQ directly

The POST /conform route was inserting into the jobs table with non-existent
columns (project_id, metadata) and an invalid enum value ('pending'). Since
GET /jobs reads entirely from BullMQ, the DB insert was both incorrect and
redundant. Now we just enqueue the BullMQ job and return its ID.

											
										
										
											2026-05-18 23:22:14 -04:00
+								    res.status(202).json({ id: `conform:${bullJob.id}`, status: 'queued' });
-												add services/mam-api/src/routes/jobs.js

											
										
										
											2026-04-07 21:58:27 -04:00
+								  } catch (err) {
 								    next(err);
 								  }
 								});
 								export default router;