import express from 'express';
import http from 'http';
import os from 'os';
import pool from '../db/pool.js';
import { requireAdmin } from '../middleware/auth.js';

const router = express.Router();

// Hostname the primary mam-api self-registers as (mirrors selfHeartbeat()).
const SELF_HOSTNAME = process.env.NODE_HOSTNAME || os.hostname();

/**
 * Format a process uptime (seconds) the way the Cluster UI expects — a short
 * human string like "3d 4h" / "12m". Workers don't report uptime today, so the
 * primary is the only row that populates this.
 *
 * @param {number} seconds - Uptime in seconds; fractional values are floored.
 *   Non-finite or negative input is treated as 0 so the UI never shows "NaNm".
 * @returns {string} "<d>d <h>h", "<h>h <m>m" or "<m>m", using the two largest
 *   non-zero units.
 */
function formatUptime(seconds) {
  const numeric = Number(seconds);
  const total = Number.isFinite(numeric) ? Math.max(0, Math.floor(numeric)) : 0;
  const days = Math.floor(total / 86400);
  const hours = Math.floor((total % 86400) / 3600);
  const minutes = Math.floor((total % 3600) / 60);
  if (days > 0) return `${days}d ${hours}h`;
  if (hours > 0) return `${hours}h ${minutes}m`;
  return `${minutes}m`;
}

// GET /onboard-info – admin-only. Supplies the Add Node wizard with the bits it
// needs to build a `curl … | bash` onboarding command: the primary API URL the
// remote node-agent should heartbeat to, the raw URL of onboard-node.sh, and
// the deploy branch. apiUrl is a best guess the UI lets the operator edit.
router.get('/onboard-info', requireAdmin, (req, res) => {
  const branch = process.env.DEPLOY_BRANCH || 'main';
  const apiUrl = process.env.PUBLIC_API_URL
    || `${req.protocol}://${req.hostname}:${process.env.API_PORT || 47432}`;
  const scriptUrl = `https://forge.wilddragon.net/zgaetano/wild-dragon/raw/branch/${branch}/deploy/onboard-node.sh`;
  res.json({ apiUrl, scriptUrl, branch });
});

// If the agent reported Docker's default bridge IP (172.17.x) but the request
// itself came from a real LAN address, prefer the request source IP instead.
// We only check 172.17.x — the default docker0 bridge — not the full RFC1918
// 172.16/12 block, since real LANs (e.g. 172.18.91.x) fall in that range.
/**
 * Choose the IP address to store for a heartbeating node.
 *
 * @param {string|null|undefined} reportedIp - IP the agent put in the heartbeat body.
 * @param {string|null|undefined} reqIp - Source address of the HTTP request; an
 *   IPv4-mapped IPv6 prefix (`::ffff:`) is stripped before use.
 * @returns {string|null} The request IP when nothing was reported or when the
 *   reported IP is a docker0 bridge address and the request IP is not;
 *   otherwise the reported IP. `null` when neither is available.
 */
function pickIp(reportedIp, reqIp) {
  const clean = (s) => (s || '').replace(/^::ffff:/, '');
  const isDockerBridge = (ip) => /^172\.17\./.test(ip || '');
  const r = clean(reqIp);
  if (!reportedIp) return r || null;
  if (isDockerBridge(reportedIp) && r && !isDockerBridge(r)) return r;
  return reportedIp;
}

// A node counts as online while its last heartbeat is younger than this.
const ONLINE_THRESHOLD_SECONDS = 120;

/**
 * Decide whether a node is online from the age of its last heartbeat.
 *
 * @param {number|string|null|undefined} staleSeconds - Value of the
 *   `stale_seconds` column computed in the queries below.
 * @returns {boolean} false when the age is missing (NULL last_seen would
 *   otherwise coerce to 0 and look freshly seen) or not below the threshold.
 */
function isOnline(staleSeconds) {
  if (staleSeconds == null) return false;
  return Number(staleSeconds) < ONLINE_THRESHOLD_SECONDS;
}

/**
 * Issue a request to the local Docker Engine API over the unix socket.
 *
 * @param {string} path - API path (with query string) appended to `/v1.41`.
 * @param {string} [method='GET'] - HTTP method.
 * @param {object|null} [body=null] - Optional JSON body.
 * @returns {Promise<any|null>} Parsed JSON response, or null when the body is
 *   empty or not valid JSON.
 * @throws {Error} Socket errors (e.g. code ENOENT / EACCES), a 5 s timeout, or
 *   an Error carrying `statusCode` when Docker answers with HTTP >= 400.
 */
function dockerRequest(path, method = 'GET', body = null) {
  return new Promise((resolve, reject) => {
    const opts = {
      socketPath: '/var/run/docker.sock',
      path: `/v1.41${path}`,
      method,
      headers: { 'Accept': 'application/json', 'Content-Type': 'application/json' },
    };
    const req = http.request(opts, (res) => {
      // Decode as UTF-8 at the stream level so a multi-byte character split
      // across two chunks is not corrupted by per-chunk string conversion.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => { data += chunk; });
      res.on('error', reject);
      res.on('end', () => {
        let parsed = null;
        if (data.trim()) {
          try {
            parsed = JSON.parse(data);
          } catch (_) {
            parsed = null;
          }
        }
        // Docker reports failures as an HTTP error status with a
        // `{ message }` body; surface them instead of resolving as success.
        if (res.statusCode >= 400) {
          const detail = (parsed && parsed.message) || `HTTP ${res.statusCode}`;
          const err = new Error(`Docker API ${method} ${path} failed: ${detail}`);
          err.statusCode = res.statusCode;
          reject(err);
          return;
        }
        resolve(parsed);
      });
    });
    req.on('error', reject);
    req.setTimeout(5000, () => {
      req.destroy();
      reject(new Error('Docker socket timeout'));
    });
    if (body) req.write(JSON.stringify(body));
    req.end();
  });
}

// GET / – list every registered node with a derived `online` flag.
router.get('/', async (req, res, next) => {
  try {
    const r = await pool.query(
      `SELECT *, EXTRACT(EPOCH FROM (NOW() - last_seen)) AS stale_seconds FROM cluster_nodes ORDER BY registered_at ASC`
    );
    res.json(r.rows.map((row) => {
      const out = { ...row, online: isOnline(row.stale_seconds) };
      // The primary (this mam-api host) does not heartbeat via the node-agent,
      // so its version/uptime are never populated. Self-populate them here so
      // the Cluster screen renders them like worker nodes instead of dashes.
      if (row.role === 'primary' && row.hostname === SELF_HOSTNAME) {
        out.version = process.env.npm_package_version || row.version || null;
        out.uptime = formatUptime(process.uptime());
      }
      return out;
    }));
  } catch (err) {
    next(err);
  }
});

/**
 * Read a running container's memory usage without page cache.
 * Live memory usage requires a per-container stats call (the list endpoint
 * doesn't include it). memory_stats.usage includes page cache; subtract it to
 * match `docker stats`. cgroup v1 hosts expose it as `stats.cache`; cgroup v2
 * hosts have no `cache` key, so fall back to `stats.inactive_file`.
 *
 * @param {string} containerId - Full container id.
 * @returns {Promise<number|null>} Bytes in use, or null when unavailable.
 */
async function readContainerMemBytes(containerId) {
  try {
    const stats = await dockerRequest(`/containers/${containerId}/stats?stream=false`);
    const ms = stats && stats.memory_stats;
    if (!ms || typeof ms.usage !== 'number') return null;
    const cache = (ms.stats && (ms.stats.cache ?? ms.stats.inactive_file)) || 0;
    return ms.usage - cache;
  } catch (_) {
    // Best effort: a failed stats call only blanks the memory column.
    return null;
  }
}

// GET /containers – containers on this host, as shown on the Cluster screen.
router.get('/containers', async (req, res, next) => {
  try {
    const containers = await dockerRequest('/containers/json?all=true').catch((err) => {
      // A Docker-side HTTP error used to arrive as a non-array payload and
      // yield an empty list; keep that best-effort behaviour.
      if (err.statusCode) return null;
      throw err;
    });
    if (!Array.isArray(containers)) return res.json([]);

    // One extra Docker call per running container, but the list is small.
    const out = await Promise.all(containers.map(async (c) => {
      const rawName = ((c.Names && c.Names[0]) || '').replace(/^\//, '');
      const name = rawName.replace(/^wild-dragon-/, '').replace(/-\d+$/, '');
      const ports = (c.Ports || [])
        .filter((p) => p.PublicPort)
        .map((p) => `${p.PublicPort}→${p.PrivatePort}`)
        .join(', ');
      const status = c.Status || '';
      const memBytes = c.State === 'running' ? await readContainerMemBytes(c.Id) : null;
      return {
        id: c.Id.slice(0, 12),
        name,
        image: (c.Image || '').replace(/^sha256:/, '').slice(0, 40),
        state: c.State,
        uptime: status.replace(/\s*\(.*\)/, '').trim(),
        // Match the exact "(healthy)" marker: a plain substring test also
        // matches "(unhealthy)".
        healthy: /\(healthy\)/.test(status),
        ports,
        cpu: 0,
        memBytes,
      };
    }));
    res.json(out);
  } catch (err) {
    // No socket / no permission → Docker is simply unavailable on this host.
    if (err.code === 'ENOENT' || err.code === 'EACCES') return res.json([]);
    next(err);
  }
});

// POST /containers/:nameOrId/restart – restart a container on this host.
// NOTE(review): unlike the driver routes below this is not guarded by
// requireAdmin — confirm the router is mounted behind equivalent auth.
router.post('/containers/:nameOrId/restart', async (req, res, next) => {
  try {
    await dockerRequest(`/containers/${encodeURIComponent(req.params.nameOrId)}/restart`, 'POST');
    res.json({ ok: true });
  } catch (err) {
    // Docker answered with an error (e.g. no such container): report it
    // rather than claiming success.
    if (err.statusCode) {
      const status = err.statusCode === 404 ? 404 : 502;
      return res.status(status).json({ ok: false, error: err.message });
    }
    next(err);
  }
});

// POST /heartbeat – upsert a node row keyed by hostname. When AUTH_ENABLED is
// 'true' the caller must hold a token bound to that hostname or be an admin.
router.post('/heartbeat', async (req, res, next) => {
  try {
    const {
      hostname,
      ip_address,
      role = 'worker',
      version,
      api_url,
      cpu_usage,
      mem_used_mb,
      mem_total_mb,
      capabilities,
      metadata,
      metrics,
    } = req.body;
    if (!hostname) return res.status(400).json({ error: 'hostname is required' });

    if (process.env.AUTH_ENABLED === 'true') {
      const bound = req.tokenBoundHostname;
      if (bound && bound !== hostname) {
        return res.status(403).json({
          error: `Token is bound to "${bound}" but heartbeat reported "${hostname}"`,
        });
      }
      if (!bound && req.user?.role !== 'admin') {
        return res.status(403).json({
          error: 'Heartbeat requires a node-bound token or admin session',
        });
      }
    }

    const effectiveIp = pickIp(ip_address, req.ip || req.socket?.remoteAddress);
    // metrics is the only column preserved when the heartbeat omits it
    // (COALESCE); every other column is overwritten on conflict.
    const r = await pool.query(
      `INSERT INTO cluster_nodes (hostname, ip_address, role, version, api_url, cpu_usage, mem_used_mb, mem_total_mb, last_seen, last_seen_at, capabilities, metadata, metrics) VALUES ($1,$2,$3,$4,$5,$6,$7,$8,NOW(),NOW(),$9,$10,$11) ON CONFLICT (hostname) DO UPDATE SET ip_address = EXCLUDED.ip_address, role = EXCLUDED.role, version = EXCLUDED.version, api_url = EXCLUDED.api_url, cpu_usage = EXCLUDED.cpu_usage, mem_used_mb = EXCLUDED.mem_used_mb, mem_total_mb = EXCLUDED.mem_total_mb, last_seen = NOW(), last_seen_at = NOW(), capabilities = EXCLUDED.capabilities, metadata = EXCLUDED.metadata, metrics = COALESCE(EXCLUDED.metrics, cluster_nodes.metrics) RETURNING *`,
      [
        hostname,
        effectiveIp,
        role,
        version || null,
        api_url || null,
        cpu_usage != null ? cpu_usage : null,
        mem_used_mb != null ? mem_used_mb : null,
        mem_total_mb != null ? mem_total_mb : null,
        capabilities != null ? JSON.stringify(capabilities) : '{}',
        metadata != null ? JSON.stringify(metadata) : null,
        metrics != null ? JSON.stringify(metrics) : null,
      ]
    );
    res.json(r.rows[0]);
  } catch (err) {
    next(err);
  }
});

// GET /devices/blackmagic/signal – one entry per Blackmagic port with the live
// signal state of the SDI recorder attached to it (if any).
router.get('/devices/blackmagic/signal', async (req, res, next) => {
  try {
    // The two lookups are independent, so run them concurrently.
    const [nodesResult, recResult] = await Promise.all([
      pool.query(
        `SELECT id, hostname, ip_address, api_url, capabilities, EXTRACT(EPOCH FROM (NOW() - last_seen)) AS stale_seconds FROM cluster_nodes WHERE capabilities IS NOT NULL`
      ),
      pool.query(
        `SELECT id, name, status, container_id, node_id, device_index, source_config FROM recorders WHERE source_type = 'sdi' AND node_id IS NOT NULL`
      ),
    ]);

    const recByPort = new Map();
    for (const r of recResult.rows) {
      const devIdx = r.device_index ?? r.source_config?.device ?? 0;
      recByPort.set(`${r.node_id}:${devIdx}`, r);
    }

    const tasks = [];
    for (const node of nodesResult.rows) {
      const nodeOnline = isOnline(node.stale_seconds);
      const bm = (node.capabilities && node.capabilities.blackmagic) || [];
      const model = (node.capabilities && node.capabilities.blackmagic_model) || null;
      // Compare against the same hostname the primary self-registers as, so
      // this host is recognised as local even when NODE_HOSTNAME is unset.
      const isRemote = node.api_url && node.hostname !== SELF_HOSTNAME;

      bm.forEach((d, idx) => {
        const portIndex = d.index !== undefined ? d.index : idx;
        const rec = recByPort.get(`${node.id}:${portIndex}`);
        tasks.push((async () => {
          const base = {
            node_id: node.id,
            hostname: node.hostname,
            index: portIndex,
            device: d.device || null,
            model,
            node_online: nodeOnline,
            recorder_id: rec ? rec.id : null,
            recorder_name: rec ? rec.name : null,
            recorder_status: rec ? rec.status : null,
            signal: 'no-recorder',
            framesReceived: null,
            currentFps: null,
          };
          if (!rec || rec.status !== 'recording' || !rec.container_id) {
            if (rec && rec.status !== 'recording') base.signal = 'idle';
            return base;
          }
          try {
            let live = null;
            if (isRemote) {
              // Remote nodes are probed through their node-agent sidecar
              // route, which wraps the reading in a `live` property.
              const r = await fetch(`${node.api_url}/sidecar/${rec.container_id}/status`, {
                signal: AbortSignal.timeout(2500),
              });
              if (r.ok) live = (await r.json()).live;
            } else {
              const r = await fetch(`http://recorder-${rec.id}:3001/capture/status`, {
                signal: AbortSignal.timeout(2000),
              });
              if (r.ok) live = await r.json();
            }
            if (live && live.signal) {
              base.signal = live.signal;
              base.framesReceived = live.framesReceived ?? null;
              base.currentFps = live.currentFps ?? null;
            } else {
              base.signal = 'connecting';
            }
          } catch (_) {
            // Probe failed or timed out: the recorder exists but is not
            // answering yet.
            base.signal = 'connecting';
          }
          return base;
        })());
      });
    }

    const results = await Promise.all(tasks);
    res.json(results);
  } catch (err) {
    next(err);
  }
});

// GET /devices/blackmagic – flat list of Blackmagic ports across all nodes.
router.get('/devices/blackmagic', async (req, res, next) => {
  try {
    const r = await pool.query(
      `SELECT id, hostname, ip_address, role, capabilities, EXTRACT(EPOCH FROM (NOW() - last_seen)) AS stale_seconds FROM cluster_nodes WHERE capabilities IS NOT NULL`
    );
    const out = [];
    for (const row of r.rows) {
      const online = isOnline(row.stale_seconds);
      const bm = (row.capabilities && row.capabilities.blackmagic) || [];
      const model = (row.capabilities && row.capabilities.blackmagic_model) || null;
      bm.forEach((d, idx) => {
        out.push({
          node_id: row.id,
          hostname: row.hostname,
          ip_address: row.ip_address,
          role: row.role,
          online,
          model,
          index: d.index !== undefined ? d.index : idx,
          device: d.device,
        });
      });
    }
    res.json(out);
  } catch (err) {
    next(err);
  }
});

// GET /devices/deltacast – flat list of Deltacast ports across all nodes.
router.get('/devices/deltacast', async (req, res, next) => {
  try {
    const r = await pool.query(
      `SELECT id, hostname, ip_address, role, capabilities, EXTRACT(EPOCH FROM (NOW() - last_seen)) AS stale_seconds FROM cluster_nodes WHERE capabilities IS NOT NULL`
    );
    const out = [];
    for (const row of r.rows) {
      const online = isOnline(row.stale_seconds);
      const dc = (row.capabilities && row.capabilities.deltacast) || [];
      const model = (row.capabilities && row.capabilities.deltacast_model) || null;
      dc.forEach((d, idx) => {
        out.push({
          node_id: row.id,
          hostname: row.hostname,
          ip_address: row.ip_address,
          role: row.role,
          online,
          model: model || 'Deltacast',
          index: d.index !== undefined ? d.index : idx,
          device: d.device,
          present: d.present !== false,
          port_count: dc.length,
        });
      });
    }
    res.json(out);
  } catch (err) {
    next(err);
  }
});

// GET /devices/deltacast/signal – one entry per Deltacast port with the live
// signal state of the recorder attached to it (if any).
router.get('/devices/deltacast/signal', async (req, res, next) => {
  try {
    const [nodesRes, recordersRes] = await Promise.all([
      pool.query(`SELECT id, hostname, ip_address, api_url, capabilities, EXTRACT(EPOCH FROM (NOW() - last_seen)) AS stale_seconds FROM cluster_nodes WHERE capabilities IS NOT NULL`),
      pool.query(`SELECT id, node_id, device_index, status, source_type, container_id FROM recorders WHERE source_type = 'deltacast'`),
    ]);

    const recByNodePort = new Map();
    for (const rec of recordersRes.rows) {
      recByNodePort.set(`${rec.node_id}:${rec.device_index}`, rec);
    }

    // Entries are pushed in node/port order; Promise.all preserves that order
    // for both the plain objects and the in-flight probes.
    const tasks = [];
    for (const node of nodesRes.rows) {
      const online = isOnline(node.stale_seconds);
      const dc = (node.capabilities && node.capabilities.deltacast) || [];
      const model = (node.capabilities && node.capabilities.deltacast_model) || 'Deltacast';

      dc.forEach((port, position) => {
        const idx = port.index !== undefined ? port.index : position;
        const rec = recByNodePort.get(`${node.id}:${idx}`);
        const base = {
          node_id: node.id,
          hostname: node.hostname,
          ip_address: node.ip_address,
          online,
          model,
          index: idx,
          device: port.device,
          present: port.present !== false,
          recorder_id: rec ? rec.id : null,
          recorder_status: rec ? rec.status : null,
          signal: 'no-recorder',
          framesReceived: null,
          currentFps: null,
        };
        if (!rec) {
          tasks.push(base);
          return;
        }
        if (rec.status !== 'recording') {
          base.signal = 'idle';
          tasks.push(base);
          return;
        }

        tasks.push((async () => {
          // A recorder is attached and recording, so "no-recorder" would be
          // wrong; until a live reading arrives the port is "connecting".
          base.signal = 'connecting';
          try {
            const viaSidecar = Boolean(node.api_url);
            const url = viaSidecar
              ? `${node.api_url}/sidecar/${rec.container_id}/status`
              : `http://recorder-${rec.id}:3001/capture/status`;
            const r = await fetch(url, { signal: AbortSignal.timeout(2500) });
            if (r.ok) {
              const payload = await r.json();
              // The blackmagic handler reads the sidecar reading from a `live`
              // property; accept that envelope here too, and fall back to the
              // bare payload when it is absent.
              const wrapped = viaSidecar && payload && typeof payload.live === 'object' && payload.live !== null;
              const live = wrapped ? payload.live : payload;
              if (live && live.signal) {
                base.signal = live.signal;
                base.framesReceived = live.framesReceived ?? null;
                base.currentFps = live.currentFps ?? null;
              }
            }
          } catch (_) {
            // Probe failed or timed out: leave the entry as "connecting".
          }
          return base;
        })());
      });
    }

    const results = await Promise.all(tasks);
    res.json(results);
  } catch (err) {
    next(err);
  }
});

// GET /:id/ping – probe a node's registered api_url and report latency.
router.get('/:id/ping', async (req, res, next) => {
  try {
    const r = await pool.query('SELECT id, hostname, api_url FROM cluster_nodes WHERE id = $1', [req.params.id]);
    if (r.rowCount === 0) return res.status(404).json({ error: 'Node not found' });
    const node = r.rows[0];
    if (!node.api_url) return res.json({ reachable: false, reason: 'no api_url registered' });

    const start = Date.now();
    try {
      const upstream = await fetch(`${node.api_url}/health`, { signal: AbortSignal.timeout(4000) });
      const latency_ms = Date.now() - start;
      // A non-JSON health body is tolerated and reported as an empty object.
      const body = await upstream.json().catch(() => ({}));
      res.json({ reachable: upstream.ok, latency_ms, status: upstream.status, agent: body });
    } catch (err) {
      res.json({ reachable: false, latency_ms: Date.now() - start, reason: err.message });
    }
  } catch (err) {
    next(err);
  }
});

// ── Capture-driver / SDK deployment ────────────────────────────────────────
// Admins install/update vendor capture-card drivers on a node from the UI.
// We resolve the node's api_url (like /:id/ping) and forward to its node-agent,
// which runs deploy/install-driver.sh in a privileged one-shot
// container against the host kernel. Vendor is allowlisted here AND on the
// agent. We never echo the agent token or proprietary paths back to the client.
const DRIVER_VENDORS = ['blackmagic', 'aja', 'deltacast', 'ndi'];

// Bearer the agent expects (its NODE_TOKEN). Configured server-side; never
// derived from client input and never returned to the browser.
function agentAuthHeaders() {
  const tok = process.env.NODE_AGENT_TOKEN || '';
  return tok ? { Authorization: `Bearer ${tok}` } : {};
}

/**
 * Load the node row needed to reach its agent.
 *
 * @param {string|number} id - cluster_nodes.id taken from the route parameter.
 * @returns {Promise<object|null>} Row with id, hostname, api_url and
 *   capabilities, or null when no such node exists.
 */
async function resolveNode(id) {
  const r = await pool.query('SELECT id, hostname, api_url, capabilities FROM cluster_nodes WHERE id = $1', [id]);
  return r.rowCount === 0 ? null : r.rows[0];
}

// GET /:id/driver-status – admin-only proxy to the agent's /driver/status.
router.get('/:id/driver-status', requireAdmin, async (req, res, next) => {
  try {
    const node = await resolveNode(req.params.id);
    if (!node) return res.status(404).json({ error: 'Node not found' });
    if (!node.api_url) return res.status(409).json({ error: 'Node has no api_url registered' });

    try {
      const upstream = await fetch(`${node.api_url}/driver/status`, {
        headers: agentAuthHeaders(),
        signal: AbortSignal.timeout(6000),
      });
      const body = await upstream.json().catch(() => ({}));
      if (!upstream.ok) {
        return res.status(502).json({ error: 'Agent driver-status failed', status: upstream.status });
      }
      res.json(body);
    } catch (err) {
      res.status(502).json({ error: 'Node unreachable', reason: err.message });
    }
  } catch (err) {
    next(err);
  }
});

// POST /:id/install-driver – admin-only; forwards an allowlisted vendor to the
// agent's /driver/install and relays a fixed set of result fields.
router.post('/:id/install-driver', requireAdmin, async (req, res, next) => {
  try {
    const vendor = String(req.body?.vendor || '').toLowerCase();
    if (!DRIVER_VENDORS.includes(vendor)) {
      return res.status(400).json({ error: `Invalid vendor (allowed: ${DRIVER_VENDORS.join(', ')})` });
    }
    const node = await resolveNode(req.params.id);
    if (!node) return res.status(404).json({ error: 'Node not found' });
    if (!node.api_url) return res.status(409).json({ error: 'Node has no api_url registered' });

    try {
      // DKMS builds can take minutes — generous timeout.
      const upstream = await fetch(`${node.api_url}/driver/install`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json', ...agentAuthHeaders() },
        body: JSON.stringify({ vendor }),
        signal: AbortSignal.timeout(600000),
      });
      const body = await upstream.json().catch(() => ({}));
      // Relay logs/result. install-driver.sh never echoes secrets; the agent
      // returns only its structured [install-driver] log lines + status.
      res.status(upstream.ok ? 200 : 502).json({
        ok: !!body.ok,
        vendor,
        exitCode: body.exitCode ?? null,
        rebootRequired: !!body.rebootRequired,
        status: body.status ?? null,
        logs: typeof body.logs === 'string' ? body.logs : '',
        error: body.ok ? undefined : (body.error || 'Install failed — see logs'),
      });
    } catch (err) {
      res.status(502).json({ error: 'Node unreachable or install timed out', reason: err.message });
    }
  } catch (err) {
    next(err);
  }
});

// GET /metrics – per-node CPU/RAM plus GPUs. Each GPU listed in capabilities
// is merged with its live reading from metrics; live-only GPUs are appended.
router.get('/metrics', async (req, res, next) => {
  try {
    const r = await pool.query(
      `SELECT id, hostname, role, last_seen, cpu_usage, mem_used_mb, mem_total_mb, capabilities, metrics, EXTRACT(EPOCH FROM (NOW() - last_seen)) AS stale_seconds FROM cluster_nodes ORDER BY registered_at ASC`
    );
    const nodes = r.rows.map((row) => {
      const capGpus = (row.capabilities && row.capabilities.gpus) || [];
      const liveGpus = (row.metrics && row.metrics.gpus) || [];

      const gpus = capGpus.map((g, idx) => {
        // Pair by reported index first, then by position, else no live data.
        const live = liveGpus.find((l) => l.index === g.index) || liveGpus[idx] || {};
        return {
          name: g.name || null,
          util_pct: live.util_pct != null ? live.util_pct : null,
          memory_used_mb: live.memory_used_mb != null ? live.memory_used_mb : null,
          memory_total_mb: g.memory_mb != null ? g.memory_mb : (live.memory_total_mb ?? null),
        };
      });
      for (const lg of liveGpus) {
        if (!capGpus.some((g) => g.index === lg.index)) {
          gpus.push({
            name: lg.name || null,
            util_pct: lg.util_pct != null ? lg.util_pct : null,
            memory_used_mb: lg.memory_used_mb != null ? lg.memory_used_mb : null,
            memory_total_mb: lg.memory_total_mb != null ? lg.memory_total_mb : null,
          });
        }
      }

      return {
        id: row.id,
        hostname: row.hostname,
        role: row.role,
        online: isOnline(row.stale_seconds),
        last_seen: row.last_seen,
        cpu_util_pct: row.cpu_usage != null ? Number(row.cpu_usage) : null,
        ram_used_mb: row.mem_used_mb != null ? row.mem_used_mb : null,
        ram_total_mb: row.mem_total_mb != null ? row.mem_total_mb : null,
        gpus,
      };
    });
    res.json({ nodes });
  } catch (err) {
    next(err);
  }
});

// DELETE /:id – remove a node row.
// NOTE(review): not guarded by requireAdmin — confirm the router is mounted
// behind equivalent auth.
router.delete('/:id', async (req, res, next) => {
  try {
    const r = await pool.query('DELETE FROM cluster_nodes WHERE id = $1 RETURNING id', [req.params.id]);
    if (r.rowCount === 0) return res.status(404).json({ error: 'Node not found' });
    res.json({ ok: true });
  } catch (err) {
    next(err);
  }
});

export default router;