feat(gpu+capture): nvenc HLS preview, source-backend abstraction, GPU affinity+telemetry

#164 HLS preview uses h264_nvenc (forced-IDR, GOP=segment) when the sidecar has the GPU, else keeps libx264 fallback. #168 source-backend abstraction in capture-manager (blackmagic implemented as a behavior-preserving refactor; deltacast/aja stubbed pending hardware). #167 per-recorder gpu_uuid (migration 032) plumbed mam-api->agent-> NVIDIA_VISIBLE_DEVICES (defaults to 'all'). #166 node-agent reports encoder util + NVENC session count per GPU; Cluster screen renders per-GPU GPU/ENC util, VRAM, sessions. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 18:38:56 -04:00 · 2026-05-31 18:38:56 -04:00 · 08499b93b2
commit 08499b93b2
parent ca1eec0600
5 changed files with 291 additions and 67 deletions
--- a/services/capture/src/capture-manager.js
+++ b/services/capture/src/capture-manager.js
@ -133,6 +133,59 @@ const VIDEO_CODECS = {
  },
 };

+// nvenc codecs available in the capture image. Used both to validate the master
+// codec and (issue #164) as the GPU-availability signal for the HLS preview.
+const NVENC_CODECS = new Set(['h264_nvenc', 'hevc_nvenc']);
+
+// ── GPU availability for this sidecar (issue #164) ───────────────────────
+// The HLS monitor preview should be GPU-encoded (h264_nvenc) when — and only
+// when — the GPU is actually attached to this capture container. A non-GPU
+// recorder must keep using libx264, otherwise ffmpeg would fail to open the
+// nvenc encoder and break the preview.
+//
+// Two signals, OR'd for robustness:
+//   1) The master video codec is an nvenc codec. recorders.js derives `useGpu`
+//      from exactly this (GPU_CODECS = [hevc_nvenc, h264_nvenc]) and node-agent
+//      only attaches the NVIDIA runtime when useGpu is set — so an nvenc master
+//      codec is a reliable proxy for "this sidecar has the GPU".
+//   2) node-agent injects NVIDIA_VISIBLE_DEVICES into the sidecar env whenever
+//      useGpu is set. This is the most direct in-process evidence the runtime
+//      attached a GPU, and covers the (currently unused) case where the GPU is
+//      present but the master codec is a CPU codec.
+function gpuAvailableForPreview(masterCodec) {
+  if (NVENC_CODECS.has(masterCodec)) return true;
+  const vis = process.env.NVIDIA_VISIBLE_DEVICES;
+  if (vis && vis !== 'void' && vis !== 'none') return true;
+  return false;
+}
+
+// Build the HLS preview video-encode args. `segTime` is the HLS segment length
+// (seconds); we pin the GOP/keyframe interval to one IDR per segment so every
+// segment starts on a keyframe (misaligned keyframes were the root cause of the
+// playout preview black/flashing bug — keep the preview robust).
+function buildHlsVideoArgs(masterCodec, framerate) {
+  // Frames-per-segment for keyframe alignment. The SDI preview runs at the
+  // capture framerate; default to 30 (matches the test-card rate) when unknown.
+  const fps = Number.parseFloat(framerate) || 30;
+  const segTime = 2;                       // matches -hls_time below
+  const gop = Math.max(1, Math.round(fps * segTime));
+  if (gpuAvailableForPreview(masterCodec)) {
+    // Low-latency NVENC preset (p1 + ll tune). forced-idr + a keyframe every GOP
+    // frames keeps segment boundaries on IDR frames so hls.js can sync cleanly.
+    return [
+      '-c:v', 'h264_nvenc', '-preset', 'p1', '-tune', 'll',
+      '-pix_fmt', 'yuv420p', '-b:v', '2M',
+      '-g', String(gop), '-forced-idr', '1', '-sc_threshold', '0',
+    ];
+  }
+  // No GPU → keep the original CPU encode (must not break a non-GPU recorder).
+  return [
+    '-c:v', 'libx264', '-preset', 'veryfast', '-tune', 'zerolatency',
+    '-pix_fmt', 'yuv420p', '-b:v', '2M',
+    '-g', String(gop), '-sc_threshold', '0',
+  ];
+}
+
 const AUDIO_CODECS = {
  pcm_s16le:   { args: ['-c:a', 'pcm_s16le'],   bitrateControl: false },
  pcm_s24le:   { args: ['-c:a', 'pcm_s24le'],   bitrateControl: false },
@ -182,6 +235,88 @@ const GROWING_VIDEO_ARGS = [
 ];
 const GROWING_EXT = 'mxf';

+// ── Source-backend abstraction (issue #168) ──────────────────────────────
+// The capture input was historically hard-wired to a single `-f decklink -i …`
+// construction. To allow other SDI capture cards (Deltacast, AJA) to be added
+// later without touching the encode/output/HLS pipeline, the per-backend FFmpeg
+// INPUT-arg construction now lives behind this map. Each backend exposes:
+//
+//   buildInput(ctx) -> { inputArgs, isNetwork }   (may be async)
+//
+// where `ctx` carries the resolved recorder fields the backend needs (device).
+// The rest of capture-manager consumes the returned `inputArgs` unchanged, so
+// adding a backend is purely additive.
+//
+// IMPORTANT: `blackmagic` is a behaviour-preserving extraction of the previous
+// default DeckLink path — for an existing DeckLink recorder the produced ffmpeg
+// input args are byte-for-byte identical to the pre-refactor code. The
+// `deltacast`/`aja` entries are stubs that throw until the hardware/SDK plumbing
+// lands.
+const sourceBackends = {
+  // BlackMagic DeckLink over SDI (the only backend implemented today).
+  // device may be an integer index (0-based) or a full device name string.
+  // FFmpeg 7.x DeckLink requires the full name (e.g. 'DeckLink Duo 2 (2)').
+  // Map integer index -> name using ffmpeg -sources decklink at runtime.
+  //
+  // Source lines are parsed as "<indent><hex:id> [<device name>] ...", e.g.
+  //     81:76669a80:00000000 [DeckLink Duo (1)] (none)
+  // and the bracketed name is collected in listing order, so index N picks
+  // the Nth detected device. Lines not matching that shape (such as an
+  // unindented header) are ignored.
+  // NOTE(review): this comment used to show bare indented names and call the
+  // hex-id regex broken, contradicting the regex below — confirm the format
+  // against real `ffmpeg -sources decklink` output on the target build.
+  blackmagic: {
+    async buildInput({ device }) {
+      let deckLinkName = String(device);
+      if (typeof device === 'number' || /^\d+$/.test(String(device))) {
+        const idx = parseInt(device, 10);
+        try {
+          const { execSync } = await import('child_process');
+          const out = execSync('ffmpeg -hide_banner -sources decklink 2>&1', { encoding: 'utf-8', timeout: 5000 });
+          const names = [];
+          for (const line of out.split('\n')) {
+            // DeckLink source lines: "  81:76669a80:00000000 [DeckLink Duo (1)] (none)"
+            const m = line.match(/^\s+[0-9a-f:]+\s+\[([^\]]+)\]/);
+            if (m) names.push(m[1]);
+          }
+          if (names[idx]) {
+            deckLinkName = names[idx];
+            console.log(`[capture] DeckLink index ${idx} → "${deckLinkName}" (from ${names.length} detected: ${names.join(', ')})`);
+          } else {
+            // Fallback: cannot determine model name without enumeration.
+            // Log a warning — operator should check the detected device list.
+            console.warn(`[capture] DeckLink index ${idx} out of range (detected ${names.length} devices: ${names.join(', ')}). Falling back to index-only input — capture may fail.`);
+            deckLinkName = `DeckLink (${idx})`;
+          }
+        } catch (err) {
+          console.warn(`[capture] ffmpeg -sources decklink failed: ${err.message}. Using index ${device} directly.`);
+          // Pass the numeric index directly; some ffmpeg builds accept it.
+          deckLinkName = String(device);
+        }
+      }
+      return {
+        inputArgs: ['-f', 'decklink', '-i', deckLinkName],
+        isNetwork: false,
+      };
+    },
+  },
+
+  // Stubs — hardware/SDK plumbing not yet implemented. These throw clearly so a
+  // misconfigured recorder fails fast instead of silently falling back to the
+  // wrong card.
+  deltacast: {
+    buildInput() {
+      throw new Error('deltacast backend not yet implemented — requires hardware');
+    },
+  },
+  aja: {
+    buildInput() {
+      throw new Error('aja backend not yet implemented — requires hardware');
+    },
+  },
+};
+
 function buildEncodeArgs({
  codec, videoBitrate, framerate,
  audioCodec, audioBitrate, audioChannels,
@ -257,7 +392,7 @@ class CaptureManager {
   * Returns { inputArgs, isNetwork }
   * @private
   */
-  async _buildInputArgs({ sourceType, device, sourceUrl, listen, listenPort, streamKey }) {
+  async _buildInputArgs({ sourceType, sourceBackend = 'blackmagic', device, sourceUrl, listen, listenPort, streamKey }) {
    if (sourceType === 'srt') {
      let url;
      if (listen) {
@ -321,50 +456,15 @@ class CaptureManager {
      }
    }

-    // Default: SDI via DeckLink
-    // device may be an integer index (0-based) or a full device name string.
-    // FFmpeg 7.x DeckLink requires the full name (e.g. 'DeckLink Duo 2 (2)').
-    // Map integer index -> name using ffmpeg -sources decklink at runtime.
-    //
-    // ffmpeg -sources decklink output format:
-    //   Auto-detected sources for decklink:
-    //     DeckLink Duo 2
-    //     DeckLink Duo 2 (2)
-    // Lines containing device names start with whitespace; the header line
-    // starts with a non-space character. Previous code used a v4l2-style
-    // hex-address regex that never matched DeckLink output → index 1+ always
-    // fell through to a wrong fallback, producing black output from port 2+.
-    let deckLinkName = String(device);
-    if (typeof device === 'number' || /^\d+$/.test(String(device))) {
-      const idx = parseInt(device, 10);
-      try {
-        const { execSync } = await import('child_process');
-        const out = execSync('ffmpeg -hide_banner -sources decklink 2>&1', { encoding: 'utf-8', timeout: 5000 });
-        const names = [];
-        for (const line of out.split('\n')) {
-          // DeckLink source lines: "  81:76669a80:00000000 [DeckLink Duo (1)] (none)"
-          const m = line.match(/^\s+[0-9a-f:]+\s+\[([^\]]+)\]/);
-          if (m) names.push(m[1]);
-        }
-        if (names[idx]) {
-          deckLinkName = names[idx];
-          console.log(`[capture] DeckLink index ${idx} → "${deckLinkName}" (from ${names.length} detected: ${names.join(', ')})`);
-        } else {
-          // Fallback: cannot determine model name without enumeration.
-          // Log a warning — operator should check the detected device list.
-          console.warn(`[capture] DeckLink index ${idx} out of range (detected ${names.length} devices: ${names.join(', ')}). Falling back to index-only input — capture may fail.`);
-          deckLinkName = `DeckLink (${idx})`;
-        }
-      } catch (err) {
-        console.warn(`[capture] ffmpeg -sources decklink failed: ${err.message}. Using index ${device} directly.`);
-        // Pass the numeric index directly; some ffmpeg builds accept it.
-        deckLinkName = String(device);
-      }
+    // Default: SDI via a pluggable source backend (issue #168). The backend
+    // selection defaults to `blackmagic` (DeckLink) so existing SDI recorders
+    // behave exactly as before. Deltacast/AJA backends throw until their
+    // hardware/SDK plumbing lands.
+    const backend = sourceBackends[sourceBackend];
+    if (!backend) {
+      throw new Error(`Unknown source backend "${sourceBackend}" — expected one of: ${Object.keys(sourceBackends).join(', ')}`);
    }
-    return {
-      inputArgs: ['-f', 'decklink', '-i', deckLinkName],
-      isNetwork: false,
-    };
+    return await backend.buildInput({ device });
  }

  /**
@ -380,6 +480,9 @@ class CaptureManager {
    clipName,
    device,
    sourceType = 'sdi',
+    // Source-backend selection for SDI capture (issue #168). Defaults to
+    // `blackmagic` (DeckLink) so existing recorders are unaffected.
+    sourceBackend = 'blackmagic',
    sourceUrl,
    listen = false,
    listenPort,
@ -447,7 +550,7 @@ class CaptureManager {
    const startedAt = new Date().toISOString();

    const { inputArgs, isNetwork } = await this._buildInputArgs({
-      sourceType, device, sourceUrl, listen, listenPort, streamKey,
+      sourceType, sourceBackend, device, sourceUrl, listen, listenPort, streamKey,
    });

    const hiresCodecArgs = buildEncodeArgs({
@ -506,10 +609,12 @@ class CaptureManager {
        '-map', '[vhi]', '-map', '0:a:0?',
        ...hiresCodecArgs,
        hiresOutput,
-        // Output 1 — low-latency H.264 HLS preview for the UI monitor
+        // Output 1 — low-latency H.264 HLS preview for the UI monitor.
+        // GPU-encoded (h264_nvenc) when the GPU is attached to this sidecar,
+        // otherwise libx264 (issue #164). GOP is pinned to one IDR per HLS
+        // segment so segments start on keyframes (avoids black/flashing).
        '-map', '[vlo]', '-map', '0:a:0?',
-        '-c:v', 'libx264', '-preset', 'veryfast', '-tune', 'zerolatency',
-        '-pix_fmt', 'yuv420p', '-b:v', '2M', '-g', '60', '-sc_threshold', '0',
+        ...buildHlsVideoArgs(videoCodec, framerate),
        '-c:a', 'aac', '-b:a', '128k', '-ar', '44100',
        '-f', 'hls', '-hls_time', '2', '-hls_list_size', '15',
        '-hls_flags', 'delete_segments+append_list+omit_endlist',
@ -541,8 +646,8 @@ class CaptureManager {
        const hlsArgs = [
          ...inputArgs,
          '-map', '0:v:0?', '-map', '0:a:0?',
-          '-c:v', 'libx264', '-preset', 'veryfast', '-tune', 'zerolatency',
-          '-pix_fmt', 'yuv420p', '-b:v', '2M', '-g', '60', '-sc_threshold', '0',
+          // GPU-gated preview encode, same as the SDI 2nd-output path (#164).
+          ...buildHlsVideoArgs(videoCodec, framerate),
          '-c:a', 'aac', '-b:a', '128k', '-ar', '44100',
          '-f', 'hls', '-hls_time', '2', '-hls_list_size', '15',
          '-hls_flags', 'delete_segments+append_list+omit_endlist',
@ -756,4 +861,4 @@ class CaptureManager {
 }

 export default new CaptureManager();
-export { VIDEO_CODECS, AUDIO_CODECS, CONTAINER_FMT, CONTAINER_EXT };
+export { VIDEO_CODECS, AUDIO_CODECS, CONTAINER_FMT, CONTAINER_EXT, sourceBackends };
--- a/services/mam-api/src/db/migrations/032-recorder-gpu-affinity.sql
+++ b/services/mam-api/src/db/migrations/032-recorder-gpu-affinity.sql
@ -0,0 +1,10 @@
+-- Migration 032: Per-recorder GPU affinity (Issue #167)
+-- Adds a nullable GPU UUID to the recorders table so each recorder can be
+-- pinned to a specific GPU on its node. The value is passed to the node-agent
+-- sidecar-start payload and, only when the recorder requests a GPU (nvenc
+-- codec), becomes NVIDIA_VISIBLE_DEVICES for the capture container. NULL =
+-- legacy behavior (NVIDIA_VISIBLE_DEVICES=all, i.e. every GPU visible). Accepts
+-- an nvidia-smi GPU UUID (e.g. "GPU-xxxx") or a numeric index string.
+
+ALTER TABLE recorders
+  ADD COLUMN IF NOT EXISTS gpu_uuid TEXT DEFAULT NULL;
--- a/services/mam-api/src/routes/recorders.js
+++ b/services/mam-api/src/routes/recorders.js
@ -604,6 +604,11 @@ router.post('/:id/start', requireRecorderEdit, async (req, res, next) => {
    // module-level GPU_CODECS list); extend it if av1_nvenc or others are added.
    const useGpu = GPU_CODECS.includes(recorder.recording_codec);

+    // Issue #167 — per-recorder GPU affinity. When recorders.gpu_uuid is set the
+    // sidecar is pinned to that single device (NVIDIA_VISIBLE_DEVICES=<uuid>);
+    // null keeps the legacy "all" behavior. Only meaningful when useGpu is true.
+    const gpuUuid = recorder.gpu_uuid || null;
+
    // Determine whether to spawn locally or via a remote node-agent.
    const { remote: isRemote, apiUrl: targetNodeApiUrl } = await resolveNodeTarget(recorder.node_id);
    // For remote sidecars, the capture container runs on the worker host network and cannot
@ -621,7 +626,7 @@ router.post('/:id/start', requireRecorderEdit, async (req, res, next) => {
      const sidecarRes = await fetch(`${targetNodeApiUrl}/sidecar/start`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({ image: 'wild-dragon-capture:latest', env, capturePort, sourceType, useGpu }),
+        body: JSON.stringify({ image: 'wild-dragon-capture:latest', env, capturePort, sourceType, useGpu, gpuUuid }),
        signal: AbortSignal.timeout(15000),
      });
      if (!sidecarRes.ok) {
@ -664,7 +669,8 @@ router.post('/:id/start', requireRecorderEdit, async (req, res, next) => {

      const localEnv = [...env];
      if (useGpu) {
-        localEnv.push('NVIDIA_VISIBLE_DEVICES=all');
+        // Issue #167 — same per-recorder GPU affinity as the remote sidecar path.
+        localEnv.push(`NVIDIA_VISIBLE_DEVICES=${gpuUuid || 'all'}`);
        localEnv.push('NVIDIA_DRIVER_CAPABILITIES=video,compute,utility');
      }

--- a/services/node-agent/index.js
+++ b/services/node-agent/index.js
@ -102,6 +102,11 @@ async function handleSidecarStart(body, res) {
      // (ProRes, DNxHR, libx264) don't need it and it avoids a hard dep on the
      // NVIDIA container runtime on nodes that have no GPU.
      useGpu = false,
+      // Issue #167 — optional per-recorder GPU affinity. When set to a GPU
+      // UUID (e.g. "GPU-xxxx") or a numeric index, the sidecar is pinned to
+      // that single device via NVIDIA_VISIBLE_DEVICES instead of "all". null /
+      // undefined keeps the legacy "all" behavior (expose every GPU).
+      gpuUuid = null,
    } = body;

    const binds = [`${LIVE_DIR}:/live`];
@ -118,11 +123,14 @@ async function handleSidecarStart(body, res) {
    // Build the sidecar environment, injecting NVIDIA vars when GPU is requested.
    const sidecarEnv = [...env, `PORT=${capturePort}`];
    if (useGpu) {
-      // NVIDIA_VISIBLE_DEVICES=all exposes every GPU on the host.
-      // For a single-GPU node (zampp2 / L4) this is equivalent to pinning GPU 0.
-      // When we later store per-recorder GPU affinity in the DB we can pass a
-      // specific UUID here instead.
-      sidecarEnv.push('NVIDIA_VISIBLE_DEVICES=all');
+      // Issue #167 — per-recorder GPU affinity. A gpuUuid (UUID string or
+      // numeric index) pins the sidecar to exactly that device; otherwise
+      // NVIDIA_VISIBLE_DEVICES=all exposes every GPU on the host (legacy
+      // behavior — for a single-GPU node like zampp2 / L4 this equals GPU 0).
+      const visibleDevices = (gpuUuid != null && String(gpuUuid).trim() !== '')
+        ? String(gpuUuid).trim()
+        : 'all';
+      sidecarEnv.push(`NVIDIA_VISIBLE_DEVICES=${visibleDevices}`);
      sidecarEnv.push('NVIDIA_DRIVER_CAPABILITIES=video,compute,utility');
    }

@ -397,21 +405,39 @@ function sampleCpu() {
 }


-// -- Live GPU utilization sampling -----------------------------------------
+// -- Live GPU / NVENC encode telemetry sampling -----------------------------
 // Spawns a short-lived nvidia container via Docker API on each heartbeat call.
-// Returns array of { index, util_pct, mem_used_mb, mem_total_mb } per GPU,
-// or [] if no GPUs / nvidia runtime unavailable.
+// Returns array of { index, util_pct, enc_util_pct, mem_used_mb, mem_total_mb,
+// nvenc_sessions } per GPU, or [] if no GPUs / nvidia runtime unavailable.
+//
+// Three nvidia-smi queries run in one container via `sh -c`, split by SEP marker
+// lines so the CSV blocks can be told apart. Each is guarded with `|| true` so a
+// query a driver/GPU lacks (e.g. no utilization.encoder) can't abort the sample:
+//   1. --query-gpu → per-GPU gpu/encoder util + memory
+//   2. --query-compute-apps → gpu_uuid,pid,used_memory per live process; rows are
+//      counted per GPU as an NVENC/compute "session" approximation
+//   3. --query-gpu=index,uuid → maps the uuids from (2) back to a GPU index
 async function sampleGpuUtil() {
  if (!_gpuCache || _gpuCache.length === 0) return [];

-  const QUERY = '--query-gpu=index,utilization.gpu,memory.used,memory.total';
-  const FMT   = '--format=csv,noheader,nounits';
+  const GPU_QUERY = '--query-gpu=index,utilization.gpu,utilization.encoder,memory.used,memory.total';
+  const APP_QUERY = '--query-compute-apps=gpu_uuid,pid,used_memory';
+  const FMT       = '--format=csv,noheader,nounits';
+  // Map GPU index → uuid so compute-app rows (keyed by uuid) attach to a GPU.
+  const UUID_QUERY = '--query-gpu=index,uuid';
+  const SCRIPT = [
+    `nvidia-smi ${GPU_QUERY} ${FMT} || true`,
+    `echo '---SEP-APPS---'`,
+    `nvidia-smi ${APP_QUERY} ${FMT} 2>/dev/null || true`,
+    `echo '---SEP-UUID---'`,
+    `nvidia-smi ${UUID_QUERY} ${FMT} 2>/dev/null || true`,
+  ].join('; ');

  let containerId;
  try {
    const createRes = await dockerApi('POST', '/containers/create', {
      Image: 'ubuntu:22.04',
-      Cmd:   ['nvidia-smi', QUERY, FMT],
+      Cmd:   ['sh', '-c', SCRIPT],
      HostConfig: {
        AutoRemove: false,
        Runtime:    'nvidia',
@ -445,11 +471,46 @@ async function sampleGpuUtil() {
    });

    const text = logRes.replace(/[\x00-\x07].{7}/g, '').trim();
-    const lines = text.split('\n').filter(l => /^\d+,/.test(l.trim()));
+    const [gpuBlock = '', appBlock = '', uuidBlock = ''] =
+      text.split(/---SEP-(?:APPS|UUID)---/);
+
+    // uuid → index map (for attributing compute-app rows to a GPU)
+    const uuidToIndex = {};
+    uuidBlock.split('\n').forEach(l => {
+      const m = l.trim().match(/^(\d+)\s*,\s*(GPU-[0-9a-fA-F-]+)/);
+      if (m) uuidToIndex[m[2]] = parseInt(m[1], 10);
+    });
+
+    // NVENC/compute session count per GPU index (best-effort).
+    const sessionsByIndex = {};
+    appBlock.split('\n').forEach(l => {
+      const parts = l.split(',').map(s => s.trim());
+      const uuid = parts[0];
+      if (!uuid || !uuid.startsWith('GPU-')) return;
+      const idx = uuidToIndex[uuid];
+      if (idx == null) return;
+      sessionsByIndex[idx] = (sessionsByIndex[idx] || 0) + 1;
+    });
+
+    const lines = gpuBlock.split('\n').filter(l => /^\s*\d+\s*,/.test(l));

    return lines.map(line => {
-      const [idx, util, memUsed, memTotal] = line.split(',').map(s => parseInt(s.trim(), 10));
-      return { index: idx, util_pct: util, mem_used_mb: memUsed, mem_total_mb: memTotal };
+      // utilization.encoder may report "[N/A]" on cards/drivers that don't
+      // expose it — parseInt yields NaN there, which we coerce to null.
+      const cols = line.split(',').map(s => s.trim());
+      const idx      = parseInt(cols[0], 10);
+      const util     = parseInt(cols[1], 10);
+      const encUtil  = parseInt(cols[2], 10);
+      const memUsed  = parseInt(cols[3], 10);
+      const memTotal = parseInt(cols[4], 10);
+      return {
+        index:          idx,
+        util_pct:       Number.isNaN(util)    ? null : util,
+        enc_util_pct:   Number.isNaN(encUtil) ? null : encUtil,
+        mem_used_mb:    Number.isNaN(memUsed) ? null : memUsed,
+        mem_total_mb:   Number.isNaN(memTotal) ? null : memTotal,
+        nvenc_sessions: sessionsByIndex[idx] || 0,
+      };
    });
  } catch (err) {
    console.warn('[gpu-util] sampling failed:', err.message);
@ -630,6 +691,25 @@ async function heartbeat() {
  const ip_address   = getIp();
  const capabilities = detectHardware();

+  // Issue #166 — fold live NVENC/GPU encode telemetry into capabilities.gpus so
+  // the Cluster screen (which reads cluster_nodes.capabilities.gpus) can render
+  // per-GPU util / encoder util / NVENC sessions alongside the static name+VRAM.
+  // gpu_util is also sent verbatim below for any consumer reading metrics.gpus.
+  if (Array.isArray(capabilities.gpus) && gpu_util.length) {
+    capabilities.gpus = capabilities.gpus.map(g => {
+      const live = gpu_util.find(u => u.index === g.index);
+      if (!live) return g;
+      return {
+        ...g,
+        util_pct:       live.util_pct,
+        enc_util_pct:   live.enc_util_pct,
+        mem_used_mb:    live.mem_used_mb,
+        mem_total_mb:   live.mem_total_mb ?? g.memory_mb ?? null,
+        nvenc_sessions: live.nvenc_sessions,
+      };
+    });
+  }
+
  const payload = {
    hostname:     os.hostname(),
    ip_address,
--- a/services/web-ui/public/screens-admin.jsx
+++ b/services/web-ui/public/screens-admin.jsx
@ -11,6 +11,12 @@ function _normalizeNode(n, x, y) {
    index:    g.index ?? 0,
    device:   g.device || null,
    bound:    !!(g.name && g.memory_mb), // name+memory = nvidia-smi confirmed driver bound
+    // Issue #166 — live NVENC/GPU encode telemetry folded into capabilities.gpus
+    // by the node-agent heartbeat (null until a heartbeat carries it / a GPU node).
+    utilPct:   g.util_pct      != null ? g.util_pct      : null,
+    encUtilPct: g.enc_util_pct != null ? g.enc_util_pct  : null,
+    memUsedMb: g.mem_used_mb    != null ? g.mem_used_mb   : null,
+    nvencSessions: g.nvenc_sessions != null ? g.nvenc_sessions : null,
  }));

  // Blackmagic DeckLink: capabilities.blackmagic + capabilities.blackmagic_model
@ -1706,6 +1712,23 @@ function Cluster() {
                          </div>
                        )}
                        {g.device && <div style={{ fontSize: 10.5, color: "var(--text-4)", fontFamily: "var(--font-mono)" }}>{g.device}</div>}
+                        {/* Issue #166 — live NVENC/GPU encode telemetry (0 until a live encode runs) */}
+                        {(g.utilPct != null || g.encUtilPct != null || g.nvencSessions != null) && (
+                          <div style={{ display: "flex", flexWrap: "wrap", gap: "4px 10px", marginTop: 4, fontSize: 10.5, fontFamily: "var(--font-mono)" }}>
+                            {g.utilPct != null && (
+                              <span style={{ color: "var(--text-3)" }}>GPU <strong style={{ color: "var(--text-2)" }}>{g.utilPct}%</strong></span>
+                            )}
+                            {g.encUtilPct != null && (
+                              <span style={{ color: "var(--text-3)" }}>ENC <strong style={{ color: g.encUtilPct > 0 ? "var(--success)" : "var(--text-2)" }}>{g.encUtilPct}%</strong></span>
+                            )}
+                            {g.memUsedMb != null && g.memMb && (
+                              <span style={{ color: "var(--text-3)" }}>VRAM <strong style={{ color: "var(--text-2)" }}>{g.memUsedMb}/{g.memMb} MB</strong></span>
+                            )}
+                            {g.nvencSessions != null && (
+                              <span style={{ color: "var(--text-3)" }}>NVENC <strong style={{ color: g.nvencSessions > 0 ? "var(--success)" : "var(--text-2)" }}>{g.nvencSessions}</strong></span>
+                            )}
+                          </div>
+                        )}
                      </div>
                      <span style={{
                        fontSize: 10, fontWeight: 600, padding: "2px 6px", borderRadius: 3,