feat(gpu+capture): nvenc HLS preview, source-backend abstraction, GPU affinity+telemetry
#164 HLS preview uses h264_nvenc (forced-IDR, GOP=segment) when the sidecar has the GPU, else keeps libx264 fallback. #168 source-backend abstraction in capture-manager (blackmagic implemented as a behavior-preserving refactor; deltacast/aja stubbed pending hardware). #167 per-recorder gpu_uuid (migration 032) plumbed mam-api->agent-> NVIDIA_VISIBLE_DEVICES (defaults to 'all'). #166 node-agent reports encoder util + NVENC session count per GPU; Cluster screen renders per-GPU GPU/ENC util, VRAM, sessions. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
ca1eec0600
commit
08499b93b2
5 changed files with 291 additions and 67 deletions
|
|
@ -133,6 +133,59 @@ const VIDEO_CODECS = {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// nvenc codecs available in the capture image. Used both to validate the master
// codec and (issue #164) as the GPU-availability signal for the HLS preview.
const NVENC_CODECS = new Set(['h264_nvenc', 'hevc_nvenc']);

// ── GPU availability for this sidecar (issue #164) ───────────────────────
// The HLS monitor preview should be GPU-encoded (h264_nvenc) when — and only
// when — the GPU is actually attached to this capture container. A non-GPU
// recorder must keep using libx264, otherwise ffmpeg would fail to open the
// nvenc encoder and break the preview.
//
// Two signals, OR'd for robustness:
//   1) The master video codec is an nvenc codec. The recorder start route
//      derives `useGpu` from the recording codec and node-agent only injects
//      the NVIDIA env when useGpu is set — so an nvenc master codec is used as
//      a proxy for "this sidecar has the GPU".
//   2) NVIDIA_VISIBLE_DEVICES is present in the sidecar env with a value other
//      than the "no GPU" markers. This covers the (currently unused) case
//      where the GPU is present but the master codec is a CPU codec.

/**
 * Decide whether the HLS preview may be encoded with h264_nvenc.
 *
 * @param {string} masterCodec - ffmpeg encoder name of the master recording.
 * @returns {boolean} true when the master codec is an nvenc codec, or when
 *   NVIDIA_VISIBLE_DEVICES is set to something other than empty, 'void' or
 *   'none'.
 */
function gpuAvailableForPreview(masterCodec) {
  if (NVENC_CODECS.has(masterCodec)) return true;

  // Trim before comparing: a padded or whitespace-only value (e.g. from a
  // blank gpu_uuid that was interpolated into the env) must not be mistaken
  // for an attached GPU, or the preview would pick nvenc on a CPU-only sidecar.
  const visibleDevices = (process.env.NVIDIA_VISIBLE_DEVICES || '').trim();
  return visibleDevices !== '' && visibleDevices !== 'void' && visibleDevices !== 'none';
}
|
||||||
|
|
||||||
|
// Build the HLS preview video-encode args. The GOP/keyframe interval is pinned
// to one IDR per HLS segment so every segment starts on a keyframe (misaligned
// keyframes were the root cause of the playout preview black/flashing bug —
// keep the preview robust).

/**
 * @param {string} masterCodec - ffmpeg encoder name of the master recording;
 *   forwarded to gpuAvailableForPreview() to choose nvenc vs libx264.
 * @param {number|string} [framerate] - capture framerate. Accepts a number, a
 *   decimal string ("29.97") or a rational string ("30000/1001", "30000:1001").
 *   Missing, unparsable, non-finite or non-positive values fall back to 30.
 * @param {number} [segTime=2] - HLS segment length in seconds. Callers must
 *   pass the same value they give to `-hls_time`; the default matches the
 *   `-hls_time 2` used by the preview outputs.
 * @returns {string[]} ffmpeg video-encode arguments for the preview output.
 */
function buildHlsVideoArgs(masterCodec, framerate, segTime = 2) {
  // Frames-per-segment for keyframe alignment. A rational rate must be divided
  // out: parseFloat("30000/1001") alone yields 30000, i.e. a 60000-frame GOP.
  const rateText = framerate == null ? '' : String(framerate).trim();
  const rational = rateText.match(/^(\d+(?:\.\d+)?)\s*[\/:]\s*(\d+(?:\.\d+)?)$/);
  let fps = rational
    ? Number(rational[1]) / Number(rational[2])
    : Number.parseFloat(rateText);
  if (!Number.isFinite(fps) || fps <= 0) fps = 30;

  const segmentSeconds = Number.isFinite(segTime) && segTime > 0 ? segTime : 2;
  const gop = String(Math.max(1, Math.round(fps * segmentSeconds)));

  if (gpuAvailableForPreview(masterCodec)) {
    // Low-latency NVENC preset (p1 + ll tune). forced-idr + a keyframe every
    // GOP frames keeps segment boundaries on IDR frames so hls.js can sync
    // cleanly.
    return [
      '-c:v', 'h264_nvenc', '-preset', 'p1', '-tune', 'll',
      '-pix_fmt', 'yuv420p', '-b:v', '2M',
      '-g', gop, '-forced-idr', '1', '-sc_threshold', '0',
    ];
  }

  // No GPU → keep the original CPU encode (must not break a non-GPU recorder).
  return [
    '-c:v', 'libx264', '-preset', 'veryfast', '-tune', 'zerolatency',
    '-pix_fmt', 'yuv420p', '-b:v', '2M',
    '-g', gop, '-sc_threshold', '0',
  ];
}
|
||||||
|
|
||||||
const AUDIO_CODECS = {
|
const AUDIO_CODECS = {
|
||||||
pcm_s16le: { args: ['-c:a', 'pcm_s16le'], bitrateControl: false },
|
pcm_s16le: { args: ['-c:a', 'pcm_s16le'], bitrateControl: false },
|
||||||
pcm_s24le: { args: ['-c:a', 'pcm_s24le'], bitrateControl: false },
|
pcm_s24le: { args: ['-c:a', 'pcm_s24le'], bitrateControl: false },
|
||||||
|
|
@ -182,6 +235,88 @@ const GROWING_VIDEO_ARGS = [
|
||||||
];
|
];
|
||||||
const GROWING_EXT = 'mxf';
|
const GROWING_EXT = 'mxf';
|
||||||
|
|
||||||
|
// ── Source-backend abstraction (issue #168) ──────────────────────────────
// The capture input was historically hard-wired to a single `-f decklink -i …`
// construction. To allow other SDI capture cards (Deltacast, AJA) to be added
// later without touching the encode/output/HLS pipeline, the per-backend FFmpeg
// INPUT-arg construction lives behind this map. Each backend exposes:
//
//   buildInput(ctx) -> { inputArgs, isNetwork }   (may be async)
//
// where `ctx` carries the resolved recorder fields the backend needs (device).
// The rest of capture-manager consumes the returned `inputArgs` unchanged, so
// adding a backend is purely additive.
//
// `blackmagic` is intended as a behaviour-preserving extraction of the previous
// default DeckLink path. The `deltacast`/`aja` entries are stubs that throw
// until the hardware/SDK plumbing lands.
//
// The map has a null prototype so that a lookup by an arbitrary configured
// backend name (e.g. "constructor", "toString") yields undefined instead of an
// inherited Object.prototype member, letting callers report "unknown backend".
const sourceBackends = Object.assign(Object.create(null), {
  // BlackMagic DeckLink over SDI (the only backend implemented today).
  // `device` may be an integer index (0-based) or a full device name string.
  // FFmpeg 7.x DeckLink requires the full name (e.g. 'DeckLink Duo 2 (2)'), so
  // an integer index is mapped to a name via `ffmpeg -sources decklink`.
  //
  // The parser below accepts lines of the form
  //   <whitespace><hex/colon id><whitespace>[<device name>] …
  // e.g. " 81:76669a80:00000000 [DeckLink Duo (1)] (none)"; header lines that
  // start with a non-space character are skipped.
  // NOTE(review): an earlier comment here described bare "  DeckLink Duo 2"
  // lines and called the id-prefixed pattern non-matching — confirm the actual
  // `-sources decklink` output of the ffmpeg build shipped in the capture image.
  blackmagic: {
    /**
     * @param {{ device: number|string }} ctx
     * @returns {Promise<{ inputArgs: string[], isNetwork: boolean }>}
     */
    async buildInput({ device }) {
      let deckLinkName = String(device);

      if (typeof device === 'number' || /^\d+$/.test(String(device))) {
        const idx = Number.parseInt(device, 10);
        try {
          const { execSync } = await import('child_process');
          const out = execSync('ffmpeg -hide_banner -sources decklink 2>&1', { encoding: 'utf-8', timeout: 5000 });
          const names = [];
          for (const line of out.split('\n')) {
            // DeckLink source lines: " 81:76669a80:00000000 [DeckLink Duo (1)] (none)"
            const m = line.match(/^\s+[0-9a-f:]+\s+\[([^\]]+)\]/);
            if (m) names.push(m[1]);
          }
          if (names[idx]) {
            deckLinkName = names[idx];
            console.log(`[capture] DeckLink index ${idx} → "${deckLinkName}" (from ${names.length} detected: ${names.join(', ')})`);
          } else {
            // Fallback: cannot determine model name without enumeration.
            // Log a warning — operator should check the detected device list.
            console.warn(`[capture] DeckLink index ${idx} out of range (detected ${names.length} devices: ${names.join(', ')}). Falling back to index-only input — capture may fail.`);
            deckLinkName = `DeckLink (${idx})`;
          }
        } catch (err) {
          // Enumeration failed (ffmpeg missing, non-zero exit, or timeout).
          console.warn(`[capture] ffmpeg -sources decklink failed: ${err.message}. Using index ${device} directly.`);
          // Pass the numeric index directly; some ffmpeg builds accept it.
          deckLinkName = String(device);
        }
      }

      return {
        inputArgs: ['-f', 'decklink', '-i', deckLinkName],
        isNetwork: false,
      };
    },
  },

  // Stubs — hardware/SDK plumbing not yet implemented. These throw clearly so a
  // misconfigured recorder fails fast instead of silently falling back to the
  // wrong card.
  deltacast: {
    buildInput() {
      throw new Error('deltacast backend not yet implemented — requires hardware');
    },
  },
  aja: {
    buildInput() {
      throw new Error('aja backend not yet implemented — requires hardware');
    },
  },
});
|
||||||
|
|
||||||
function buildEncodeArgs({
|
function buildEncodeArgs({
|
||||||
codec, videoBitrate, framerate,
|
codec, videoBitrate, framerate,
|
||||||
audioCodec, audioBitrate, audioChannels,
|
audioCodec, audioBitrate, audioChannels,
|
||||||
|
|
@ -257,7 +392,7 @@ class CaptureManager {
|
||||||
* Returns { inputArgs, isNetwork }
|
* Returns { inputArgs, isNetwork }
|
||||||
* @private
|
* @private
|
||||||
*/
|
*/
|
||||||
async _buildInputArgs({ sourceType, device, sourceUrl, listen, listenPort, streamKey }) {
|
async _buildInputArgs({ sourceType, sourceBackend = 'blackmagic', device, sourceUrl, listen, listenPort, streamKey }) {
|
||||||
if (sourceType === 'srt') {
|
if (sourceType === 'srt') {
|
||||||
let url;
|
let url;
|
||||||
if (listen) {
|
if (listen) {
|
||||||
|
|
@ -321,50 +456,15 @@ class CaptureManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Default: SDI via DeckLink
|
// Default: SDI via a pluggable source backend (issue #168). The backend
|
||||||
// device may be an integer index (0-based) or a full device name string.
|
// selection defaults to `blackmagic` (DeckLink) so existing SDI recorders
|
||||||
// FFmpeg 7.x DeckLink requires the full name (e.g. 'DeckLink Duo 2 (2)').
|
// behave exactly as before. Deltacast/AJA backends throw until their
|
||||||
// Map integer index -> name using ffmpeg -sources decklink at runtime.
|
// hardware/SDK plumbing lands.
|
||||||
//
|
const backend = sourceBackends[sourceBackend];
|
||||||
// ffmpeg -sources decklink output format:
|
if (!backend) {
|
||||||
// Auto-detected sources for decklink:
|
throw new Error(`Unknown source backend "${sourceBackend}" — expected one of: ${Object.keys(sourceBackends).join(', ')}`);
|
||||||
// DeckLink Duo 2
|
|
||||||
// DeckLink Duo 2 (2)
|
|
||||||
// Lines containing device names start with whitespace; the header line
|
|
||||||
// starts with a non-space character. Previous code used a v4l2-style
|
|
||||||
// hex-address regex that never matched DeckLink output → index 1+ always
|
|
||||||
// fell through to a wrong fallback, producing black output from port 2+.
|
|
||||||
let deckLinkName = String(device);
|
|
||||||
if (typeof device === 'number' || /^\d+$/.test(String(device))) {
|
|
||||||
const idx = parseInt(device, 10);
|
|
||||||
try {
|
|
||||||
const { execSync } = await import('child_process');
|
|
||||||
const out = execSync('ffmpeg -hide_banner -sources decklink 2>&1', { encoding: 'utf-8', timeout: 5000 });
|
|
||||||
const names = [];
|
|
||||||
for (const line of out.split('\n')) {
|
|
||||||
// DeckLink source lines: " 81:76669a80:00000000 [DeckLink Duo (1)] (none)"
|
|
||||||
const m = line.match(/^\s+[0-9a-f:]+\s+\[([^\]]+)\]/);
|
|
||||||
if (m) names.push(m[1]);
|
|
||||||
}
|
|
||||||
if (names[idx]) {
|
|
||||||
deckLinkName = names[idx];
|
|
||||||
console.log(`[capture] DeckLink index ${idx} → "${deckLinkName}" (from ${names.length} detected: ${names.join(', ')})`);
|
|
||||||
} else {
|
|
||||||
// Fallback: cannot determine model name without enumeration.
|
|
||||||
// Log a warning — operator should check the detected device list.
|
|
||||||
console.warn(`[capture] DeckLink index ${idx} out of range (detected ${names.length} devices: ${names.join(', ')}). Falling back to index-only input — capture may fail.`);
|
|
||||||
deckLinkName = `DeckLink (${idx})`;
|
|
||||||
}
|
|
||||||
} catch (err) {
|
|
||||||
console.warn(`[capture] ffmpeg -sources decklink failed: ${err.message}. Using index ${device} directly.`);
|
|
||||||
// Pass the numeric index directly; some ffmpeg builds accept it.
|
|
||||||
deckLinkName = String(device);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return {
|
return await backend.buildInput({ device });
|
||||||
inputArgs: ['-f', 'decklink', '-i', deckLinkName],
|
|
||||||
isNetwork: false,
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -380,6 +480,9 @@ class CaptureManager {
|
||||||
clipName,
|
clipName,
|
||||||
device,
|
device,
|
||||||
sourceType = 'sdi',
|
sourceType = 'sdi',
|
||||||
|
// Source-backend selection for SDI capture (issue #168). Defaults to
|
||||||
|
// `blackmagic` (DeckLink) so existing recorders are unaffected.
|
||||||
|
sourceBackend = 'blackmagic',
|
||||||
sourceUrl,
|
sourceUrl,
|
||||||
listen = false,
|
listen = false,
|
||||||
listenPort,
|
listenPort,
|
||||||
|
|
@ -447,7 +550,7 @@ class CaptureManager {
|
||||||
const startedAt = new Date().toISOString();
|
const startedAt = new Date().toISOString();
|
||||||
|
|
||||||
const { inputArgs, isNetwork } = await this._buildInputArgs({
|
const { inputArgs, isNetwork } = await this._buildInputArgs({
|
||||||
sourceType, device, sourceUrl, listen, listenPort, streamKey,
|
sourceType, sourceBackend, device, sourceUrl, listen, listenPort, streamKey,
|
||||||
});
|
});
|
||||||
|
|
||||||
const hiresCodecArgs = buildEncodeArgs({
|
const hiresCodecArgs = buildEncodeArgs({
|
||||||
|
|
@ -506,10 +609,12 @@ class CaptureManager {
|
||||||
'-map', '[vhi]', '-map', '0:a:0?',
|
'-map', '[vhi]', '-map', '0:a:0?',
|
||||||
...hiresCodecArgs,
|
...hiresCodecArgs,
|
||||||
hiresOutput,
|
hiresOutput,
|
||||||
// Output 1 — low-latency H.264 HLS preview for the UI monitor
|
// Output 1 — low-latency H.264 HLS preview for the UI monitor.
|
||||||
|
// GPU-encoded (h264_nvenc) when the GPU is attached to this sidecar,
|
||||||
|
// otherwise libx264 (issue #164). GOP is pinned to one IDR per HLS
|
||||||
|
// segment so segments start on keyframes (avoids black/flashing).
|
||||||
'-map', '[vlo]', '-map', '0:a:0?',
|
'-map', '[vlo]', '-map', '0:a:0?',
|
||||||
'-c:v', 'libx264', '-preset', 'veryfast', '-tune', 'zerolatency',
|
...buildHlsVideoArgs(videoCodec, framerate),
|
||||||
'-pix_fmt', 'yuv420p', '-b:v', '2M', '-g', '60', '-sc_threshold', '0',
|
|
||||||
'-c:a', 'aac', '-b:a', '128k', '-ar', '44100',
|
'-c:a', 'aac', '-b:a', '128k', '-ar', '44100',
|
||||||
'-f', 'hls', '-hls_time', '2', '-hls_list_size', '15',
|
'-f', 'hls', '-hls_time', '2', '-hls_list_size', '15',
|
||||||
'-hls_flags', 'delete_segments+append_list+omit_endlist',
|
'-hls_flags', 'delete_segments+append_list+omit_endlist',
|
||||||
|
|
@ -541,8 +646,8 @@ class CaptureManager {
|
||||||
const hlsArgs = [
|
const hlsArgs = [
|
||||||
...inputArgs,
|
...inputArgs,
|
||||||
'-map', '0:v:0?', '-map', '0:a:0?',
|
'-map', '0:v:0?', '-map', '0:a:0?',
|
||||||
'-c:v', 'libx264', '-preset', 'veryfast', '-tune', 'zerolatency',
|
// GPU-gated preview encode, same as the SDI 2nd-output path (#164).
|
||||||
'-pix_fmt', 'yuv420p', '-b:v', '2M', '-g', '60', '-sc_threshold', '0',
|
...buildHlsVideoArgs(videoCodec, framerate),
|
||||||
'-c:a', 'aac', '-b:a', '128k', '-ar', '44100',
|
'-c:a', 'aac', '-b:a', '128k', '-ar', '44100',
|
||||||
'-f', 'hls', '-hls_time', '2', '-hls_list_size', '15',
|
'-f', 'hls', '-hls_time', '2', '-hls_list_size', '15',
|
||||||
'-hls_flags', 'delete_segments+append_list+omit_endlist',
|
'-hls_flags', 'delete_segments+append_list+omit_endlist',
|
||||||
|
|
@ -756,4 +861,4 @@ class CaptureManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
export default new CaptureManager();
|
export default new CaptureManager();
|
||||||
export { VIDEO_CODECS, AUDIO_CODECS, CONTAINER_FMT, CONTAINER_EXT };
|
export { VIDEO_CODECS, AUDIO_CODECS, CONTAINER_FMT, CONTAINER_EXT, sourceBackends };
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,10 @@
|
||||||
|
-- Migration 032: Per-recorder GPU affinity (Issue #167)
-- Adds a nullable GPU identifier to the recorders table so each recorder can be
-- pinned to a specific GPU on its node. The value is passed through to the
-- node-agent sidecar-start payload and becomes NVIDIA_VISIBLE_DEVICES for the
-- capture container. NULL = legacy behavior (NVIDIA_VISIBLE_DEVICES=all, i.e.
-- every GPU visible). Accepts an nvidia-smi GPU UUID (e.g. "GPU-xxxx") or a
-- numeric index string.
--
-- IF NOT EXISTS makes the statement safe to re-run; the column defaults to
-- NULL, so rows that do not set it keep the legacy "all" behavior.

ALTER TABLE recorders
ADD COLUMN IF NOT EXISTS gpu_uuid TEXT DEFAULT NULL;
|
||||||
|
|
@ -604,6 +604,11 @@ router.post('/:id/start', requireRecorderEdit, async (req, res, next) => {
|
||||||
// module-level GPU_CODECS list); extend it if av1_nvenc or others are added.
|
// module-level GPU_CODECS list); extend it if av1_nvenc or others are added.
|
||||||
const useGpu = GPU_CODECS.includes(recorder.recording_codec);
|
const useGpu = GPU_CODECS.includes(recorder.recording_codec);
|
||||||
|
|
||||||
|
// Issue #167 — per-recorder GPU affinity. When recorders.gpu_uuid is set the
|
||||||
|
// sidecar is pinned to that single device (NVIDIA_VISIBLE_DEVICES=<uuid>);
|
||||||
|
// null keeps the legacy "all" behavior. Only meaningful when useGpu is true.
|
||||||
|
const gpuUuid = recorder.gpu_uuid || null;
|
||||||
|
|
||||||
// Determine whether to spawn locally or via a remote node-agent.
|
// Determine whether to spawn locally or via a remote node-agent.
|
||||||
const { remote: isRemote, apiUrl: targetNodeApiUrl } = await resolveNodeTarget(recorder.node_id);
|
const { remote: isRemote, apiUrl: targetNodeApiUrl } = await resolveNodeTarget(recorder.node_id);
|
||||||
// For remote sidecars, the capture container runs on the worker host network and cannot
|
// For remote sidecars, the capture container runs on the worker host network and cannot
|
||||||
|
|
@ -621,7 +626,7 @@ router.post('/:id/start', requireRecorderEdit, async (req, res, next) => {
|
||||||
const sidecarRes = await fetch(`${targetNodeApiUrl}/sidecar/start`, {
|
const sidecarRes = await fetch(`${targetNodeApiUrl}/sidecar/start`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: { 'Content-Type': 'application/json' },
|
headers: { 'Content-Type': 'application/json' },
|
||||||
body: JSON.stringify({ image: 'wild-dragon-capture:latest', env, capturePort, sourceType, useGpu }),
|
body: JSON.stringify({ image: 'wild-dragon-capture:latest', env, capturePort, sourceType, useGpu, gpuUuid }),
|
||||||
signal: AbortSignal.timeout(15000),
|
signal: AbortSignal.timeout(15000),
|
||||||
});
|
});
|
||||||
if (!sidecarRes.ok) {
|
if (!sidecarRes.ok) {
|
||||||
|
|
@ -664,7 +669,8 @@ router.post('/:id/start', requireRecorderEdit, async (req, res, next) => {
|
||||||
|
|
||||||
const localEnv = [...env];
|
const localEnv = [...env];
|
||||||
if (useGpu) {
|
if (useGpu) {
|
||||||
localEnv.push('NVIDIA_VISIBLE_DEVICES=all');
|
// Issue #167 — same per-recorder GPU affinity as the remote sidecar path.
|
||||||
|
localEnv.push(`NVIDIA_VISIBLE_DEVICES=${gpuUuid || 'all'}`);
|
||||||
localEnv.push('NVIDIA_DRIVER_CAPABILITIES=video,compute,utility');
|
localEnv.push('NVIDIA_DRIVER_CAPABILITIES=video,compute,utility');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -102,6 +102,11 @@ async function handleSidecarStart(body, res) {
|
||||||
// (ProRes, DNxHR, libx264) don't need it and it avoids a hard dep on the
|
// (ProRes, DNxHR, libx264) don't need it and it avoids a hard dep on the
|
||||||
// NVIDIA container runtime on nodes that have no GPU.
|
// NVIDIA container runtime on nodes that have no GPU.
|
||||||
useGpu = false,
|
useGpu = false,
|
||||||
|
// Issue #167 — optional per-recorder GPU affinity. When set to a GPU
|
||||||
|
// UUID (e.g. "GPU-xxxx") or a numeric index, the sidecar is pinned to
|
||||||
|
// that single device via NVIDIA_VISIBLE_DEVICES instead of "all". null /
|
||||||
|
// undefined keeps the legacy "all" behavior (expose every GPU).
|
||||||
|
gpuUuid = null,
|
||||||
} = body;
|
} = body;
|
||||||
|
|
||||||
const binds = [`${LIVE_DIR}:/live`];
|
const binds = [`${LIVE_DIR}:/live`];
|
||||||
|
|
@ -118,11 +123,14 @@ async function handleSidecarStart(body, res) {
|
||||||
// Build the sidecar environment, injecting NVIDIA vars when GPU is requested.
|
// Build the sidecar environment, injecting NVIDIA vars when GPU is requested.
|
||||||
const sidecarEnv = [...env, `PORT=${capturePort}`];
|
const sidecarEnv = [...env, `PORT=${capturePort}`];
|
||||||
if (useGpu) {
|
if (useGpu) {
|
||||||
// NVIDIA_VISIBLE_DEVICES=all exposes every GPU on the host.
|
// Issue #167 — per-recorder GPU affinity. A gpuUuid (UUID string or
|
||||||
// For a single-GPU node (zampp2 / L4) this is equivalent to pinning GPU 0.
|
// numeric index) pins the sidecar to exactly that device; otherwise
|
||||||
// When we later store per-recorder GPU affinity in the DB we can pass a
|
// NVIDIA_VISIBLE_DEVICES=all exposes every GPU on the host (legacy
|
||||||
// specific UUID here instead.
|
// behavior — for a single-GPU node like zampp2 / L4 this equals GPU 0).
|
||||||
sidecarEnv.push('NVIDIA_VISIBLE_DEVICES=all');
|
const visibleDevices = (gpuUuid != null && String(gpuUuid).trim() !== '')
|
||||||
|
? String(gpuUuid).trim()
|
||||||
|
: 'all';
|
||||||
|
sidecarEnv.push(`NVIDIA_VISIBLE_DEVICES=${visibleDevices}`);
|
||||||
sidecarEnv.push('NVIDIA_DRIVER_CAPABILITIES=video,compute,utility');
|
sidecarEnv.push('NVIDIA_DRIVER_CAPABILITIES=video,compute,utility');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -397,21 +405,39 @@ function sampleCpu() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// -- Live GPU utilization sampling -----------------------------------------
|
// -- Live GPU / NVENC encode telemetry sampling -----------------------------
|
||||||
// Spawns a short-lived nvidia container via Docker API on each heartbeat call.
|
// Spawns a short-lived nvidia container via Docker API on each heartbeat call.
|
||||||
// Returns array of { index, util_pct, mem_used_mb, mem_total_mb } per GPU,
|
// Returns array of { index, util_pct, enc_util_pct, mem_used_mb, mem_total_mb,
|
||||||
// or [] if no GPUs / nvidia runtime unavailable.
|
// nvenc_sessions } per GPU, or [] if no GPUs / nvidia runtime unavailable.
|
||||||
|
//
|
||||||
|
// Two nvidia-smi queries are run inside one container via `sh -c`, each guarded
|
||||||
|
// with `|| true` so a query unsupported on a given driver/GPU (e.g. older cards
|
||||||
|
// that don't expose utilization.encoder) doesn't abort the whole sample:
|
||||||
|
// 1. --query-gpu → per-GPU gpu/encoder util + memory
|
||||||
|
// 2. --query-compute-apps → pid,used_memory,gpu_uuid for live processes; we
|
||||||
|
// count rows per GPU as an NVENC/compute "session" approximation. Marked
|
||||||
|
// with a SEP line so the two CSV blocks can be told apart in the log.
|
||||||
async function sampleGpuUtil() {
|
async function sampleGpuUtil() {
|
||||||
if (!_gpuCache || _gpuCache.length === 0) return [];
|
if (!_gpuCache || _gpuCache.length === 0) return [];
|
||||||
|
|
||||||
const QUERY = '--query-gpu=index,utilization.gpu,memory.used,memory.total';
|
const GPU_QUERY = '--query-gpu=index,utilization.gpu,utilization.encoder,memory.used,memory.total';
|
||||||
const FMT = '--format=csv,noheader,nounits';
|
const APP_QUERY = '--query-compute-apps=gpu_uuid,pid,used_memory';
|
||||||
|
const FMT = '--format=csv,noheader,nounits';
|
||||||
|
// Map GPU index → uuid so compute-app rows (keyed by uuid) attach to a GPU.
|
||||||
|
const UUID_QUERY = '--query-gpu=index,uuid';
|
||||||
|
const SCRIPT = [
|
||||||
|
`nvidia-smi ${GPU_QUERY} ${FMT} || true`,
|
||||||
|
`echo '---SEP-APPS---'`,
|
||||||
|
`nvidia-smi ${APP_QUERY} ${FMT} 2>/dev/null || true`,
|
||||||
|
`echo '---SEP-UUID---'`,
|
||||||
|
`nvidia-smi ${UUID_QUERY} ${FMT} 2>/dev/null || true`,
|
||||||
|
].join('; ');
|
||||||
|
|
||||||
let containerId;
|
let containerId;
|
||||||
try {
|
try {
|
||||||
const createRes = await dockerApi('POST', '/containers/create', {
|
const createRes = await dockerApi('POST', '/containers/create', {
|
||||||
Image: 'ubuntu:22.04',
|
Image: 'ubuntu:22.04',
|
||||||
Cmd: ['nvidia-smi', QUERY, FMT],
|
Cmd: ['sh', '-c', SCRIPT],
|
||||||
HostConfig: {
|
HostConfig: {
|
||||||
AutoRemove: false,
|
AutoRemove: false,
|
||||||
Runtime: 'nvidia',
|
Runtime: 'nvidia',
|
||||||
|
|
@ -445,11 +471,46 @@ async function sampleGpuUtil() {
|
||||||
});
|
});
|
||||||
|
|
||||||
const text = logRes.replace(/[\x00-\x07].{7}/g, '').trim();
|
const text = logRes.replace(/[\x00-\x07].{7}/g, '').trim();
|
||||||
const lines = text.split('\n').filter(l => /^\d+,/.test(l.trim()));
|
const [gpuBlock = '', appBlock = '', uuidBlock = ''] =
|
||||||
|
text.split(/---SEP-(?:APPS|UUID)---/);
|
||||||
|
|
||||||
|
// uuid → index map (for attributing compute-app rows to a GPU)
|
||||||
|
const uuidToIndex = {};
|
||||||
|
uuidBlock.split('\n').forEach(l => {
|
||||||
|
const m = l.trim().match(/^(\d+)\s*,\s*(GPU-[0-9a-fA-F-]+)/);
|
||||||
|
if (m) uuidToIndex[m[2]] = parseInt(m[1], 10);
|
||||||
|
});
|
||||||
|
|
||||||
|
// NVENC/compute session count per GPU index (best-effort).
|
||||||
|
const sessionsByIndex = {};
|
||||||
|
appBlock.split('\n').forEach(l => {
|
||||||
|
const parts = l.split(',').map(s => s.trim());
|
||||||
|
const uuid = parts[0];
|
||||||
|
if (!uuid || !uuid.startsWith('GPU-')) return;
|
||||||
|
const idx = uuidToIndex[uuid];
|
||||||
|
if (idx == null) return;
|
||||||
|
sessionsByIndex[idx] = (sessionsByIndex[idx] || 0) + 1;
|
||||||
|
});
|
||||||
|
|
||||||
|
const lines = gpuBlock.split('\n').filter(l => /^\s*\d+\s*,/.test(l));
|
||||||
|
|
||||||
return lines.map(line => {
|
return lines.map(line => {
|
||||||
const [idx, util, memUsed, memTotal] = line.split(',').map(s => parseInt(s.trim(), 10));
|
// utilization.encoder may report "[N/A]" on cards/drivers that don't
|
||||||
return { index: idx, util_pct: util, mem_used_mb: memUsed, mem_total_mb: memTotal };
|
// expose it — parseInt yields NaN there, which we coerce to null.
|
||||||
|
const cols = line.split(',').map(s => s.trim());
|
||||||
|
const idx = parseInt(cols[0], 10);
|
||||||
|
const util = parseInt(cols[1], 10);
|
||||||
|
const encUtil = parseInt(cols[2], 10);
|
||||||
|
const memUsed = parseInt(cols[3], 10);
|
||||||
|
const memTotal = parseInt(cols[4], 10);
|
||||||
|
return {
|
||||||
|
index: idx,
|
||||||
|
util_pct: Number.isNaN(util) ? null : util,
|
||||||
|
enc_util_pct: Number.isNaN(encUtil) ? null : encUtil,
|
||||||
|
mem_used_mb: Number.isNaN(memUsed) ? null : memUsed,
|
||||||
|
mem_total_mb: Number.isNaN(memTotal) ? null : memTotal,
|
||||||
|
nvenc_sessions: sessionsByIndex[idx] || 0,
|
||||||
|
};
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.warn('[gpu-util] sampling failed:', err.message);
|
console.warn('[gpu-util] sampling failed:', err.message);
|
||||||
|
|
@ -630,6 +691,25 @@ async function heartbeat() {
|
||||||
const ip_address = getIp();
|
const ip_address = getIp();
|
||||||
const capabilities = detectHardware();
|
const capabilities = detectHardware();
|
||||||
|
|
||||||
|
// Issue #166 — fold live NVENC/GPU encode telemetry into capabilities.gpus so
|
||||||
|
// the Cluster screen (which reads cluster_nodes.capabilities.gpus) can render
|
||||||
|
// per-GPU util / encoder util / NVENC sessions alongside the static name+VRAM.
|
||||||
|
// gpu_util is also sent verbatim below for any consumer reading metrics.gpus.
|
||||||
|
if (Array.isArray(capabilities.gpus) && gpu_util.length) {
|
||||||
|
capabilities.gpus = capabilities.gpus.map(g => {
|
||||||
|
const live = gpu_util.find(u => u.index === g.index);
|
||||||
|
if (!live) return g;
|
||||||
|
return {
|
||||||
|
...g,
|
||||||
|
util_pct: live.util_pct,
|
||||||
|
enc_util_pct: live.enc_util_pct,
|
||||||
|
mem_used_mb: live.mem_used_mb,
|
||||||
|
mem_total_mb: live.mem_total_mb ?? g.memory_mb ?? null,
|
||||||
|
nvenc_sessions: live.nvenc_sessions,
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
const payload = {
|
const payload = {
|
||||||
hostname: os.hostname(),
|
hostname: os.hostname(),
|
||||||
ip_address,
|
ip_address,
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,12 @@ function _normalizeNode(n, x, y) {
|
||||||
index: g.index ?? 0,
|
index: g.index ?? 0,
|
||||||
device: g.device || null,
|
device: g.device || null,
|
||||||
bound: !!(g.name && g.memory_mb), // name+memory = nvidia-smi confirmed driver bound
|
bound: !!(g.name && g.memory_mb), // name+memory = nvidia-smi confirmed driver bound
|
||||||
|
// Issue #166 — live NVENC/GPU encode telemetry folded into capabilities.gpus
|
||||||
|
// by the node-agent heartbeat (null until a heartbeat carries it / a GPU node).
|
||||||
|
utilPct: g.util_pct != null ? g.util_pct : null,
|
||||||
|
encUtilPct: g.enc_util_pct != null ? g.enc_util_pct : null,
|
||||||
|
memUsedMb: g.mem_used_mb != null ? g.mem_used_mb : null,
|
||||||
|
nvencSessions: g.nvenc_sessions != null ? g.nvenc_sessions : null,
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Blackmagic DeckLink: capabilities.blackmagic + capabilities.blackmagic_model
|
// Blackmagic DeckLink: capabilities.blackmagic + capabilities.blackmagic_model
|
||||||
|
|
@ -1706,6 +1712,23 @@ function Cluster() {
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
{g.device && <div style={{ fontSize: 10.5, color: "var(--text-4)", fontFamily: "var(--font-mono)" }}>{g.device}</div>}
|
{g.device && <div style={{ fontSize: 10.5, color: "var(--text-4)", fontFamily: "var(--font-mono)" }}>{g.device}</div>}
|
||||||
|
{/* Issue #166 — live NVENC/GPU encode telemetry (0 until a live encode runs) */}
|
||||||
|
{(g.utilPct != null || g.encUtilPct != null || g.nvencSessions != null) && (
|
||||||
|
<div style={{ display: "flex", flexWrap: "wrap", gap: "4px 10px", marginTop: 4, fontSize: 10.5, fontFamily: "var(--font-mono)" }}>
|
||||||
|
{g.utilPct != null && (
|
||||||
|
<span style={{ color: "var(--text-3)" }}>GPU <strong style={{ color: "var(--text-2)" }}>{g.utilPct}%</strong></span>
|
||||||
|
)}
|
||||||
|
{g.encUtilPct != null && (
|
||||||
|
<span style={{ color: "var(--text-3)" }}>ENC <strong style={{ color: g.encUtilPct > 0 ? "var(--success)" : "var(--text-2)" }}>{g.encUtilPct}%</strong></span>
|
||||||
|
)}
|
||||||
|
{g.memUsedMb != null && g.memMb && (
|
||||||
|
<span style={{ color: "var(--text-3)" }}>VRAM <strong style={{ color: "var(--text-2)" }}>{g.memUsedMb}/{g.memMb} MB</strong></span>
|
||||||
|
)}
|
||||||
|
{g.nvencSessions != null && (
|
||||||
|
<span style={{ color: "var(--text-3)" }}>NVENC <strong style={{ color: g.nvencSessions > 0 ? "var(--success)" : "var(--text-2)" }}>{g.nvencSessions}</strong></span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
<span style={{
|
<span style={{
|
||||||
fontSize: 10, fontWeight: 600, padding: "2px 6px", borderRadius: 3,
|
fontSize: 10, fontWeight: 600, padding: "2px 6px", borderRadius: 3,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue