diff --git a/services/node-agent/index.js b/services/node-agent/index.js index 150cfec..26e4205 100644 --- a/services/node-agent/index.js +++ b/services/node-agent/index.js @@ -2,7 +2,7 @@ import http from 'http'; import os from 'os'; import fs from 'fs'; import crypto from 'crypto'; -import { spawn, execFileSync } from 'child_process'; +import { spawn } from 'child_process'; const MAM_API_URL = (process.env.MAM_API_URL || 'http://localhost:3000').replace(/\/$/, ''); const NODE_TOKEN = process.env.NODE_TOKEN || ''; @@ -32,20 +32,16 @@ const VERSION = '1.4.0'; // 8 ports = 16 sessions hammering one card → it falls below realtime → the // framecache ring laps → video freezes/stutters then recovers. Pinning each // sidecar to GPU (port % CAPTURE_GPU_COUNT) spreads the load across all cards. -// Auto-detected from nvidia-smi at boot; override with CAPTURE_GPU_COUNT. -let CAPTURE_GPU_COUNT = parseInt(process.env.CAPTURE_GPU_COUNT || '0', 10) || 0; +// GPU count comes from _gpuCache (populated at startup via probeGpusViaSmi, +// which runs nvidia-smi in a throwaway GPU container because the node-agent +// image itself has no nvidia-smi). CAPTURE_GPU_COUNT env overrides everything. function detectGpuCount() { - if (CAPTURE_GPU_COUNT > 0) return CAPTURE_GPU_COUNT; - try { - const out = execFileSync('nvidia-smi', ['--query-gpu=index', '--format=csv,noheader'], { timeout: 5000 }) - .toString().trim(); - const n = out ? out.split('\n').filter(Boolean).length : 0; - CAPTURE_GPU_COUNT = n > 0 ? n : 1; - } catch (_) { - CAPTURE_GPU_COUNT = 1; // no nvidia-smi / no GPU — single-device fallback - } - console.log(`[gpu] capture encodes will spread across ${CAPTURE_GPU_COUNT} GPU(s)`); - return CAPTURE_GPU_COUNT; + const envN = parseInt(process.env.CAPTURE_GPU_COUNT || '0', 10) || 0; + if (envN > 0) return envN; + // _gpuCache is declared later in this file; the name is looked up when this function runs, not when it is defined, so call it only after that declaration has executed. + // null = not probed yet; [] = no GPUs; otherwise one entry per GPU. 
+ if (Array.isArray(_gpuCache) && _gpuCache.length > 0) return _gpuCache.length; + return 1; // not probed yet / no GPUs → single-device fallback ('all') } // Choose the NVIDIA_VISIBLE_DEVICES value for a capture sidecar. An explicit