dragonflight/services/node-agent/index.js
ZGaetano 3b4af6ef11 node-agent: prefer NODE_IP and skip docker bridge interfaces
In bridge mode the agent was reporting the container's 172.x address
because the first non-internal interface in os.networkInterfaces() was
docker0. Now honours NODE_IP, skips lo/docker*/br-*/veth*/etc, and
down-ranks the 172.16-31 range so real LAN IPs win. Also exposes the
detected IP on /health for the onboarding script to print.
2026-05-21 00:15:03 -04:00

170 lines
6.3 KiB
JavaScript

import http from 'http';
import os from 'os';
import fs from 'fs';
/**
 * Read an integer setting from the environment, falling back when it is
 * unset, blank, not an integer, or outside [min, max].
 *
 * A bad value is reported once on stderr instead of being passed on: a NaN
 * or out-of-range delay makes setInterval() fire every 1 ms, and an invalid
 * port only fails later inside server.listen().
 *
 * @param {string} name      Environment variable name.
 * @param {number} fallback  Value used when the variable is unusable.
 * @param {number} min       Smallest accepted value (inclusive).
 * @param {number} max       Largest accepted value (inclusive).
 * @returns {number} The parsed integer, or `fallback`.
 */
function intFromEnv(name, fallback, min, max) {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === '') return fallback;
  const value = Number.parseInt(raw, 10);
  if (Number.isInteger(value) && value >= min && value <= max) return value;
  console.warn(`[config] ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}`);
  return fallback;
}

// Base URL of the primary; the heartbeat path is appended to it, so every
// trailing slash is stripped to avoid "//" in the request URL.
const MAM_API_URL = (process.env.MAM_API_URL || 'http://localhost:3000').replace(/\/+$/, '');
// Sent as a Bearer token with each heartbeat when non-empty.
const NODE_TOKEN = process.env.NODE_TOKEN || '';
// Reported verbatim on /health and in the heartbeat payload.
const NODE_ROLE = process.env.NODE_ROLE || 'worker';
// Port the agent listens on and advertises in api_url. Port 0 (random port)
// is rejected because the advertised URL would not match the bound port.
const AGENT_PORT = intFromEnv('AGENT_PORT', 7436, 1, 65535);
// Heartbeat period in ms, bounded to the delay range setInterval() accepts.
const HEARTBEAT_MS = intFromEnv('HEARTBEAT_MS', 30000, 1, 2147483647);
const VERSION = '1.1.0';
// Pick the host's LAN IP. Inside a bridge-mode container,
// os.networkInterfaces() returns the container's docker-bridge IP (172.x),
// not the host's LAN address. Two strategies:
//   1. honour an explicit NODE_IP override (set by onboard-node.sh)
//   2. filter out interfaces that obviously belong to container/virtual
//      bridges and down-rank the docker bridge range so real LAN IPs win
//
// Combine with `network_mode: host` in docker-compose.worker.yml for the
// most reliable result — that lets os.networkInterfaces() see the host's
// real adapters directly.
/**
 * Resolve the IPv4 address this node should advertise.
 *
 * Ranking of detected addresses (lower wins, ties keep interface order):
 *   0 — any other non-internal IPv4 address
 *   1 — 172.16.0.0/12, the range docker bridges are usually allocated from
 *   2 — 169.254.0.0/16 link-local, i.e. an adapter without a real lease
 *
 * @returns {string|null} The trimmed NODE_IP override when set, otherwise the
 *   best-ranked detected address, or null when none qualifies.
 */
function getIp() {
  const override = process.env.NODE_IP?.trim();
  if (override) return override;

  // `lo` must match the whole name (lo, lo0): as a bare case-insensitive
  // prefix it also discarded real adapters such as "Local Area Connection".
  // Loopback addresses are rejected by the `internal` flag regardless.
  const SKIP_IFACE = /^(lo\d*$|docker\d*|br-|veth|cni|flannel|cali|tun|tap|virbr|kube)/i;
  const isContainerBridge = (ip) => /^172\.(1[6-9]|2\d|3[01])\./.test(ip);
  const isLinkLocal = (ip) => ip.startsWith('169.254.');
  // Node 18.0–18.3 report `family` as the number 4 rather than 'IPv4'.
  const isIPv4 = (family) => family === 'IPv4' || family === 4;
  const rankOf = (ip) => {
    if (isLinkLocal(ip)) return 2;
    return isContainerBridge(ip) ? 1 : 0;
  };

  const candidates = [];
  for (const [name, addrs] of Object.entries(os.networkInterfaces())) {
    if (SKIP_IFACE.test(name)) continue;
    for (const a of (addrs || [])) {
      if (!isIPv4(a.family) || a.internal) continue;
      candidates.push({ name, address: a.address, rank: rankOf(a.address) });
    }
  }

  // Array.prototype.sort is stable, so equal ranks keep enumeration order.
  candidates.sort((a, b) => a.rank - b.rank);
  return candidates.length ? candidates[0].address : null;
}
// HTTP endpoint of the agent. `GET /health` answers with a small JSON status
// document (including the detected IP); every other request gets an empty 404.
const server = http.createServer((request, response) => {
  const isHealthProbe = request.method === 'GET' && request.url === '/health';

  if (!isHealthProbe) {
    response.writeHead(404);
    response.end();
    return;
  }

  response.writeHead(200, { 'Content-Type': 'application/json' });
  const report = {
    ok: true,
    hostname: os.hostname(),
    uptime: Math.round(process.uptime()),
    version: VERSION,
    role: NODE_ROLE,
    ip: getIp(),
  };
  response.end(JSON.stringify(report));
});
// ── CPU sampling (500ms window) ───────────────────────────────────────────
/**
 * Measure overall CPU utilisation across all cores.
 *
 * Takes two os.cpus() snapshots 500 ms apart and compares the per-core time
 * counters: the share of the elapsed ticks that was not idle is the load.
 *
 * @returns {Promise<number>} Busy percentage rounded to two decimals, or 0
 *   when no ticks elapsed between the snapshots.
 */
function sampleCpu() {
  return new Promise((resolve) => {
    const before = os.cpus();

    setTimeout(() => {
      const after = os.cpus();
      let idleTicks = 0;
      let totalTicks = 0;

      for (const [core, cpu] of after.entries()) {
        for (const [mode, ticks] of Object.entries(cpu.times)) {
          // A core missing from the first snapshot counts from zero.
          const delta = ticks - (before[core]?.times[mode] ?? 0);
          totalTicks += delta;
          if (mode === 'idle') {
            idleTicks += delta;
          }
        }
      }

      if (totalTicks > 0) {
        resolve(Math.round((1 - idleTicks / totalTicks) * 10000) / 100);
      } else {
        resolve(0);
      }
    }, 500);
  });
}
// ── Hardware detection ────────────────────────────────────────────────────
// GPU_COUNT / BMD_COUNT env vars override filesystem detection when /dev isn't mapped
/**
 * Report the capture/compute hardware visible to this node.
 *
 * GPUs: when GPU_COUNT is a non-negative integer, that many `/dev/nvidiaN`
 * entries are synthesised; otherwise `/dev/nvidia0` … `/dev/nvidia15` are
 * probed individually.
 *
 * Blackmagic: when BMD_COUNT is a non-negative integer, that many
 * `/dev/blackmagic/dvN` entries are synthesised; otherwise the entries of
 * `/dev/blackmagic` are listed (an unreadable directory yields an empty list).
 *
 * @returns {{gpus: Array<{device: string, type: string, index: number}>,
 *            blackmagic: Array<{device: string, index: number}>,
 *            blackmagic_model?: string}}
 */
function detectHardware() {
  const capabilities = { gpus: [], blackmagic: [] };

  const gpuOverride = parseInt(process.env.GPU_COUNT || '-1', 10);
  if (gpuOverride >= 0) {
    for (let i = 0; i < gpuOverride; i++) {
      capabilities.gpus.push({ device: `/dev/nvidia${i}`, type: 'nvidia', index: i });
    }
  } else {
    for (let i = 0; i < 16; i++) {
      const device = `/dev/nvidia${i}`;
      try {
        fs.accessSync(device, fs.constants.F_OK);
      } catch {
        // Keep probing: device numbers are not guaranteed to be contiguous
        // (e.g. only /dev/nvidia1 mapped into the container), so a gap must
        // not hide the devices that come after it.
        continue;
      }
      capabilities.gpus.push({ device, type: 'nvidia', index: i });
    }
  }

  const bmdOverride = parseInt(process.env.BMD_COUNT || '-1', 10);
  if (bmdOverride >= 0) {
    for (let i = 0; i < bmdOverride; i++) {
      capabilities.blackmagic.push({ device: `/dev/blackmagic/dv${i}`, index: i });
    }
  } else {
    try {
      // readdir order is unspecified; natural-sort the names so each device
      // keeps the same index from one heartbeat to the next (dv2 before dv10).
      const bmdEntries = fs
        .readdirSync('/dev/blackmagic')
        .sort((a, b) => a.localeCompare(b, 'en', { numeric: true }));
      capabilities.blackmagic = bmdEntries.map((d, i) => ({ device: `/dev/blackmagic/${d}`, index: i }));
    } catch {
      // Best effort: a missing or unreadable /dev/blackmagic means no cards.
    }
  }

  // Best-effort model name from BMD_MODEL env (set manually) — used by the UI
  // to render the correct card layout (Duo 2, Quad 2, Mini Recorder, ...).
  if (process.env.BMD_MODEL) capabilities.blackmagic_model = process.env.BMD_MODEL;
  return capabilities;
}
// ── Heartbeat ─────────────────────────────────────────────────────────────
/**
 * Sample this node's state and POST it to the primary's
 * `/api/v1/cluster/heartbeat` endpoint.
 *
 * The payload carries hostname, detected IP, role, agent version, the URL
 * this agent is reachable on, CPU and memory usage, and detected hardware.
 * A successful post is logged as one `[hb]` line on stdout; a non-2xx reply
 * or any failure is logged on stderr.
 *
 * Callers invoke this without awaiting it, so it must never reject: the whole
 * body — sampling and detection included — runs inside the try block.
 *
 * @returns {Promise<void>}
 */
async function heartbeat() {
  try {
    const cpu_usage = await sampleCpu();
    const totalMem = os.totalmem();
    const freeMem = os.freemem();
    const ip_address = getIp();
    const capabilities = detectHardware();
    const payload = {
      hostname: os.hostname(),
      ip_address,
      role: NODE_ROLE,
      version: VERSION,
      // Fall back to the hostname when no usable IP was detected.
      api_url: `http://${ip_address || os.hostname()}:${AGENT_PORT}`,
      cpu_usage,
      mem_used_mb: Math.round((totalMem - freeMem) / 1048576),
      mem_total_mb: Math.round(totalMem / 1048576),
      capabilities,
    };
    const headers = { 'Content-Type': 'application/json' };
    if (NODE_TOKEN) headers['Authorization'] = `Bearer ${NODE_TOKEN}`;

    const res = await fetch(`${MAM_API_URL}/api/v1/cluster/heartbeat`, {
      method: 'POST',
      headers,
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(8000),
    });
    if (res.ok) {
      // The reply body is not needed; cancel it so the connection is released
      // now rather than whenever the response object is garbage-collected.
      await res.body?.cancel().catch(() => {});
      const gpuStr = capabilities.gpus.length ? ` gpu=${capabilities.gpus.length}` : '';
      const bmdStr = capabilities.blackmagic.length ? ` bmd=${capabilities.blackmagic.length}` : '';
      process.stdout.write(
        `[hb] ${payload.hostname} ip=${ip_address || '?'} cpu=${cpu_usage}% ` +
        `mem=${payload.mem_used_mb}/${payload.mem_total_mb}MB${gpuStr}${bmdStr}\n`
      );
    } else {
      const txt = await res.text().catch(() => '');
      console.error(`[hb] ${res.status} ${txt.slice(0, 120)}`);
    }
  } catch (err) {
    console.error(`[hb] failed — ${err?.message ?? err}`);
  }
}
// heartbeat() returns a promise that nothing awaits. Route every invocation
// through a catch so an unexpected rejection is logged instead of surfacing
// as an unhandled rejection, which terminates the process.
const runHeartbeat = () => {
  heartbeat().catch((err) => {
    console.error(`[hb] failed — ${err?.message ?? err}`);
  });
};

// Announce immediately, then once every HEARTBEAT_MS.
runHeartbeat();
setInterval(runHeartbeat, HEARTBEAT_MS);

// Start the HTTP endpoint and print the effective configuration once bound.
server.listen(AGENT_PORT, () => {
  console.log(`wild-dragon-node-agent v${VERSION}`);
  console.log(` Listening :${AGENT_PORT}`);
  console.log(` Host IP ${getIp() || '(unresolved)'}`);
  console.log(` Primary ${MAM_API_URL}`);
  console.log(` Role ${NODE_ROLE}`);
  console.log(` Heartbeat every ${HEARTBEAT_MS / 1000}s`);
});