2026-05-20 13:48:18 -04:00
|
|
|
import express from 'express';
|
|
|
|
|
import os from 'os';
|
2026-05-20 14:18:07 -04:00
|
|
|
import fs from 'fs';
|
2026-05-20 13:48:18 -04:00
|
|
|
|
|
|
|
|
/**
 * Reads an integer setting from the environment.
 *
 * Falls back to `fallback` when the variable is unset, empty, not parseable
 * as a base-10 integer, or outside the inclusive range [min, max]. A warning
 * is logged when a value is present but rejected.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value returned when the variable is unset or invalid.
 * @param {number} min - Smallest accepted value (inclusive).
 * @param {number} max - Largest accepted value (inclusive).
 * @returns {number} The parsed integer, or `fallback`.
 */
function readIntEnv(name, fallback, min, max) {
  const raw = process.env[name];
  if (raw === undefined || raw === '') return fallback;

  const value = Number.parseInt(raw, 10);
  if (Number.isInteger(value) && value >= min && value <= max) return value;

  console.warn(`[config] ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}`);
  return fallback;
}

// Base URL of the primary API the heartbeat is posted to; trailing slashes are
// stripped so paths can be appended with a single '/'.
const MAM_API_URL = (process.env.MAM_API_URL || 'http://localhost:3000').replace(/\/+$/, '');

// Optional bearer token; when empty no Authorization header is sent.
const NODE_TOKEN = process.env.NODE_TOKEN || '';

// Role string reported by /health and in every heartbeat.
const NODE_ROLE = process.env.NODE_ROLE || 'worker';

// TCP port this agent listens on and advertises in `api_url`.
const AGENT_PORT = readIntEnv('AGENT_PORT', 3002, 1, 65535);

// Heartbeat period in milliseconds. Bounded to the range timers accept:
// anything outside 1..2147483647 (or NaN) would be coerced to a 1 ms delay.
const HEARTBEAT_MS = readIntEnv('HEARTBEAT_MS', 30000, 1, 2147483647);

const VERSION = '1.0.0';
|
|
|
|
|
|
|
|
|
|
// Express application serving the agent's local HTTP API.
const app = express();

// Parse JSON request bodies for every route.
const jsonBodyParser = express.json();
app.use(jsonBodyParser);
|
|
|
|
|
|
|
|
|
|
// ── Health ────────────────────────────────────────────────────────────────
|
|
|
|
|
// GET /health — reports host name, process uptime in whole seconds,
// agent version and the configured node role.
app.get('/health', (_req, res) => {
  const status = {
    ok: true,
    hostname: os.hostname(),
    uptime: Math.round(process.uptime()),
    version: VERSION,
    role: NODE_ROLE,
  };
  res.json(status);
});
|
|
|
|
|
|
|
|
|
|
// ── CPU sampling (500ms window) ───────────────────────────────────────────
|
|
|
|
|
/**
 * Measures aggregate CPU utilisation across all cores over a short window.
 *
 * Takes two `os.cpus()` snapshots `windowMs` apart, sums the per-core tick
 * deltas for every time category, and reports the non-idle share.
 *
 * @param {number} [windowMs=500] - Delay between the two snapshots, in ms.
 * @returns {Promise<number>} Busy percentage rounded to two decimals, or 0
 *   when no ticks elapsed between the snapshots.
 */
function sampleCpu(windowMs = 500) {
  return new Promise((resolve) => {
    const before = os.cpus();

    setTimeout(() => {
      const after = os.cpus();
      let idle = 0;
      let total = 0;

      after.forEach((cpu, i) => {
        // A core missing from the first snapshot is treated as starting at 0.
        const previous = before[i]?.times;
        for (const [kind, ticks] of Object.entries(cpu.times)) {
          const delta = ticks - (previous?.[kind] ?? 0);
          total += delta;
          if (kind === 'idle') idle += delta;
        }
      });

      resolve(total > 0 ? Math.round((1 - idle / total) * 10000) / 100 : 0);
    }, windowMs);
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Finds the first non-internal IPv4 address among the host's network
 * interfaces, in the order `os.networkInterfaces()` reports them.
 *
 * @returns {string|null} The address, or `null` when none is found.
 */
function getIp() {
  // Node 18.0–18.3 reported `family` as the number 4 rather than 'IPv4';
  // accept both so detection works across runtimes.
  const isExternalV4 = ({ family, internal }) =>
    (family === 'IPv4' || family === 4) && !internal;

  for (const addresses of Object.values(os.networkInterfaces())) {
    const hit = (addresses || []).find(isExternalV4);
    if (hit) return hit.address;
  }
  return null;
}
|
|
|
|
|
|
2026-05-20 14:18:07 -04:00
|
|
|
// ── Hardware detection ────────────────────────────────────────────────────
|
|
|
|
|
// GPU_COUNT env var overrides filesystem detection (useful when /dev not mapped into container)
|
|
|
|
|
// BMD_COUNT env var similarly overrides Blackmagic DeckLink detection
|
|
|
|
|
/**
 * Builds the hardware capability list reported in each heartbeat.
 *
 * GPUs: when `GPU_COUNT` parses to a non-negative integer, that many
 * `/dev/nvidiaN` entries are synthesised; otherwise `/dev/nvidia0` through
 * `/dev/nvidia15` are probed and every existing node is reported.
 *
 * Blackmagic: when `BMD_COUNT` parses to a non-negative integer, that many
 * `/dev/blackmagic/dvN` entries are synthesised; otherwise the entries of
 * `/dev/blackmagic` are listed. A missing or unreadable directory yields
 * an empty list.
 *
 * @returns {{
 *   gpus: Array<{device: string, type: string, index: number}>,
 *   blackmagic: Array<{device: string, index: number}>
 * }} Detected (or overridden) devices.
 */
function detectHardware() {
  const MAX_GPU_SCAN = 16;
  // Unset, empty or non-numeric values yield -1 / NaN, both of which fail
  // the `>= 0` check and fall through to filesystem detection.
  const readOverride = (name) => Number.parseInt(process.env[name] || '-1', 10);
  const nvidiaGpu = (index) => ({ device: `/dev/nvidia${index}`, type: 'nvidia', index });

  const capabilities = { gpus: [], blackmagic: [] };

  const gpuOverride = readOverride('GPU_COUNT');
  if (gpuOverride >= 0) {
    for (let i = 0; i < gpuOverride; i++) {
      capabilities.gpus.push(nvidiaGpu(i));
    }
  } else {
    for (let i = 0; i < MAX_GPU_SCAN; i++) {
      try {
        fs.accessSync(`/dev/nvidia${i}`, fs.constants.F_OK);
      } catch {
        // Keep scanning: device numbering can be sparse (e.g. only
        // /dev/nvidia1 mapped into a container), so a gap is not the end.
        continue;
      }
      capabilities.gpus.push(nvidiaGpu(i));
    }
  }

  const bmdOverride = readOverride('BMD_COUNT');
  if (bmdOverride >= 0) {
    for (let i = 0; i < bmdOverride; i++) {
      capabilities.blackmagic.push({ device: `/dev/blackmagic/dv${i}`, index: i });
    }
  } else {
    let entries = [];
    try {
      entries = fs.readdirSync('/dev/blackmagic');
    } catch {
      // Best effort: no readable directory means no devices are reported.
    }
    // readdir order is unspecified; sort numerically for a stable report.
    capabilities.blackmagic = [...entries]
      .sort((a, b) => a.localeCompare(b, undefined, { numeric: true }))
      .map((entry, position) => {
        // Mirror the override path's shape: use the entry's numeric suffix
        // as its index, falling back to list position when there is none.
        const suffix = /(\d+)$/.exec(entry);
        return {
          device: `/dev/blackmagic/${entry}`,
          index: suffix ? Number(suffix[1]) : position,
        };
      });
  }

  return capabilities;
}
|
|
|
|
|
|
2026-05-20 13:48:18 -04:00
|
|
|
// ── Heartbeat ─────────────────────────────────────────────────────────────
|
|
|
|
|
/**
 * Collects host metrics and POSTs them to the primary's
 * `/api/v1/cluster/heartbeat` endpoint.
 *
 * The payload carries hostname, IP, role, version, the agent's own URL,
 * CPU usage, memory figures in MiB and the detected hardware capabilities.
 * A bearer token is attached when NODE_TOKEN is non-empty, and the request
 * is aborted after 8 seconds.
 *
 * Callers invoke this fire-and-forget, so every failure — metric
 * collection, network error, timeout, non-2xx response — is logged here
 * rather than propagated.
 *
 * @returns {Promise<void>}
 */
async function heartbeat() {
  try {
    const cpu_usage = await sampleCpu();
    const totalMem = os.totalmem();
    const freeMem = os.freemem();
    const ip_address = getIp();
    const capabilities = detectHardware();
    const hostname = os.hostname();

    const payload = {
      hostname,
      ip_address,
      role: NODE_ROLE,
      version: VERSION,
      // Fall back to the hostname when no external IPv4 address was found.
      api_url: `http://${ip_address || hostname}:${AGENT_PORT}`,
      cpu_usage,
      mem_used_mb: Math.round((totalMem - freeMem) / 1048576),
      mem_total_mb: Math.round(totalMem / 1048576),
      capabilities,
    };

    const headers = { 'Content-Type': 'application/json' };
    if (NODE_TOKEN) headers['Authorization'] = `Bearer ${NODE_TOKEN}`;

    const res = await fetch(`${MAM_API_URL}/api/v1/cluster/heartbeat`, {
      method: 'POST',
      headers,
      body: JSON.stringify(payload),
      signal: AbortSignal.timeout(8000),
    });

    if (!res.ok) {
      // Log at most 120 characters of the error body.
      const txt = await res.text().catch(() => '');
      console.error(`[hb] ${res.status} ${txt.slice(0, 120)}`);
      return;
    }

    // Device counts are appended only when something was detected.
    const gpuStr = capabilities.gpus.length ? ` gpu=${capabilities.gpus.length}` : '';
    const bmdStr = capabilities.blackmagic.length ? ` bmd=${capabilities.blackmagic.length}` : '';
    process.stdout.write(
      `[hb] ${payload.hostname} cpu=${cpu_usage}% mem=${payload.mem_used_mb}/${payload.mem_total_mb}MB${gpuStr}${bmdStr}\n`
    );
  } catch (err) {
    console.error(`[hb] failed — ${err?.message ?? err}`);
  }
}
|
|
|
|
|
|
|
|
|
|
// Fire-and-forget heartbeat runner. The promise is handled explicitly so a
// rejection can never surface as an unhandled rejection from the timer.
const runHeartbeat = () => {
  heartbeat().catch((err) => {
    console.error(`[hb] failed — ${err?.message ?? err}`);
  });
};

// Send one heartbeat immediately, then one every HEARTBEAT_MS.
runHeartbeat();
setInterval(runHeartbeat, HEARTBEAT_MS);

// Start the local HTTP API and print the effective configuration.
app.listen(AGENT_PORT, () => {
  console.log(`wild-dragon-node-agent v${VERSION}`);
  console.log(`  Listening  :${AGENT_PORT}`);
  console.log(`  Primary    ${MAM_API_URL}`);
  console.log(`  Role       ${NODE_ROLE}`);
  console.log(`  Heartbeat  every ${HEARTBEAT_MS / 1000}s`);
});
|