From 068e2eaa87aec9ab3bb3393fd543e35be922ce24 Mon Sep 17 00:00:00 2001 From: Zac Gaetano Date: Mon, 1 Jun 2026 09:00:34 -0400 Subject: [PATCH] fix(node-agent): NODE_NAME override to prevent cloned-VM hostname collision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cluster heartbeat upserts cluster_nodes ON CONFLICT (hostname), so two machines reporting the same os.hostname() clobber each other's row. A cloned capture VM whose /etc/hostname was "zampp1" (same as the primary) caused its 4 DeckLink cards to land on the primary's row, then get overwritten by the primary's cardless heartbeat — so the New Recorder modal showed "No SDI devices auto-detected" despite healthy hardware. - node-agent now reports process.env.NODE_NAME || os.hostname() as its cluster identity, so node identity is explicit and collision-proof. - docker-compose.worker.yml exposes NODE_NAME to the container. - onboard-node.sh always writes NODE_NAME to the node .env (defaults to the OS hostname) so future onboarding pins identity even on cloned images. Live remediation already applied to the zampp2 capture node: compose hostname pinned to zampp2 and its node token rebound to zampp2; DB now reports bmd=4 for zampp2. Co-Authored-By: Claude Opus 4.8 --- deploy/onboard-node.sh | 6 ++++++ docker-compose.worker.yml | 4 ++++ services/node-agent/index.js | 14 ++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/deploy/onboard-node.sh b/deploy/onboard-node.sh index c374c2b..4ea5aa6 100644 --- a/deploy/onboard-node.sh +++ b/deploy/onboard-node.sh @@ -38,6 +38,11 @@ NODE_TOKEN="${NODE_TOKEN:-}" [[ -n "${NODE_ROLE:-}" ]] && NODE_ROLE_EXPLICIT=1 || NODE_ROLE_EXPLICIT="" NODE_ROLE="${NODE_ROLE:-worker}" NODE_IP="${NODE_IP:-}" +# NODE_NAME pins this node's cluster identity (the heartbeat key). Default to the +# OS hostname, but ALWAYS write it explicitly so cloned VMs that share an +# /etc/hostname (e.g. 
two boxes both named "zampp1") don't collide on the same +# cluster_nodes row — which silently hides the capture node's DeckLink devices. +NODE_NAME="${NODE_NAME:-$(hostname)}" AGENT_PORT="${AGENT_PORT:-7436}" PROFILES="${PROFILES:-}" BMD_MODEL="${BMD_MODEL:-}" @@ -200,6 +205,7 @@ info "Writing $ENV_FILE" echo "MAM_API_URL=$MAM_API_URL" echo "NODE_TOKEN=$NODE_TOKEN" echo "NODE_ROLE=$NODE_ROLE" + echo "NODE_NAME=$NODE_NAME" echo "NODE_IP=$NODE_IP" echo "AGENT_PORT=$AGENT_PORT" echo "HEARTBEAT_MS=30000" diff --git a/docker-compose.worker.yml b/docker-compose.worker.yml index 895c906..278cc07 100644 --- a/docker-compose.worker.yml +++ b/docker-compose.worker.yml @@ -47,6 +47,10 @@ services: MAM_API_URL: ${MAM_API_URL} NODE_TOKEN: ${NODE_TOKEN:-} NODE_ROLE: ${NODE_ROLE:-worker} + # NODE_NAME pins the cluster identity (heartbeat key). Set it per-node so + # cloned VMs that share /etc/hostname don't collide on the same + # cluster_nodes row. Falls back to the OS hostname when unset. + NODE_NAME: ${NODE_NAME:-} NODE_IP: ${NODE_IP:-} AGENT_PORT: ${AGENT_PORT:-7436} HEARTBEAT_MS: ${HEARTBEAT_MS:-30000} diff --git a/services/node-agent/index.js b/services/node-agent/index.js index 18f5660..4453016 100644 --- a/services/node-agent/index.js +++ b/services/node-agent/index.js @@ -5,6 +5,16 @@ import fs from 'fs'; const MAM_API_URL = (process.env.MAM_API_URL || 'http://localhost:3000').replace(/\/$/, ''); const NODE_TOKEN = process.env.NODE_TOKEN || ''; const NODE_ROLE = process.env.NODE_ROLE || 'worker'; +// Cluster identity. The heartbeat keys cluster_nodes on hostname (ON CONFLICT +// (hostname)), so two machines reporting the SAME os.hostname() clobber each +// other's row — exactly what happens with cloned VMs that share /etc/hostname +// (e.g. two boxes both named "zampp1"). The capture node's DeckLink capability +// then lands on the wrong row and gets overwritten by the primary's cardless +// heartbeat, so the recorder UI shows "No SDI devices auto-detected". 
+// NODE_NAME (set per-node by onboard-node.sh / the node's .env) overrides +// os.hostname() so identity is explicit; collisions are only avoided when every node's NODE_NAME is unique. Falls back to the +// OS hostname when unset, preserving existing single-host behaviour. +const NODE_NAME = process.env.NODE_NAME || os.hostname(); const AGENT_PORT = parseInt(process.env.AGENT_PORT || '7436', 10); const HEARTBEAT_MS = parseInt(process.env.HEARTBEAT_MS || '30000', 10); const LIVE_DIR = process.env.LIVE_DIR || '/mnt/NVME/MAM/wild-dragon-live'; @@ -711,11 +721,11 @@ async function heartbeat() { } const payload = { - hostname: os.hostname(), + hostname: NODE_NAME, ip_address, role: NODE_ROLE, version: VERSION, - api_url: `http://${ip_address || os.hostname()}:${AGENT_PORT}`, + api_url: `http://${ip_address || NODE_NAME}:${AGENT_PORT}`, cpu_usage, mem_used_mb: Math.round((totalMem - freeMem) / 1048576), mem_total_mb: Math.round(totalMem / 1048576),