fix(node-agent): NODE_NAME override to prevent cloned-VM hostname collision
The cluster heartbeat upserts cluster_nodes ON CONFLICT (hostname), so two machines reporting the same os.hostname() clobber each other's row. A cloned capture VM whose /etc/hostname was "zampp1" (same as the primary) caused its 4 DeckLink cards to land on the primary's row, then get overwritten by the primary's cardless heartbeat — so the New Recorder modal showed "No SDI devices auto-detected" despite healthy hardware.

- node-agent now reports process.env.NODE_NAME || os.hostname() as its cluster identity, so node identity is explicit and collision-proof.
- docker-compose.worker.yml exposes NODE_NAME to the container.
- onboard-node.sh always writes NODE_NAME to the node .env (defaults to the OS hostname) so future onboarding pins identity even on cloned images.

Live remediation already applied to the zampp2 capture node: compose hostname pinned to zampp2 and its node token rebound to zampp2; DB now reports bmd=4 for zampp2.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
e3be8745d3
commit
068e2eaa87
3 changed files with 22 additions and 2 deletions
|
|
@ -38,6 +38,11 @@ NODE_TOKEN="${NODE_TOKEN:-}"
|
|||
[[ -n "${NODE_ROLE:-}" ]] && NODE_ROLE_EXPLICIT=1 || NODE_ROLE_EXPLICIT=""
|
||||
NODE_ROLE="${NODE_ROLE:-worker}"
|
||||
NODE_IP="${NODE_IP:-}"
|
||||
# NODE_NAME pins this node's cluster identity (the heartbeat key). It defaults to
|
||||
# the OS hostname and is ALWAYS written to the node .env. Cloned VMs that share
|
||||
# an /etc/hostname (e.g. two boxes both named "zampp1") must be onboarded with an
|
||||
# explicit, unique NODE_NAME; with the default they still collide on the same
|
||||
# cluster_nodes row — which silently hides the capture node's DeckLink devices.
|
||||
NODE_NAME="${NODE_NAME:-$(hostname)}"
|
||||
AGENT_PORT="${AGENT_PORT:-7436}"
|
||||
PROFILES="${PROFILES:-}"
|
||||
BMD_MODEL="${BMD_MODEL:-}"
|
||||
|
|
@ -200,6 +205,7 @@ info "Writing $ENV_FILE"
|
|||
echo "MAM_API_URL=$MAM_API_URL"
|
||||
echo "NODE_TOKEN=$NODE_TOKEN"
|
||||
echo "NODE_ROLE=$NODE_ROLE"
|
||||
echo "NODE_NAME=$NODE_NAME"
|
||||
echo "NODE_IP=$NODE_IP"
|
||||
echo "AGENT_PORT=$AGENT_PORT"
|
||||
echo "HEARTBEAT_MS=30000"
|
||||
|
|
|
|||
|
|
@ -47,6 +47,10 @@ services:
|
|||
MAM_API_URL: ${MAM_API_URL}
|
||||
NODE_TOKEN: ${NODE_TOKEN:-}
|
||||
NODE_ROLE: ${NODE_ROLE:-worker}
|
||||
# NODE_NAME pins the cluster identity (heartbeat key). Set it per-node so
|
||||
# cloned VMs that share /etc/hostname don't collide on the same
|
||||
# cluster_nodes row. When unset, an empty value is passed to the container and node-agent falls back to os.hostname().
|
||||
NODE_NAME: ${NODE_NAME:-}
|
||||
NODE_IP: ${NODE_IP:-}
|
||||
AGENT_PORT: ${AGENT_PORT:-7436}
|
||||
HEARTBEAT_MS: ${HEARTBEAT_MS:-30000}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,16 @@ import fs from 'fs';
|
|||
const MAM_API_URL = (process.env.MAM_API_URL || 'http://localhost:3000').replace(/\/$/, '');
|
||||
const NODE_TOKEN = process.env.NODE_TOKEN || '';
|
||||
const NODE_ROLE = process.env.NODE_ROLE || 'worker';
|
||||
// Cluster identity. The heartbeat keys cluster_nodes on hostname (ON CONFLICT
|
||||
// (hostname)), so two machines reporting the SAME os.hostname() clobber each
|
||||
// other's row — exactly what happens with cloned VMs that share /etc/hostname
|
||||
// (e.g. two boxes both named "zampp1"). The capture node's DeckLink capability
|
||||
// then lands on the wrong row and gets overwritten by the primary's cardless
|
||||
// heartbeat, so the recorder UI shows "No SDI devices auto-detected".
|
||||
// NODE_NAME (set per-node by onboard-node.sh / the node's .env) overrides
|
||||
// os.hostname() so identity is explicit. It only prevents collisions when each
|
||||
// node is given a distinct NODE_NAME. Falls back to the OS hostname when unset
|
||||
// or empty, preserving existing single-host behaviour.
|
||||
const NODE_NAME = process.env.NODE_NAME || os.hostname();
|
||||
const AGENT_PORT = parseInt(process.env.AGENT_PORT || '7436', 10);
|
||||
const HEARTBEAT_MS = parseInt(process.env.HEARTBEAT_MS || '30000', 10);
|
||||
const LIVE_DIR = process.env.LIVE_DIR || '/mnt/NVME/MAM/wild-dragon-live';
|
||||
|
|
@ -711,11 +721,11 @@ async function heartbeat() {
|
|||
}
|
||||
|
||||
const payload = {
|
||||
hostname: os.hostname(),
|
||||
hostname: NODE_NAME,
|
||||
ip_address,
|
||||
role: NODE_ROLE,
|
||||
version: VERSION,
|
||||
api_url: `http://${ip_address || os.hostname()}:${AGENT_PORT}`,
|
||||
api_url: `http://${ip_address || NODE_NAME}:${AGENT_PORT}`,
|
||||
cpu_usage,
|
||||
mem_used_mb: Math.round((totalMem - freeMem) / 1048576),
|
||||
mem_total_mb: Math.round(totalMem / 1048576),
|
||||
|
|
|
|||
Loading…
Reference in a new issue