From f2d5f5aa1651cd8b6b20af43b6ea12e7087b5641 Mon Sep 17 00:00:00 2001 From: Zac Gaetano Date: Sun, 31 May 2026 18:02:29 -0400 Subject: [PATCH] feat(cluster): auto-assign node profiles from detected hardware MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit onboard-node.sh auto-detects GPU (nvidia-smi/lspci) and SDI capture cards (blackmagic/deltacast) and computes PROFILES (worker [+gpu] [+capture]) automatically; explicit NODE_ROLE/PROFILES still override. Add Node wizard drops the role picker — node self-configures from hardware. Co-Authored-By: Claude Opus 4.8 --- deploy/onboard-node.sh | 69 +++++++++++++++++++++++- services/web-ui/public/screens-admin.jsx | 41 ++++---------- 2 files changed, 76 insertions(+), 34 deletions(-) diff --git a/deploy/onboard-node.sh b/deploy/onboard-node.sh index 5208c4d..c374c2b 100644 --- a/deploy/onboard-node.sh +++ b/deploy/onboard-node.sh @@ -16,11 +16,12 @@ # Environment variables: # MAM_API_URL REQUIRED Primary MAM API base URL # NODE_TOKEN API bearer token (required if AUTH_ENABLED=true) -# NODE_ROLE Role tag reported to the cluster (default: worker) +# NODE_ROLE Role tag reported to the cluster (default: auto-detect) # NODE_IP Override the LAN IP reported back (default: auto-detect) # AGENT_PORT Host port for the node agent (default: 7436) # INSTALL_DIR Where to clone/find the repo (default: /opt/wild-dragon) -# PROFILES Extra compose profiles, space-sep e.g. "worker capture" +# PROFILES Compose profiles, space-sep (default: auto-detect from hardware) +# Override only to force, e.g. "worker capture" # BMD_MODEL DeckLink card model name (e.g. "DeckLink Duo 2") # REPO_URL Override the Forgejo clone URL # ============================================================================= @@ -32,6 +33,9 @@ REPO_URL="${REPO_URL:-https://forge.wilddragon.net/zgaetano/wild-dragon.git}" INSTALL_DIR="${INSTALL_DIR:-/opt/wild-dragon}" MAM_API_URL="${MAM_API_URL:-}" NODE_TOKEN="${NODE_TOKEN:-}" +# Track whether the caller pinned NODE_ROLE explicitly (manual override) vs. +# us defaulting it — so auto-detection only fills in an *unset* role. +[[ -n "${NODE_ROLE:-}" ]] && NODE_ROLE_EXPLICIT=1 || NODE_ROLE_EXPLICIT="" NODE_ROLE="${NODE_ROLE:-worker}" NODE_IP="${NODE_IP:-}" AGENT_PORT="${AGENT_PORT:-7436}" @@ -65,6 +69,37 @@ detect_lan_ip() { echo "$ip" } +# ── Auto-detect hardware ───────────────────────────────────────────────────── +# Mirror detect_lan_ip's style: best-effort, guard every probe with `|| true` +# so a missing nvidia-smi/lspci never aborts under `set -euo pipefail`. The +# node self-describes its hardware here so the operator never has to pick a +# role — the right compose profiles are enabled automatically. + +# GPU present? nvidia-smi is the strong signal; fall back to an lspci scan for +# NVIDIA or AMD VGA controllers (covers nodes where the driver isn't installed +# yet but the card is physically present). +detect_gpu() { + if command -v nvidia-smi &>/dev/null && nvidia-smi -L &>/dev/null; then + return 0 + fi + if command -v lspci &>/dev/null; then + if lspci 2>/dev/null | grep -iE 'nvidia|vga.*amd' &>/dev/null; then + return 0 + fi + fi + return 1 +} + +# SDI capture card present? Blackmagic DeckLink or Deltacast, via lspci. +detect_sdi() { + if command -v lspci &>/dev/null; then + if lspci 2>/dev/null | grep -iE 'blackmagic|deltacast' &>/dev/null; then + return 0 + fi + fi + return 1 +} + # ── Preflight ──────────────────────────────────────────────────────────────── echo -e "\n${BLD}${CYN}Wild Dragon MAM — Cluster Node Onboarding${NC}\n" @@ -79,6 +114,36 @@ if [[ -z "$NODE_IP" ]]; then fi fi +# ── Auto-assign compose profiles from detected hardware ────────────────────── +# Operator never picks a role: the worker profile always runs, and we add the +# gpu / capture profiles only when the matching hardware is present. Explicit +# PROFILES / NODE_ROLE env vars are honoured as a manual override escape hatch. +HAS_GPU=false; HAS_SDI=false +detect_gpu && HAS_GPU=true || true +detect_sdi && HAS_SDI=true || true + +DETECTED_DESC="CPU" +[[ "$HAS_GPU" == true ]] && DETECTED_DESC="$DETECTED_DESC, GPU" +[[ "$HAS_SDI" == true ]] && DETECTED_DESC="$DETECTED_DESC, SDI capture card" + +if [[ -z "$PROFILES" ]]; then + AUTO_PROFILES="worker" + [[ "$HAS_GPU" == true ]] && AUTO_PROFILES="$AUTO_PROFILES gpu" + [[ "$HAS_SDI" == true ]] && AUTO_PROFILES="$AUTO_PROFILES capture" + PROFILES="$AUTO_PROFILES" + info "Detected: $DETECTED_DESC → profiles: $PROFILES" +else + info "Detected: $DETECTED_DESC (profiles overridden by env: $PROFILES)" +fi + +# Derive a human-friendly role tag from detected hardware when not pinned. +# Capture cards win over GPU (an SDI ingest node is the more specific role). +if [[ -z "$NODE_ROLE_EXPLICIT" ]]; then + if [[ "$HAS_SDI" == true ]]; then NODE_ROLE="capture" + elif [[ "$HAS_GPU" == true ]]; then NODE_ROLE="gpu" + else NODE_ROLE="worker"; fi +fi + info "Primary API : $MAM_API_URL" info "Role : $NODE_ROLE" info "Agent port : $AGENT_PORT" diff --git a/services/web-ui/public/screens-admin.jsx b/services/web-ui/public/screens-admin.jsx index 4887916..4bbe56a 100644 --- a/services/web-ui/public/screens-admin.jsx +++ b/services/web-ui/public/screens-admin.jsx @@ -1597,23 +1597,15 @@ function Cluster() { ); } -// AddNodeModal — Approach A onboarding wizard. Collects a node name + role, -// mints a one-time auth token via /auth/tokens, and renders a ready-to-paste +// AddNodeModal — Approach A onboarding wizard. Collects a node name, mints a +// one-time auth token via /auth/tokens, and renders a ready-to-paste // `curl … | bash` command that provisions the machine via deploy/onboard-node.sh. // -// Role → compose PROFILES mapping (see docker-compose.worker.yml): -// Worker → "worker" -// Capture → "worker capture" -// GPU → "worker gpu" (worker-l4 service, profiles: [gpu]) -const ADD_NODE_ROLES = [ - { id: 'worker', label: 'Worker', profiles: 'worker', desc: 'CPU transcode / general jobs' }, - { id: 'capture', label: 'Capture', profiles: 'worker capture', desc: 'SDI / DeckLink ingest' }, - { id: 'gpu', label: 'GPU', profiles: 'worker gpu', desc: 'NVENC-accelerated transcode' }, -]; - +// No role picker: the new node self-detects its hardware (GPU / DeckLink / +// Deltacast) in onboard-node.sh and auto-enables the matching compose profiles +// (worker always; + gpu / + capture when present). Zero manual choice. function AddNodeModal({ onClose }) { const [nodeName, setNodeName] = React.useState(''); - const [role, setRole] = React.useState('worker'); const [apiUrl, setApiUrl] = React.useState(''); const [info, setInfo] = React.useState(null); // { scriptUrl, branch } const [command, setCommand] = React.useState(null); // generated string @@ -1631,8 +1623,6 @@ function AddNodeModal({ onClose }) { .catch(() => {}); // leave apiUrl empty → user must fill it before Generate }, []); - const roleDef = ADD_NODE_ROLES.find(r => r.id === role) || ADD_NODE_ROLES[0]; - const generate = async () => { setError(null); if (!nodeName.trim()) { setError('Node name is required.'); return; } @@ -1654,8 +1644,7 @@ function AddNodeModal({ onClose }) { const scriptUrl = (info && info.scriptUrl) || 'https://forge.wilddragon.net/zgaetano/wild-dragon/raw/branch/main/deploy/onboard-node.sh'; const cmd = - `curl -sL ${scriptUrl} | NODE_TOKEN=${token} MAM_API_URL=${apiUrl.trim()} ` + - `NODE_ROLE=${role} PROFILES="${roleDef.profiles}" bash`; + `curl -sL ${scriptUrl} | NODE_TOKEN=${token} MAM_API_URL=${apiUrl.trim()} bash`; setCommand(cmd); } catch (e) { setError(e.message || 'Network error'); @@ -1688,21 +1677,6 @@ function AddNodeModal({ onClose }) { value={nodeName} onChange={e => setNodeName(e.target.value)} /> -
- -
- {ADD_NODE_ROLES.map(rd => ( - - ))} -
-
-
{command} +
+ Profiles (worker / capture / GPU) are auto-selected from the new machine's detected hardware — no need to choose. +
  1. SSH into the fresh Ubuntu machine.
  2. Paste and run this command.