dragonflight/docker-compose.yml
Zac Gaetano fffff1c016 feat(cluster): install capture-card drivers/SDKs from the admin screen
Per-node "Capture Drivers / SDKs" panel installs Blackmagic / AJA / Deltacast
/ NDI drivers without SSH. node-agent gains NODE_TOKEN-gated /driver/install
+ /driver/status (spawns a one-shot privileged ubuntu container that bind-
mounts host kernel paths + the repo and runs deploy/install-driver.sh);
mam-api adds admin-gated /cluster/:id/install-driver + /driver-status.
Driver files live in-repo under sdk/<vendor>/ (private repo); binaries are
admin-supplied per each sdk/<vendor>/README.md. Vendor allowlist throughout.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-31 18:14:59 -04:00

211 lines
6.7 KiB
YAML

services:
# PostgreSQL 16. The mam-api schema file is mounted read-only into the image's
# init-script directory (the official image runs those scripts when it
# initialises an empty data directory).
db:
  image: postgres:16
  environment:
    POSTGRES_DB: ${POSTGRES_DB}
    POSTGRES_USER: ${POSTGRES_USER}
    POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
  ports:
    - "${PORT_DB:-5432}:5432"
  volumes:
    - postgres_data:/var/lib/postgresql/data
    - ./services/mam-api/src/db/schema.sql:/docker-entrypoint-initdb.d/001-schema.sql:ro
  healthcheck:
    # Probe the real database, not the one pg_isready would infer from the
    # user name: with only -U, every probe targets a database named after the
    # user and the server logs a failed connection attempt when that database
    # does not exist. `$$` defers expansion to the shell inside the container,
    # which sees the POSTGRES_* values set above; the `:-` fallback keeps the
    # previous behaviour when POSTGRES_DB is empty.
    test:
      - CMD-SHELL
      - 'pg_isready -U "$${POSTGRES_USER}" -d "$${POSTGRES_DB:-$${POSTGRES_USER}}"'
    interval: 5s
    timeout: 5s
    retries: 5
  networks:
    - wild-dragon
# Redis 7 (Alpine), with its /data directory kept on a named volume.
# Presumably the instance REDIS_URL points at — confirm in the env file.
queue:
  image: redis:7-alpine
  networks:
    - wild-dragon
  volumes:
    - redis_data:/data
  ports:
    # Published on the host; the host-side port is overridable via PORT_REDIS.
    - "${PORT_REDIS:-6379}:6379"
# mam-api service. Waits for Postgres to pass its healthcheck and for the
# Redis container to be started before it is launched.
mam-api:
  build: ./services/mam-api
  depends_on:
    db:
      condition: service_healthy
    queue:
      condition: service_started
  ports:
    - "${PORT_MAM_API:-7432}:3000"
  volumes:
    # Host Docker socket: lets this container drive the host Docker daemon
    # (effectively root on the host), so keep the API behind authentication.
    - /var/run/docker.sock:/var/run/docker.sock
    - /mnt/NVME/MAM/wild-dragon-live:/live
    - /mnt/NVME/MAM/wild-dragon-growing:/growing
    - /mnt/NVME/MAM/wild-dragon-media:/media
    - /mnt/NVME/MAM/sdk:/sdk
    - /dev/shm:/dev/shm
    - /run/dbus:/run/dbus
    - /run/systemd:/run/systemd
    - /usr/bin/nvidia-smi:/usr/bin/nvidia-smi:ro
  environment:
    DATABASE_URL: ${DATABASE_URL}
    REDIS_URL: ${REDIS_URL}
    S3_ENDPOINT: ${S3_ENDPOINT}
    S3_BUCKET: ${S3_BUCKET}
    S3_ACCESS_KEY: ${S3_ACCESS_KEY}
    S3_SECRET_KEY: ${S3_SECRET_KEY}
    S3_REGION: ${S3_REGION:-us-east-1}
    SESSION_SECRET: ${SESSION_SECRET}
    AUTH_ENABLED: ${AUTH_ENABLED:-false}
    TRUST_PROXY: ${TRUST_PROXY:-false}
    ALLOWED_ORIGINS: ${ALLOWED_ORIGINS:-}
    # Compose names the network "<project>_<network>", so the previous
    # hard-coded value was only correct when the project name is
    # "wild-dragon". The default is unchanged; set DOCKER_NETWORK in the env
    # file when the stack runs under a different project name.
    DOCKER_NETWORK: ${DOCKER_NETWORK:-wild-dragon_wild-dragon}
    NODE_IP: ${NODE_IP}
    NODE_HOSTNAME: ${NODE_HOSTNAME:-}
    # Bearer token mam-api forwards to a node-agent when installing capture
    # drivers ("Capture Drivers / SDKs" panel). Set it to the same value as
    # the agents' NODE_TOKEN. If empty, agents with an empty NODE_TOKEN accept
    # the call (dev); agents with a token reject it (401).
    NODE_AGENT_TOKEN: ${NODE_AGENT_TOKEN:-}
    CAPTURE_TOKEN: ${CAPTURE_TOKEN}
    # Image tag and base AMCP port handed to the API for playout containers.
    PLAYOUT_IMAGE: ${PLAYOUT_IMAGE:-wild-dragon-playout:latest}
    PLAYOUT_AMCP_BASE_PORT: ${PLAYOUT_AMCP_BASE_PORT:-5250}
  deploy:
    resources:
      reservations:
        devices:
          # Reserve every NVIDIA GPU on the host for this container.
          - driver: nvidia
            count: all
            capabilities: [gpu]
  networks:
    - wild-dragon
# Capture/ingest service. Runs privileged and is started after mam-api.
# NOTE(review): mam-api is given CAPTURE_TOKEN but this service is not —
# confirm whether capture needs the same token.
capture:
  build: ./services/capture
  privileged: true
  depends_on:
    mam-api:
      condition: service_started
  networks:
    - wild-dragon
  ports:
    - "${PORT_CAPTURE:-7433}:3001"
    # RTMP ingest (listener mode)
    - "${PORT_RTMP:-1935}:1935"
    # SRT ingest (listener mode)
    - "${PORT_SRT:-9000}:9000/udp"
  volumes:
    - /mnt/NVME/MAM/wild-dragon-live:/live
    - /dev/shm:/dev/shm
    - /run/dbus:/run/dbus
    - /run/systemd:/run/systemd
  environment:
    MAM_API_URL: ${MAM_API_URL:-http://mam-api:3000}
    S3_ENDPOINT: ${S3_ENDPOINT}
    S3_BUCKET: ${S3_BUCKET}
    S3_ACCESS_KEY: ${S3_ACCESS_KEY}
    S3_SECRET_KEY: ${S3_SECRET_KEY}
    S3_REGION: ${S3_REGION:-us-east-1}
# ── GPU worker pool (capability-routed) ──────────────────────────────
# worker-p4: HEAVY tier (proxy/conform/trim) on the Tesla P4 (NVENC).
# Also runs the promotion scanner (RUN_PROMOTION) — exactly one worker must.
# This is the only worker service with a build section; it produces the
# wild-dragon-worker-gpu image.
worker-p4:
  build:
    context: ./services/worker
    dockerfile: Dockerfile.gpu
  image: wild-dragon-worker-gpu:latest
  runtime: nvidia
  depends_on:
    queue:
      condition: service_started
    # Wait for Postgres to pass its healthcheck, as mam-api already does,
    # rather than only for the container to be started: this worker is handed
    # DATABASE_URL and could otherwise come up while the database is still
    # initialising.
    db:
      condition: service_healthy
  environment:
    REDIS_URL: ${REDIS_URL}
    DATABASE_URL: ${DATABASE_URL}
    S3_ENDPOINT: ${S3_ENDPOINT}
    S3_BUCKET: ${S3_BUCKET}
    S3_ACCESS_KEY: ${S3_ACCESS_KEY}
    S3_SECRET_KEY: ${S3_SECRET_KEY}
    S3_REGION: ${S3_REGION:-us-east-1}
    GROWING_PATH: /growing
    # Includes `import` (YouTube importer): the import queue had no consumer
    # after the capability-routing split, so import jobs sat unprocessed and
    # assets stayed `ingesting` forever. import is concurrency-1 + network-
    # bound, so one consumer (this heavy/primary worker) is sufficient.
    WORKER_QUEUES: proxy,conform,trim,import,playout-stage
    RUN_PROMOTION: "true"
    PROXY_CONCURRENCY: "2"
    PLAYOUT_MEDIA_DIR: /media
    # Pins the container to a single GPU by UUID.
    NVIDIA_VISIBLE_DEVICES: GPU-79afca3e-2ab2-a6ea-1c44-706c1f0a26d6
    WORKER_LABEL: "zampp1 / Tesla P4"
    NVIDIA_DRIVER_CAPABILITIES: video,compute,utility
  volumes:
    - /mnt/NVME/MAM/wild-dragon-growing:/growing
    - /mnt/NVME/MAM/wild-dragon-media:/media
  networks:
    - wild-dragon
# worker-p400a/b: LIGHT tier (thumbnail/filmstrip) on the two Quadro P400s.
# No build section: this service runs the wild-dragon-worker-gpu image that
# worker-p4 builds.
worker-p400a:
  image: wild-dragon-worker-gpu:latest
  runtime: nvidia
  depends_on:
    queue:
      condition: service_started
    # Wait for Postgres to pass its healthcheck, as mam-api already does,
    # rather than only for the container to be started.
    db:
      condition: service_healthy
    worker-p4:
      condition: service_started
  environment:
    REDIS_URL: ${REDIS_URL}
    DATABASE_URL: ${DATABASE_URL}
    S3_ENDPOINT: ${S3_ENDPOINT}
    S3_BUCKET: ${S3_BUCKET}
    S3_ACCESS_KEY: ${S3_ACCESS_KEY}
    S3_SECRET_KEY: ${S3_SECRET_KEY}
    S3_REGION: ${S3_REGION:-us-east-1}
    WORKER_QUEUES: thumbnail,filmstrip
    # Pins the container to a single GPU by UUID.
    NVIDIA_VISIBLE_DEVICES: GPU-331c53ea-2ed9-0007-e364-c1451775948f
    WORKER_LABEL: "zampp1 / P400 #1"
    NVIDIA_DRIVER_CAPABILITIES: video,compute,utility
  networks:
    - wild-dragon
# Second LIGHT-tier worker (thumbnail/filmstrip queues), labelled "P400 #2".
# No build section: it runs the wild-dragon-worker-gpu image that worker-p4
# builds.
worker-p400b:
  image: wild-dragon-worker-gpu:latest
  runtime: nvidia
  depends_on:
    queue:
      condition: service_started
    # Wait for Postgres to pass its healthcheck, as mam-api already does,
    # rather than only for the container to be started.
    db:
      condition: service_healthy
    worker-p4:
      condition: service_started
  environment:
    REDIS_URL: ${REDIS_URL}
    DATABASE_URL: ${DATABASE_URL}
    S3_ENDPOINT: ${S3_ENDPOINT}
    S3_BUCKET: ${S3_BUCKET}
    S3_ACCESS_KEY: ${S3_ACCESS_KEY}
    S3_SECRET_KEY: ${S3_SECRET_KEY}
    S3_REGION: ${S3_REGION:-us-east-1}
    WORKER_QUEUES: thumbnail,filmstrip
    # Pins the container to a single GPU by UUID.
    NVIDIA_VISIBLE_DEVICES: GPU-b514a592-9077-44bd-d9e8-9efa0591ef88
    WORKER_LABEL: "zampp1 / P400 #2"
    NVIDIA_DRIVER_CAPABILITIES: video,compute,utility
  networks:
    - wild-dragon
# Web front end; container port 80 is published on PORT_WEB_UI (default 7434).
# NOTE(review): /live is mounted writable while /media is read-only, and the
# dbus/systemd sockets are mounted as well — confirm the UI needs these.
web-ui:
  build: ./services/web-ui
  networks:
    - wild-dragon
  ports:
    - "${PORT_WEB_UI:-7434}:80"
  volumes:
    - /mnt/NVME/MAM/wild-dragon-live:/live
    - /mnt/NVME/MAM/wild-dragon-media:/media:ro
    - /dev/shm:/dev/shm
    - /run/dbus:/run/dbus
    - /run/systemd:/run/systemd
# Build-only definition of the CasparCG sidecar image. mam-api launches one
# such container per playout channel on demand, so this service is never
# brought up itself; it exists so that `docker compose build playout` produces
# the image the API refers to through PLAYOUT_IMAGE. The profile keeps it out
# of a default `up`.
playout:
  image: wild-dragon-playout:latest
  build: ./services/playout
  profiles:
    - build-only
# Named volumes with default settings: Postgres data directory and Redis /data.
volumes:
  postgres_data: {}
  redis_data: {}

# Single bridge network that every service in this file joins.
networks:
  wild-dragon:
    driver: bridge