dragonflight/docker-compose.yml
Zac Gaetano fdec2e307d feat(worker): capability-routed GPU worker pool + per-node job attribution
WORKER_QUEUES env lets a worker subscribe to a subset of queues. Deploy one GPU-pinned container per card: heavy encodes (proxy/conform/trim) on Tesla P4 (zampp1) + L4 (zampp2) via NVENC; light jobs (thumbnail/filmstrip) on the 2x Quadro P400 (zampp1). BullMQ competing-consumers distribute across nodes. RUN_PROMOTION gates the growing-files scanner to one worker. Each worker stamps WORKER_LABEL onto job data so the Jobs UI Node column shows which node/GPU ran each job. Redis/DB/S3 for the zampp2 worker come from its .env (pointed at zampp1).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-05-29 04:00:10 +00:00

187 lines
5.4 KiB
YAML

services:
db:
image: postgres:16
environment:
POSTGRES_DB: ${POSTGRES_DB}
POSTGRES_USER: ${POSTGRES_USER}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
ports:
- "${PORT_DB:-5432}:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
- ./services/mam-api/src/db/schema.sql:/docker-entrypoint-initdb.d/001-schema.sql:ro
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER}"]
interval: 5s
timeout: 5s
retries: 5
networks:
- wild-dragon
queue:
image: redis:7-alpine
ports:
- "${PORT_REDIS:-6379}:6379"
volumes:
- redis_data:/data
networks:
- wild-dragon
mam-api:
build: ./services/mam-api
depends_on:
db:
condition: service_healthy
queue:
condition: service_started
ports:
- "${PORT_MAM_API:-7432}:3000"
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /mnt/NVME/MAM/wild-dragon-live:/live
- /mnt/NVME/MAM/wild-dragon-growing:/growing
- /mnt/NVME/MAM/sdk:/sdk
- /dev/shm:/dev/shm
- /run/dbus:/run/dbus
- /run/systemd:/run/systemd
- /usr/bin/nvidia-smi:/usr/bin/nvidia-smi:ro
environment:
DATABASE_URL: ${DATABASE_URL}
REDIS_URL: ${REDIS_URL}
S3_ENDPOINT: ${S3_ENDPOINT}
S3_BUCKET: ${S3_BUCKET}
S3_ACCESS_KEY: ${S3_ACCESS_KEY}
S3_SECRET_KEY: ${S3_SECRET_KEY}
S3_REGION: ${S3_REGION:-us-east-1}
SESSION_SECRET: ${SESSION_SECRET}
AUTH_ENABLED: ${AUTH_ENABLED:-false}
TRUST_PROXY: ${TRUST_PROXY:-false}
ALLOWED_ORIGINS: ${ALLOWED_ORIGINS:-}
DOCKER_NETWORK: wild-dragon_wild-dragon
NODE_IP: ${NODE_IP}
NODE_HOSTNAME: ${NODE_HOSTNAME:-}
CAPTURE_TOKEN: ${CAPTURE_TOKEN}
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
networks:
- wild-dragon
capture:
build: ./services/capture
depends_on:
- mam-api
ports:
- "${PORT_CAPTURE:-7433}:3001"
- "${PORT_RTMP:-1935}:1935" # RTMP ingest (listener mode)
- "${PORT_SRT:-9000}:9000/udp" # SRT ingest (listener mode)
privileged: true
environment:
S3_ENDPOINT: ${S3_ENDPOINT}
S3_BUCKET: ${S3_BUCKET}
S3_ACCESS_KEY: ${S3_ACCESS_KEY}
S3_SECRET_KEY: ${S3_SECRET_KEY}
S3_REGION: ${S3_REGION:-us-east-1}
MAM_API_URL: ${MAM_API_URL:-http://mam-api:3000}
volumes:
- /mnt/NVME/MAM/wild-dragon-live:/live
- /dev/shm:/dev/shm
- /run/dbus:/run/dbus
- /run/systemd:/run/systemd
networks:
- wild-dragon
# ── GPU worker pool (capability-routed) ──────────────────────────────
# worker-p4: HEAVY tier (proxy/conform/trim) on the Tesla P4 (NVENC).
# Also runs the promotion scanner (RUN_PROMOTION) — exactly one worker must.
worker-p4:
build:
context: ./services/worker
dockerfile: Dockerfile.gpu
image: wild-dragon-worker-gpu:latest
runtime: nvidia
depends_on:
- queue
- db
environment:
REDIS_URL: ${REDIS_URL}
DATABASE_URL: ${DATABASE_URL}
S3_ENDPOINT: ${S3_ENDPOINT}
S3_BUCKET: ${S3_BUCKET}
S3_ACCESS_KEY: ${S3_ACCESS_KEY}
S3_SECRET_KEY: ${S3_SECRET_KEY}
S3_REGION: ${S3_REGION:-us-east-1}
GROWING_PATH: /growing
WORKER_QUEUES: proxy,conform,trim
RUN_PROMOTION: "true"
PROXY_CONCURRENCY: "2"
NVIDIA_VISIBLE_DEVICES: GPU-79afca3e-2ab2-a6ea-1c44-706c1f0a26d6
WORKER_LABEL: "zampp1 / Tesla P4"
NVIDIA_DRIVER_CAPABILITIES: video,compute,utility
volumes:
- /mnt/NVME/MAM/wild-dragon-growing:/growing
networks:
- wild-dragon
# worker-p400a/b: LIGHT tier (thumbnail/filmstrip) on the two Quadro P400s.
worker-p400a:
image: wild-dragon-worker-gpu:latest
runtime: nvidia
depends_on: [queue, db, worker-p4]
environment:
REDIS_URL: ${REDIS_URL}
DATABASE_URL: ${DATABASE_URL}
S3_ENDPOINT: ${S3_ENDPOINT}
S3_BUCKET: ${S3_BUCKET}
S3_ACCESS_KEY: ${S3_ACCESS_KEY}
S3_SECRET_KEY: ${S3_SECRET_KEY}
S3_REGION: ${S3_REGION:-us-east-1}
WORKER_QUEUES: thumbnail,filmstrip
NVIDIA_VISIBLE_DEVICES: GPU-331c53ea-2ed9-0007-e364-c1451775948f
WORKER_LABEL: "zampp1 / P400 #1"
NVIDIA_DRIVER_CAPABILITIES: video,compute,utility
networks:
- wild-dragon
worker-p400b:
image: wild-dragon-worker-gpu:latest
runtime: nvidia
depends_on: [queue, db, worker-p4]
environment:
REDIS_URL: ${REDIS_URL}
DATABASE_URL: ${DATABASE_URL}
S3_ENDPOINT: ${S3_ENDPOINT}
S3_BUCKET: ${S3_BUCKET}
S3_ACCESS_KEY: ${S3_ACCESS_KEY}
S3_SECRET_KEY: ${S3_SECRET_KEY}
S3_REGION: ${S3_REGION:-us-east-1}
WORKER_QUEUES: thumbnail,filmstrip
NVIDIA_VISIBLE_DEVICES: GPU-b514a592-9077-44bd-d9e8-9efa0591ef88
WORKER_LABEL: "zampp1 / P400 #2"
NVIDIA_DRIVER_CAPABILITIES: video,compute,utility
networks:
- wild-dragon
web-ui:
build: ./services/web-ui
ports:
- "${PORT_WEB_UI:-7434}:80"
volumes:
- /mnt/NVME/MAM/wild-dragon-live:/live
- /dev/shm:/dev/shm
- /run/dbus:/run/dbus
- /run/systemd:/run/systemd
networks:
- wild-dragon
volumes:
postgres_data:
redis_data:
networks:
wild-dragon:
driver: bridge