feat(deploy): add Prometheus + Grafana observability stack (closes #11)
Some checks failed
ci / race tests (push) Blocked by required conditions
ci / WebRTC smoke (5-viewer fanout) (push) Blocked by required conditions
ci / WebRTC latency p95 gate (push) Blocked by required conditions
ci / vet + build (push) Has been cancelled

This commit is contained in:
Zac Gaetano 2026-05-06 16:00:15 -04:00
parent 4beab3423d
commit 28a280b9b3

View file

@ -1,13 +1,12 @@
# Dragon Fork datarhei Core — M2 deployment with WebRTC egress.
# Dragon Fork datarhei Core — v0.2 deployment with WebRTC egress and observability.
#
# This replaces the M1 webrtc-poc stack. It runs the real root Core
# binary with the WebRTC subsystem wired into the restream manager, so
# every process whose config has `webrtc.enabled=true` will have its
# output fanned out to WHEP subscribers automatically.
# This replaces the M2 stack. Adds Prometheus and Grafana containers so the
# operator can answer "is WebRTC healthy right now?" from a single dashboard
# without tailing logs or hitting the API.
#
# Host networking is required for the same reason as M1: ICE encodes
# host:port pairs into SDP candidates, and bridge-mode port mapping
# breaks that.
# Host networking is required for WebRTC ICE (see deploy/truenas/docker-compose.yml).
# Prometheus and Grafana sit on a bridge network (dragonfork-mon) and reach
# Core via host.docker.internal:CORE_HTTP_PORT.
#
# Copy this file to /mnt/NVME/Docker/dragonfork-core/ alongside a .env:
#
@ -15,6 +14,7 @@
# API_AUTH_USERNAME=admin
# API_AUTH_PASSWORD=change-me-please
# API_AUTH_JWT_SECRET=<32+ random bytes, base64>
# GRAFANA_ADMIN_PASSWORD=<paste the output of: openssl rand -base64 24>
#
# Then:
# docker compose up -d --build
@ -39,23 +39,13 @@ services:
# --- WebRTC egress ---
CORE_WEBRTC_ENABLE: "true"
CORE_WEBRTC_PUBLIC_IP: "${PUBLIC_IP:?set in .env}"
# Leave NAT1To1_IPS empty unless you need multiple advertised IPs.
# CORE_WEBRTC_NAT_1_TO_1_IPS: "10.0.0.25 203.0.113.10"
# --- RTMP / RTMPS / SRT / TLS port overrides ---
# Default Datarhei ports (1935, 1936, 6000, 8181) are common
# and frequently collide with an existing upstream datarhei/
# restreamer container or other RTMP servers on the same host.
# Pull these out of .env so operators can remap without editing
# this file. Empty strings keep the upstream defaults.
# --- Port overrides ---
CORE_RTMP_ADDRESS: "${CORE_RTMP_ADDRESS:-:1935}"
CORE_RTMP_ADDRESS_TLS: "${CORE_RTMP_ADDRESS_TLS:-:1936}"
CORE_SRT_ADDRESS: "${CORE_SRT_ADDRESS:-:6000}"
CORE_TLS_ADDRESS: "${CORE_TLS_ADDRESS:-:8181}"
# --- Storage ---
# Let the volumes below provide durable paths; defaults are fine.
# --- Logging ---
CORE_LOG_LEVEL: "${LOG_LEVEL:-info}"
@ -63,5 +53,47 @@ services:
- ./config:/core/config
- ./data:/core/data
# No ports: host networking exposes whatever the process binds.
# The WHEP endpoint lives at /api/v3/whep/:id on CORE_HTTP_PORT.
# Prometheus server for the observability stack. Attached to the
# dragonfork-mon bridge network rather than the host network.
prom:
image: prom/prometheus:v2.55.0
container_name: dragonfork-prom
restart: unless-stopped
networks: [dragonfork-mon]
# Map host.docker.internal to the Docker host gateway so this bridge-network
# container can address services listening on the host.
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
# Prometheus configuration file and rules directory, both mounted read-only.
- ./prom/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- ./prom/rules:/etc/prometheus/rules:ro
# Named volume mounted at the path passed to --storage.tsdb.path below.
- prom-data:/prometheus
command:
- --config.file=/etc/prometheus/prometheus.yml
# Retention is read from PROM_RETENTION and falls back to 15d.
- --storage.tsdb.retention.time=${PROM_RETENTION:-15d}
- --storage.tsdb.path=/prometheus
- --web.console.libraries=/usr/share/prometheus/console_libraries
- --web.console.templates=/usr/share/prometheus/consoles
ports:
# Host port is read from PROM_PORT (default 9090); container port is 9090.
- "${PROM_PORT:-9090}:9090"
# Grafana OSS front end for the observability stack. Shares the
# dragonfork-mon bridge network with the prom service.
grafana:
image: grafana/grafana-oss:11.3.0
container_name: dragonfork-grafana
restart: unless-stopped
networks: [dragonfork-mon]
# Compose starts the prom service before this one.
depends_on: [prom]
environment:
# Mandatory: the ":?" form makes Compose abort with "set in .env" when
# GRAFANA_ADMIN_PASSWORD is unset or empty.
GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:?set in .env}"
# Self-service sign-up and anonymous access are both switched off.
GF_USERS_ALLOW_SIGN_UP: "false"
GF_AUTH_ANONYMOUS_ENABLED: "false"
volumes:
# Provisioning files and dashboards are mounted read-only from the
# deployment directory.
- ./grafana/provisioning:/etc/grafana/provisioning:ro
- ./grafana/dashboards:/var/lib/grafana/dashboards:ro
# Named volume mounted at /var/lib/grafana.
- grafana-data:/var/lib/grafana
ports:
# Host port is read from GRAFANA_PORT (default 3000); container port is 3000.
- "${GRAFANA_PORT:-3000}:3000"
# Bridge network joined by the prom and grafana services.
networks:
dragonfork-mon:
driver: bridge
# Named volumes declared with default settings: prom-data is mounted by the
# prom service at /prometheus, grafana-data by grafana at /var/lib/grafana.
volumes:
prom-data:
grafana-data: