From 4beab3423d14167e23095b79d2d1ec77caeaacc3 Mon Sep 17 00:00:00 2001
From: ZGaetano
Date: Wed, 6 May 2026 15:59:56 -0400
Subject: [PATCH] feat(deploy): add Grafana WebRTC health dashboard

---
 .../dashboards/dragonfork-webrtc-health.json | 213 ++++++++++++++++++
 1 file changed, 213 insertions(+)
 create mode 100644 deploy/truenas/core/grafana/dashboards/dragonfork-webrtc-health.json

diff --git a/deploy/truenas/core/grafana/dashboards/dragonfork-webrtc-health.json b/deploy/truenas/core/grafana/dashboards/dragonfork-webrtc-health.json
new file mode 100644
index 0000000..4c3f3a3
--- /dev/null
+++ b/deploy/truenas/core/grafana/dashboards/dragonfork-webrtc-health.json
@@ -0,0 +1,213 @@
+{
+  "__inputs": [],
+  "__requires": [
+    {"type": "grafana", "id": "grafana", "name": "Grafana", "version": "11.3.0"},
+    {"type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0"}
+  ],
+  "annotations": {"list": []},
+  "description": "Dragon Fork WebRTC egress health: WHEP API, ICE establishment, active streams/peers, capacity, and silent-degradation canary.",
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "collapsed": false,
+      "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0},
+      "id": 1,
+      "title": "WHEP API Health",
+      "type": "row"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${datasource}"},
+      "fieldConfig": {
+        "defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 0.1}]}, "unit": "reqps"},
+        "overrides": []
+      },
+      "gridPos": {"h": 4, "w": 6, "x": 0, "y": 1},
+      "id": 2,
+      "options": {"colorMode": "background", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}},
+      "targets": [{"expr": "sum(rate(dragonfork_webrtc_whep_requests_total{code=~\"4..|5..\"}[5m])) or vector(0)", "legendFormat": "error rate/s"}],
+      "title": "WHEP Error Rate",
+      "type": "stat"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${datasource}"},
+      "fieldConfig": {"defaults": {"unit": "reqps"}, "overrides": []},
+      "gridPos": {"h": 8, "w": 9, "x": 6, "y": 1},
+      "id": 3,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
+      "targets": [
+        {"expr": "sum by (route) (rate(dragonfork_webrtc_whep_requests_total{code=~\"2..\"}[5m]))", "legendFormat": "{{route}} 2xx"},
+        {"expr": "sum by (route, code) (rate(dragonfork_webrtc_whep_requests_total{code=~\"4..|5..\"}[5m]))", "legendFormat": "{{route}} {{code}}"}
+      ],
+      "title": "WHEP Request Rate by Route",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${datasource}"},
+      "fieldConfig": {"defaults": {"unit": "s"}, "overrides": []},
+      "gridPos": {"h": 8, "w": 9, "x": 15, "y": 1},
+      "id": 4,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
+      "targets": [
+        {"expr": "histogram_quantile(0.95, sum by (le, route) (rate(dragonfork_webrtc_whep_request_duration_seconds_bucket[5m])))", "legendFormat": "p95 {{route}}"},
+        {"expr": "histogram_quantile(0.50, sum by (le, route) (rate(dragonfork_webrtc_whep_request_duration_seconds_bucket[5m])))", "legendFormat": "p50 {{route}}"}
+      ],
+      "title": "WHEP Request Duration (p50/p95)",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {"h": 1, "w": 24, "x": 0, "y": 9},
+      "id": 10,
+      "title": "ICE Establishment",
+      "type": "row"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${datasource}"},
+      "fieldConfig": {"defaults": {"unit": "s"}, "overrides": []},
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 10},
+      "id": 11,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
+      "targets": [
+        {"expr": "histogram_quantile(0.95, sum by (le, stream_id, result) (rate(dragonfork_webrtc_ice_establishment_duration_seconds_bucket[10m])))", "legendFormat": "p95 {{stream_id}} {{result}}"},
+        {"expr": "histogram_quantile(0.50, sum by (le, stream_id, result) (rate(dragonfork_webrtc_ice_establishment_duration_seconds_bucket[10m])))", "legendFormat": "p50 {{stream_id}} {{result}}"}
+      ],
+      "title": "ICE Establishment Duration (p50/p95)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${datasource}"},
+      "fieldConfig": {"defaults": {"unit": "cps"}, "overrides": []},
+      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 10},
+      "id": 12,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
+      "targets": [
+        {"expr": "sum by (stream_id, reason) (rate(dragonfork_webrtc_ice_failures_total[5m]))", "legendFormat": "{{stream_id}} {{reason}}"}
+      ],
+      "title": "ICE Failure Rate",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {"h": 1, "w": 24, "x": 0, "y": 18},
+      "id": 20,
+      "title": "Active Streams & Peers",
+      "type": "row"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${datasource}"},
+      "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []},
+      "gridPos": {"h": 4, "w": 4, "x": 0, "y": 19},
+      "id": 21,
+      "options": {"colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}},
+      "targets": [{"expr": "dragonfork_webrtc_active_streams", "legendFormat": "streams"}],
+      "title": "Active Streams",
+      "type": "stat"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${datasource}"},
+      "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []},
+      "gridPos": {"h": 8, "w": 20, "x": 4, "y": 19},
+      "id": 22,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
+      "targets": [
+        {"expr": "dragonfork_webrtc_active_peers", "legendFormat": "{{stream_id}}"}
+      ],
+      "title": "Active Peers per Stream",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {"h": 1, "w": 24, "x": 0, "y": 27},
+      "id": 30,
+      "title": "Capacity & Rejections",
+      "type": "row"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${datasource}"},
+      "fieldConfig": {
+        "defaults": {
+          "color": {"mode": "thresholds"},
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 4}, {"color": "red", "value": 8}]}
+        },
+        "overrides": []
+      },
+      "gridPos": {"h": 4, "w": 4, "x": 0, "y": 28},
+      "id": 31,
+      "options": {"colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}},
+      "targets": [{"expr": "dragonfork_webrtc_udp_ports_in_use", "legendFormat": "in use"}],
+      "title": "UDP Ports In Use",
+      "type": "stat"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${datasource}"},
+      "fieldConfig": {"defaults": {"unit": "cps"}, "overrides": []},
+      "gridPos": {"h": 8, "w": 20, "x": 4, "y": 28},
+      "id": 32,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
+      "targets": [
+        {"expr": "sum by (stream_id, scope) (rate(dragonfork_webrtc_cap_rejections_total[5m]))", "legendFormat": "{{stream_id}} {{scope}}"}
+      ],
+      "title": "Cap Rejection Rate (503s)",
+      "type": "timeseries"
+    },
+    {
+      "collapsed": false,
+      "gridPos": {"h": 1, "w": 24, "x": 0, "y": 36},
+      "id": 40,
+      "title": "Silent Degradation Canary",
+      "type": "row"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${datasource}"},
+      "fieldConfig": {"defaults": {"unit": "short"}, "overrides": []},
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 37},
+      "id": 41,
+      "options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
+      "targets": [
+        {"expr": "increase(dragonfork_webrtc_ffmpeg_leg_failures_total[5m])", "legendFormat": "{{stream_id}} {{leg}}"}
+      ],
+      "title": "FFmpeg RTP Leg Failures (5m window)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${datasource}"},
+      "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 1}]}}, "overrides": []},
+      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 37},
+      "id": 42,
+      "options": {"colorMode": "background", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}},
+      "targets": [
+        {"expr": "sum by (stream_id, kind) (increase(dragonfork_webrtc_codec_mismatches_total[1h]))", "legendFormat": "{{stream_id}} {{kind}}"}
+      ],
+      "title": "Codec Mismatches (1h)",
+      "type": "stat"
+    }
+  ],
+  "refresh": "30s",
+  "schemaVersion": 39,
+  "tags": ["dragonfork", "webrtc"],
+  "templating": {
+    "list": [
+      {
+        "current": {},
+        "hide": 0,
+        "includeAll": false,
+        "label": "Datasource",
+        "name": "datasource",
+        "options": [],
+        "query": "prometheus",
+        "refresh": 1,
+        "type": "datasource"
+      }
+    ]
+  },
+  "time": {"from": "now-1h", "to": "now"},
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Dragon Fork — WebRTC Health",
+  "uid": "dragonfork-webrtc-health",
+  "version": 1
+}