feat(deploy): add Grafana WebRTC health dashboard
Some checks failed
ci / race tests (push) Blocked by required conditions
ci / WebRTC smoke (5-viewer fanout) (push) Blocked by required conditions
ci / WebRTC latency p95 gate (push) Blocked by required conditions
ci / vet + build (push) Has been cancelled

This commit is contained in:
Zac Gaetano 2026-05-06 15:59:56 -04:00
parent 6b637a35e6
commit 4beab3423d

View file

@@ -0,0 +1,213 @@
{
"__inputs": [],
"__requires": [
{"type": "grafana", "id": "grafana", "name": "Grafana", "version": "11.3.0"},
{"type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0"}
],
"annotations": {"list": []},
"description": "Dragon Fork WebRTC egress health: WHEP API, ICE establishment, active streams/peers, capacity, and silent-degradation canary.",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 0},
"id": 1,
"title": "WHEP API Health",
"type": "row"
},
{
"datasource": {"type": "prometheus", "uid": "${datasource}"},
"description": "Combined rate of 4xx and 5xx WHEP responses per second over a 5m window. Falls back to 0 when no error series exist, so the panel stays green instead of showing No data. Turns red at 0.1 req/s.",
"fieldConfig": {
"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 0.1}]}, "unit": "reqps"},
"overrides": []
},
"gridPos": {"h": 4, "w": 6, "x": 0, "y": 1},
"id": 2,
"options": {"colorMode": "background", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}},
"targets": [{"expr": "sum(rate(dragonfork_webrtc_whep_requests_total{code=~\"4..|5..\"}[5m])) or vector(0)", "legendFormat": "error rate/s"}],
"title": "WHEP Error Rate",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "${datasource}"},
"fieldConfig": {"defaults": {"unit": "reqps"}, "overrides": []},
"gridPos": {"h": 8, "w": 9, "x": 6, "y": 1},
"id": 3,
"options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
"targets": [
{"expr": "sum by (route) (rate(dragonfork_webrtc_whep_requests_total{code=~\"2..\"}[5m]))", "legendFormat": "{{route}} 2xx"},
{"expr": "sum by (route, code) (rate(dragonfork_webrtc_whep_requests_total{code=~\"4..|5..\"}[5m]))", "legendFormat": "{{route}} {{code}}"}
],
"title": "WHEP Request Rate by Route",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "${datasource}"},
"fieldConfig": {"defaults": {"unit": "s"}, "overrides": []},
"gridPos": {"h": 8, "w": 9, "x": 15, "y": 1},
"id": 4,
"options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
"targets": [
{"expr": "histogram_quantile(0.95, sum by (le, route) (rate(dragonfork_webrtc_whep_request_duration_seconds_bucket[5m])))", "legendFormat": "p95 {{route}}"},
{"expr": "histogram_quantile(0.50, sum by (le, route) (rate(dragonfork_webrtc_whep_request_duration_seconds_bucket[5m])))", "legendFormat": "p50 {{route}}"}
],
"title": "WHEP Request Duration (p50/p95)",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 9},
"id": 10,
"title": "ICE Establishment",
"type": "row"
},
{
"datasource": {"type": "prometheus", "uid": "${datasource}"},
"fieldConfig": {"defaults": {"unit": "s"}, "overrides": []},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 10},
"id": 11,
"options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
"targets": [
{"expr": "histogram_quantile(0.95, sum by (le, stream_id, result) (rate(dragonfork_webrtc_ice_establishment_duration_seconds_bucket[10m])))", "legendFormat": "p95 {{stream_id}} {{result}}"},
{"expr": "histogram_quantile(0.50, sum by (le, stream_id, result) (rate(dragonfork_webrtc_ice_establishment_duration_seconds_bucket[10m])))", "legendFormat": "p50 {{stream_id}} {{result}}"}
],
"title": "ICE Establishment Duration (p50/p95)",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "${datasource}"},
"fieldConfig": {"defaults": {"unit": "cps"}, "overrides": []},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 10},
"id": 12,
"options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
"targets": [
{"expr": "sum by (stream_id, reason) (rate(dragonfork_webrtc_ice_failures_total[5m]))", "legendFormat": "{{stream_id}} {{reason}}"}
],
"title": "ICE Failure Rate",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 18},
"id": 20,
"title": "Active Streams & Peers",
"type": "row"
},
{
"datasource": {"type": "prometheus", "uid": "${datasource}"},
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []},
"gridPos": {"h": 4, "w": 4, "x": 0, "y": 19},
"id": 21,
"options": {"colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}},
"targets": [{"expr": "dragonfork_webrtc_active_streams", "legendFormat": "streams"}],
"title": "Active Streams",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "${datasource}"},
"fieldConfig": {"defaults": {"unit": "short"}, "overrides": []},
"gridPos": {"h": 8, "w": 20, "x": 4, "y": 19},
"id": 22,
"options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
"targets": [
{"expr": "dragonfork_webrtc_active_peers", "legendFormat": "{{stream_id}}"}
],
"title": "Active Peers per Stream",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 27},
"id": 30,
"title": "Capacity & Rejections",
"type": "row"
},
{
"datasource": {"type": "prometheus", "uid": "${datasource}"},
"fieldConfig": {
"defaults": {
"color": {"mode": "thresholds"},
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 4}, {"color": "red", "value": 8}]}
},
"overrides": []
},
"gridPos": {"h": 4, "w": 4, "x": 0, "y": 28},
"id": 31,
"options": {"colorMode": "background", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}},
"targets": [{"expr": "dragonfork_webrtc_udp_ports_in_use", "legendFormat": "in use"}],
"title": "UDP Ports In Use",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "${datasource}"},
"fieldConfig": {"defaults": {"unit": "cps"}, "overrides": []},
"gridPos": {"h": 8, "w": 20, "x": 4, "y": 28},
"id": 32,
"options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
"targets": [
{"expr": "sum by (stream_id, scope) (rate(dragonfork_webrtc_cap_rejections_total[5m]))", "legendFormat": "{{stream_id}} {{scope}}"}
],
"title": "Cap Rejection Rate (503s)",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {"h": 1, "w": 24, "x": 0, "y": 36},
"id": 40,
"title": "Silent Degradation Canary",
"type": "row"
},
{
"datasource": {"type": "prometheus", "uid": "${datasource}"},
"description": "Increase in FFmpeg RTP leg failures over a 5m window, summed per stream_id and leg so each legend entry maps to exactly one series.",
"fieldConfig": {"defaults": {"unit": "short"}, "overrides": []},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 37},
"id": 41,
"options": {"legend": {"displayMode": "list", "placement": "bottom"}, "tooltip": {"mode": "multi"}},
"targets": [
{"expr": "sum by (stream_id, leg) (increase(dragonfork_webrtc_ffmpeg_leg_failures_total[5m]))", "legendFormat": "{{stream_id}} {{leg}}"}
],
"title": "FFmpeg RTP Leg Failures (5m window)",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "${datasource}"},
"description": "Increase in codec mismatches over the trailing 1h, per stream_id and kind. Each sample already covers a full hour, so the latest sample is shown rather than a sum over the dashboard time range. Turns red at 1.",
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "red", "value": 1}]}}, "overrides": []},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 37},
"id": 42,
"options": {"colorMode": "background", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}},
"targets": [
{"expr": "sum by (stream_id, kind) (increase(dragonfork_webrtc_codec_mismatches_total[1h]))", "legendFormat": "{{stream_id}} {{kind}}"}
],
"title": "Codec Mismatches (1h)",
"type": "stat"
}
],
"refresh": "30s",
"schemaVersion": 39,
"tags": ["dragonfork", "webrtc"],
"templating": {
"list": [
{
"current": {},
"hide": 0,
"includeAll": false,
"label": "Datasource",
"name": "datasource",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
}
]
},
"time": {"from": "now-1h", "to": "now"},
"timepicker": {},
"timezone": "browser",
"title": "Dragon Fork — WebRTC Health",
"uid": "dragonfork-webrtc-health",
"version": 1
}