diff --git a/deploy/truenas/core/prom/rules/webrtc-alerts.yml b/deploy/truenas/core/prom/rules/webrtc-alerts.yml
new file mode 100644
index 0000000..628a77d
--- /dev/null
+++ b/deploy/truenas/core/prom/rules/webrtc-alerts.yml
@@ -0,0 +1,60 @@
+# Prometheus alerting rules for the dragonfork WebRTC metrics.
+groups:
+  - name: dragonfork-webrtc
+    rules:
+      # Per-stream rate of WHEP requests whose code label is 4xx or 5xx,
+      # averaged over 5m; must stay above 0.5/sec for 5m before firing.
+      - alert: WebRTCWHEPErrorRateHigh
+        expr: |
+          sum by (stream_id) (
+            rate(dragonfork_webrtc_whep_requests_total{code=~"4..|5.."}[5m])
+          ) > 0.5
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "WHEP error rate high on stream {{ $labels.stream_id }}"
+          description: "Sustained 4xx/5xx rate >0.5/sec for 5m."
+
+      # Per-stream p95 of the ICE establishment duration histogram over a
+      # 10m window. `le` is kept in the aggregation because
+      # histogram_quantile needs the bucket boundaries.
+      - alert: WebRTCICEEstablishmentSlow
+        expr: |
+          histogram_quantile(0.95,
+            sum by (le, stream_id) (
+              rate(dragonfork_webrtc_ice_establishment_duration_seconds_bucket[10m])
+            )
+          ) > 3
+        for: 10m
+        labels:
+          severity: warning
+        annotations:
+          summary: "ICE establishment p95 >3s on {{ $labels.stream_id }}"
+          description: "p95 ICE establishment time >3s (10m window) for 10m."
+
+      # Per-stream ICE failure rate averaged over 5m; must stay above
+      # 0.2/sec for 5m before firing.
+      - alert: WebRTCICEFailureRateHigh
+        expr: |
+          sum by (stream_id) (rate(dragonfork_webrtc_ice_failures_total[5m])) > 0.2
+        for: 5m
+        labels:
+          severity: warning
+        annotations:
+          summary: "ICE failures sustained on {{ $labels.stream_id }}"
+          description: "Sustained ICE failure rate >0.2/sec for 5m."
+
+      # Any increase of the FFmpeg leg failure counter within the last 5m.
+      # There is no `for:` clause, so the alert fires on the first
+      # evaluation that matches. The expression does not aggregate, so the
+      # series' own labels feed the summary template.
+      # NOTE(review): confirm the metric exports stream_id and leg.
+      - alert: WebRTCFFmpegLegFailure
+        expr: |
+          increase(dragonfork_webrtc_ffmpeg_leg_failures_total[5m]) > 0
+        labels:
+          severity: critical
+        annotations:
+          summary: "FFmpeg RTP leg failed on {{ $labels.stream_id }} ({{ $labels.leg }})"
+          description: "Process stopped while peers were active. Check FFmpeg logs."