feat(deploy): add WebRTC Prometheus alert rules
Some checks failed
ci / race tests (push) Blocked by required conditions
ci / WebRTC smoke (5-viewer fanout) (push) Blocked by required conditions
ci / WebRTC latency p95 gate (push) Blocked by required conditions
ci / vet + build (push) Has been cancelled

This commit is contained in:
Zac Gaetano 2026-05-06 15:59:11 -04:00
parent 4b8d9f0e8c
commit e8f39daa75

View file

@ -0,0 +1,45 @@
# Prometheus alerting rules for the dragonfork WebRTC metrics.
# The first three rules aggregate by stream_id, so they fire once per stream.
groups:
  - name: dragonfork-webrtc
    rules:
      # Per-stream rate of WHEP requests whose `code` label matches 4xx or 5xx
      # has stayed above 0.5 req/s for 5 minutes.
      - alert: WebRTCWHEPErrorRateHigh
        expr: |
          sum by (stream_id) (
            rate(dragonfork_webrtc_whep_requests_total{code=~"4..|5.."}[5m])
          ) > 0.5
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "WHEP error rate high on stream {{ $labels.stream_id }}"
          description: "Sustained 4xx/5xx rate >0.5/sec for 5m."

      # Per-stream p95 of the ICE establishment duration histogram (10m rate
      # window) has stayed above 3 seconds for 10 minutes. `le` must remain in
      # the `by` clause: histogram_quantile needs the bucket boundaries.
      - alert: WebRTCICEEstablishmentSlow
        expr: |
          histogram_quantile(0.95,
            sum by (le, stream_id) (
              rate(dragonfork_webrtc_ice_establishment_duration_seconds_bucket[10m])
            )
          ) > 3
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "ICE establishment p95 >3s on {{ $labels.stream_id }}"
          description: "p95 ICE establishment time >3s for 10m."

      # Per-stream rate of the ICE failure counter has stayed above 0.2/s for
      # 5 minutes.
      - alert: WebRTCICEFailureRateHigh
        expr: |
          sum by (stream_id) (rate(dragonfork_webrtc_ice_failures_total[5m])) > 0.2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "ICE failures sustained on {{ $labels.stream_id }}"
          description: "Sustained ICE failure rate >0.2/sec for 5m."

      # Any increase of the FFmpeg leg failure counter within the last 5
      # minutes. There is no `for:` clause, so the alert fires on the first
      # evaluation that sees an increase. The expression is not aggregated, so
      # each series' labels are passed through to the annotations.
      # NOTE(review): the summary assumes the metric carries `stream_id` and
      # `leg` labels -- confirm against the exporter.
      - alert: WebRTCFFmpegLegFailure
        expr: |
          increase(dragonfork_webrtc_ffmpeg_leg_failures_total[5m]) > 0
        labels:
          severity: critical
        annotations:
          summary: "FFmpeg RTP leg failed on {{ $labels.stream_id }} ({{ $labels.leg }})"
          description: "Process stopped while peers were active. Check FFmpeg logs."