From 8266ca72e618dabcf0412cf4d06177937c56b50d Mon Sep 17 00:00:00 2001 From: ZGaetano Date: Sun, 10 May 2026 13:19:56 -0400 Subject: [PATCH] fix(webrtc): detect STAP-A IDR start in keyframe cache (issue #18) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend isH264IDRStart to handle STAP-A aggregates (NAL type 24, RFC 6184 §5.7.1). The first NAL in the aggregate starts at payload offset 3 (after the 1-byte STAP-A header and the 2-byte size field); if its type is 5 (IDR slice) the packet is treated as an IDR start and the burst cache is reset. This closes the gap noted in NOTES.md: a publisher using STAP-A for IDR (e.g. a custom GStreamer pipeline or hardware encoder) will now correctly reset the burst rather than accumulating packets until hitting the 512-packet / 2 MiB capacity cap. --- core/webrtc/keyframecache.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/core/webrtc/keyframecache.go b/core/webrtc/keyframecache.go index 83caeb7..9227f04 100644 --- a/core/webrtc/keyframecache.go +++ b/core/webrtc/keyframecache.go @@ -38,15 +38,15 @@ func newKeyFrameCache() *keyFrameCache { } // isH264IDRStart returns true if pkt begins an H.264 IDR (keyframe) -// NAL. It recognises: +// NAL. It recognises three RFC 6184 packetisation modes: // -// - Single NAL unit packets where NAL type == 5 (IDR slice). -// - FU-A fragments with the start bit set and inner NAL type == 5. -// -// STAP-A aggregates that happen to lead with an IDR NAL are not -// detected here; in practice FFmpeg and GStreamer never use STAP-A for -// IDR slices because the slices are too large. If that assumption -// changes, add STAP-A handling in a future revision. +// - Single NAL unit (type 5): the entire payload is one IDR slice. +// - FU-A fragment (type 28): the FU header byte has the start bit set +// (0x80) and the inner NAL type is 5. +// - STAP-A aggregate (type 24): the first NAL in the aggregate is an +// IDR slice. 
STAP-A format: byte 0 = NAL header (type 24), bytes +// 1–2 = first NAL size (big-endian uint16), byte 3 = first NAL +// header. Minimum valid payload: 4 bytes. func isH264IDRStart(pkt *rtp.Packet) bool { p := pkt.Payload if len(p) == 0 { @@ -56,6 +56,8 @@ func isH264IDRStart(pkt *rtp.Packet) bool { switch nalType { case 5: // Single NAL unit, IDR slice return true + case 24: // STAP-A — bytes 1–2 are the first NAL's size; byte 3 is its header + return len(p) >= 4 && p[3]&0x1F == 5 case 28: // FU-A — byte 1 is the FU header: bit 7 = start, bits 4–0 = inner type return len(p) >= 2 && p[1]&0x80 != 0 && p[1]&0x1F == 5 }