diff --git a/core/webrtc/keyframecache.go b/core/webrtc/keyframecache.go index 83caeb7..9227f04 100644 --- a/core/webrtc/keyframecache.go +++ b/core/webrtc/keyframecache.go @@ -38,15 +38,15 @@ func newKeyFrameCache() *keyFrameCache { } // isH264IDRStart returns true if pkt begins an H.264 IDR (keyframe) -// NAL. It recognises: +// NAL. It recognises three RFC 6184 payload structures: // -// - Single NAL unit packets where NAL type == 5 (IDR slice). -// - FU-A fragments with the start bit set and inner NAL type == 5. -// -// STAP-A aggregates that happen to lead with an IDR NAL are not -// detected here; in practice FFmpeg and GStreamer never use STAP-A for -// IDR slices because the slices are too large. If that assumption -// changes, add STAP-A handling in a future revision. +// - Single NAL unit (type 5): the entire payload is one IDR slice. +// - FU-A fragment (type 28): the FU header byte has the start bit set +// (0x80) and the inner NAL type is 5. +// - STAP-A aggregate (type 24): the first NAL in the aggregate is an +// IDR slice. STAP-A format: byte 0 = NAL header (type 24), bytes +// 1–2 = first NAL size (big-endian uint16), byte 3 = first NAL +// header. Minimum valid payload: 4 bytes. func isH264IDRStart(pkt *rtp.Packet) bool { p := pkt.Payload if len(p) == 0 { @@ -56,6 +56,8 @@ func isH264IDRStart(pkt *rtp.Packet) bool { switch nalType { case 5: // Single NAL unit, IDR slice return true + case 24: // STAP-A — bytes 1–2 are the first NAL's size; byte 3 is its header + return len(p) >= 4 && p[3]&0x1F == 5 case 28: // FU-A — byte 1 is the FU header: bit 7 = start, bits 4–0 = inner type return len(p) >= 2 && p[1]&0x80 != 0 && p[1]&0x1F == 5 }