diff --git a/core/webrtc/keyframecache.go b/core/webrtc/keyframecache.go new file mode 100644 index 0000000..83caeb7 --- /dev/null +++ b/core/webrtc/keyframecache.go @@ -0,0 +1,100 @@ +package webrtc + +import ( + "sync" + + "github.com/pion/rtp" +) + +// keyFrameCache retains the most recent H.264 keyframe burst so that +// new WHEP subscribers can receive it immediately on Subscribe(), +// cutting first-frame latency from up to one IDR interval (typically +// 2 s at a 0.5 Hz keyframe rate) to nearly zero. +// +// A "burst" spans all RTP packets from the first fragment of an IDR NAL +// until (but not including) the next IDR NAL. The cache is bounded by +// maxPackets and maxBytes to cap per-stream memory usage. +// +// Thread safety: all public methods are safe for concurrent use. +// push() is intended to be called only from the single-goroutine +// readLoop — the lock it holds is small and brief. +type keyFrameCache struct { + mu sync.Mutex + packets []*rtp.Packet + byteLen int + maxPackets int + maxBytes int +} + +// newKeyFrameCache returns a cache bounded to 512 packets / 2 MiB. +// At typical H.264 streaming bitrates (1–4 Mbps), an IDR frame plus a +// handful of subsequent P-frames fits comfortably within these limits. +func newKeyFrameCache() *keyFrameCache { + return &keyFrameCache{ + packets: make([]*rtp.Packet, 0, 64), + maxPackets: 512, + maxBytes: 2 << 20, // 2 MiB + } +} + +// isH264IDRStart returns true if pkt begins an H.264 IDR (keyframe) +// NAL. It recognises: +// +// - Single NAL unit packets where NAL type == 5 (IDR slice). +// - FU-A fragments with the start bit set and inner NAL type == 5. +// +// STAP-A aggregates that happen to lead with an IDR NAL are not +// detected here; in practice FFmpeg and GStreamer never use STAP-A for +// IDR slices because the slices are too large. If that assumption +// changes, add STAP-A handling in a future revision. +func isH264IDRStart(pkt *rtp.Packet) bool { + p := pkt.Payload + if len(p) == 0 { + return false + } + nalType := p[0] & 0x1F + switch nalType { + case 5: // Single NAL unit, IDR slice + return true + case 28: // FU-A — byte 1 is the FU header: bit 7 = start, bits 4–0 = inner type + return len(p) >= 2 && p[1]&0x80 != 0 && p[1]&0x1F == 5 + } + return false +} + +// push appends pkt to the cache. If pkt is the start of an H.264 IDR +// NAL the existing burst is cleared first so the cache always holds +// exactly one complete keyframe burst. Packets beyond the capacity +// limits are silently dropped. +// +// push is called exclusively from readLoop (a single goroutine); the +// isH264IDRStart check outside the lock is therefore safe. +func (c *keyFrameCache) push(pkt *rtp.Packet) { + isIDR := isH264IDRStart(pkt) + payloadLen := len(pkt.Payload) + c.mu.Lock() + if isIDR { + c.packets = c.packets[:0] + c.byteLen = 0 + } + if len(c.packets) < c.maxPackets && c.byteLen+payloadLen <= c.maxBytes { + c.packets = append(c.packets, pkt) + c.byteLen += payloadLen + } + c.mu.Unlock() +} + +// snapshot returns a shallow copy of the current burst. The returned +// slice is safe to iterate without holding any lock; the *rtp.Packet +// values are never mutated after being placed in the cache. +// Returns nil when the cache is empty. +func (c *keyFrameCache) snapshot() []*rtp.Packet { + c.mu.Lock() + defer c.mu.Unlock() + if len(c.packets) == 0 { + return nil + } + snap := make([]*rtp.Packet, len(c.packets)) + copy(snap, c.packets) + return snap +}