datarhei-dragonfork-core/core/webrtc/keyframecache.go

101 lines
3.2 KiB
Go
Raw Normal View History

package webrtc
import (
"sync"
"github.com/pion/rtp"
)
// keyFrameCache holds the RTP packets of the most recent H.264 keyframe
// burst. Its purpose is to let a newly attached WHEP subscriber be sent
// a decodable starting point right away instead of waiting for the next
// IDR, which can take up to a full keyframe interval (typically 2 s at
// a 0.5 Hz keyframe rate).
//
// A burst begins with the first packet of an IDR NAL and continues
// until the next IDR starts, at which point it is replaced. Growth is
// capped by maxPackets and maxBytes so that memory use per stream
// stays bounded.
//
// Concurrency: push and snapshot both take mu, so they may be called
// from different goroutines. push is meant to be driven by a single
// producer goroutine (the stream's readLoop) and holds the lock only
// briefly.
type keyFrameCache struct {
	// Capacity limits; set by newKeyFrameCache and only read afterwards.
	maxPackets int // maximum number of packets kept in one burst
	maxBytes   int // maximum sum of payload lengths kept in one burst

	mu      sync.Mutex    // guards packets and byteLen
	packets []*rtp.Packet // current burst, in arrival order
	byteLen int           // total payload bytes across packets
}
// newKeyFrameCache builds an empty cache limited to 512 packets and
// 2 MiB of payload. At typical H.264 streaming bitrates (1–4 Mbps) an
// IDR frame followed by a few P-frames stays well inside those limits.
// The packet slice starts with room for 64 entries and grows on demand.
func newKeyFrameCache() *keyFrameCache {
	const (
		initialCap  = 64              // starting capacity of the packet slice
		packetLimit = 512             // hard cap on packets per burst
		byteLimit   = 2 * 1024 * 1024 // hard cap on payload bytes per burst (2 MiB)
	)

	c := new(keyFrameCache)
	c.packets = make([]*rtp.Packet, 0, initialCap)
	c.maxPackets = packetLimit
	c.maxBytes = byteLimit
	return c
}
// isH264IDRStart reports whether pkt carries the beginning of an H.264
// IDR (keyframe) NAL unit. Two payload layouts are recognised:
//
//   - a single NAL unit packet whose NAL type is 5 (IDR slice);
//   - an FU-A fragment (NAL type 28) whose FU header has the start bit
//     set and an inner NAL type of 5.
//
// An empty payload, or an FU-A packet too short to contain an FU
// header, yields false.
//
// STAP-A aggregates that lead with an IDR NAL are deliberately not
// detected. In practice FFmpeg and GStreamer do not put IDR slices in
// STAP-A packets because the slices are too large; if that assumption
// stops holding, STAP-A handling has to be added here.
func isH264IDRStart(pkt *rtp.Packet) bool {
	const (
		nalTypeMask = 0x1F // bits 4–0 of a NAL header or FU header: the NAL type
		nalTypeIDR  = 5    // IDR slice
		nalTypeFUA  = 28   // FU-A fragmentation unit
		fuStartBit  = 0x80 // bit 7 of the FU header: first fragment of the NAL
	)

	payload := pkt.Payload
	if len(payload) == 0 {
		return false
	}

	outerType := payload[0] & nalTypeMask
	if outerType == nalTypeIDR {
		// Whole IDR slice carried in a single packet.
		return true
	}
	if outerType != nalTypeFUA || len(payload) < 2 {
		return false
	}

	// FU-A: the second byte is the FU header describing the fragmented NAL.
	fuHeader := payload[1]
	return fuHeader&fuStartBit != 0 && fuHeader&nalTypeMask == nalTypeIDR
}
// push offers pkt to the cache.
//
// Behaviour:
//
//   - A nil pkt is ignored.
//   - If pkt starts an H.264 IDR NAL belonging to a new picture, the
//     previous burst is discarded and pkt becomes the first packet of
//     the new burst. An IDR start whose RTP timestamp equals that of
//     the burst's first packet is a further slice of the same picture
//     (all packets of one access unit share a timestamp), so it is
//     appended rather than wiping the slices already collected.
//   - Non-IDR packets are appended only while a burst is in progress.
//     Until the first IDR has been seen nothing is stored, so a
//     non-empty cache always begins with a keyframe.
//   - Packets that would exceed maxPackets or maxBytes are silently
//     dropped; the burst is then truncated but still starts at the IDR.
//
// The cache stores the pointer it is given and does not copy the
// packet, so the caller must not modify pkt afterwards.
//
// push is intended to be called from a single producer goroutine
// (readLoop); snapshot may run concurrently from other goroutines.
func (c *keyFrameCache) push(pkt *rtp.Packet) {
	if pkt == nil {
		return
	}

	// Classifying the packet only reads pkt, so do it before taking the
	// lock to keep the critical section short.
	startsIDR := isH264IDRStart(pkt)
	size := len(pkt.Payload)

	c.mu.Lock()
	defer c.mu.Unlock()

	switch {
	case startsIDR:
		// Start a fresh burst unless this is another slice of the IDR
		// picture that opened the current burst (same RTP timestamp).
		if len(c.packets) > 0 && c.packets[0].Timestamp != pkt.Timestamp {
			c.packets = c.packets[:0]
			c.byteLen = 0
		}
	case len(c.packets) == 0:
		// No keyframe has been cached yet; a burst that does not begin
		// with an IDR would be useless to a new subscriber.
		return
	}

	if len(c.packets) >= c.maxPackets || c.byteLen+size > c.maxBytes {
		return
	}
	c.packets = append(c.packets, pkt)
	c.byteLen += size
}
// snapshot returns the packets of the current burst in a freshly
// allocated slice, or nil when nothing is cached. Only the slice is
// copied: its elements are the same *rtp.Packet pointers the cache
// holds. The result can be ranged over without any lock because later
// push calls never touch the returned slice, and the cache itself
// never modifies a packet once stored; callers should likewise treat
// the packets as read-only.
func (c *keyFrameCache) snapshot() []*rtp.Packet {
	c.mu.Lock()
	defer c.mu.Unlock()

	n := len(c.packets)
	if n == 0 {
		return nil
	}

	// Appending onto an empty slice with capacity n yields the same
	// length and capacity as allocating n elements and copying.
	return append(make([]*rtp.Packet, 0, n), c.packets...)
}