From ccdf23c8a61b36a314224f5406bf10bb9f27ef6c Mon Sep 17 00:00:00 2001 From: Raja Subramanian Date: Fri, 1 May 2026 20:06:29 +0530 Subject: [PATCH] Use mediatransportutil/codec package, no functional change (#4497) --- go.mod | 2 +- go.sum | 4 +- pkg/rtc/mediatrackreceiver.go | 5 +- pkg/rtc/wrappedreceiver.go | 3 +- pkg/sfu/buffer/buffer_base.go | 41 +- pkg/sfu/buffer/dependencydescriptorparser.go | 7 +- pkg/sfu/buffer/fps.go | 3 +- pkg/sfu/buffer/fps_test.go | 3 +- pkg/sfu/buffer/h26xhelper.go | 541 ------------------ pkg/sfu/buffer/h26xhelper_test.go | 86 --- pkg/sfu/buffer/helpers.go | 479 ---------------- pkg/sfu/buffer/helpers_test.go | 110 ---- pkg/sfu/codecmunger/vp8.go | 17 +- pkg/sfu/codecmunger/vp8_test.go | 28 +- pkg/sfu/forwarder_test.go | 35 +- pkg/sfu/receiver_base.go | 11 +- pkg/sfu/testutils/data.go | 5 +- .../temporallayerselector/vp8.go | 3 +- 18 files changed, 86 insertions(+), 1297 deletions(-) delete mode 100644 pkg/sfu/buffer/h26xhelper.go delete mode 100644 pkg/sfu/buffer/h26xhelper_test.go delete mode 100644 pkg/sfu/buffer/helpers.go delete mode 100644 pkg/sfu/buffer/helpers_test.go diff --git a/go.mod b/go.mod index dcaa43d94..7ad5b769e 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/jellydator/ttlcache/v3 v3.4.0 github.com/jxskiss/base62 v1.1.0 github.com/livekit/mageutil v0.0.0-20250511045019-0f1ff63f7731 - github.com/livekit/mediatransportutil v0.0.0-20260422071032-c2eb358ac882 + github.com/livekit/mediatransportutil v0.0.0-20260501135216-8818f1b77e59 github.com/livekit/protocol v1.45.6 github.com/livekit/psrpc v0.7.1 github.com/mackerelio/go-osstat v0.2.7 diff --git a/go.sum b/go.sum index ce26128c4..368c2347f 100644 --- a/go.sum +++ b/go.sum @@ -179,8 +179,8 @@ github.com/lithammer/shortuuid/v4 v4.2.0 h1:LMFOzVB3996a7b8aBuEXxqOBflbfPQAiVzkI github.com/lithammer/shortuuid/v4 v4.2.0/go.mod h1:D5noHZ2oFw/YaKCfGy0YxyE7M0wMbezmMjPdhyEFe6Y= github.com/livekit/mageutil v0.0.0-20250511045019-0f1ff63f7731 h1:9x+U2HGLrSw5ATTo469PQPkqzdoU7be46ryiCDO3boc= github.com/livekit/mageutil v0.0.0-20250511045019-0f1ff63f7731/go.mod h1:Rs3MhFwutWhGwmY1VQsygw28z5bWcnEYmS1OG9OxjOQ= -github.com/livekit/mediatransportutil v0.0.0-20260422071032-c2eb358ac882 h1:HHG7ogGrxU/cCQ7mq6zT8V684+BSWPAaW3N5906FU/c= -github.com/livekit/mediatransportutil v0.0.0-20260422071032-c2eb358ac882/go.mod h1:RCd46PT+6sEztld6XpkCrG1xskb0u3SqxIjy4G897Ss= +github.com/livekit/mediatransportutil v0.0.0-20260501135216-8818f1b77e59 h1:lWRMrb4ReRJu/e/BAp1kpT6fQOjS8WjCxdp0PGjgrBc= +github.com/livekit/mediatransportutil v0.0.0-20260501135216-8818f1b77e59/go.mod h1:RCd46PT+6sEztld6XpkCrG1xskb0u3SqxIjy4G897Ss= github.com/livekit/protocol v1.45.6 h1:E+wKxs8ckKNYYTNyHm5nR1ShGLJ5DmA+WCEb5AJG11A= github.com/livekit/protocol v1.45.6/go.mod h1:e6QdWDkfot+M2nRh0eitJUS0ZLuwvKCsfiz2pWWSG3s= github.com/livekit/psrpc v0.7.1 h1:ms37az0QTD3UXIWuUC5D/SkmKOlRMVRsI261eBWu/Vw= diff --git a/pkg/rtc/mediatrackreceiver.go b/pkg/rtc/mediatrackreceiver.go index f4643af94..cffde83b5 100644 --- a/pkg/rtc/mediatrackreceiver.go +++ b/pkg/rtc/mediatrackreceiver.go @@ -26,6 +26,7 @@ import ( "go.uber.org/atomic" "google.golang.org/protobuf/proto" + "github.com/livekit/mediatransportutil/pkg/codec" "github.com/livekit/protocol/codecs/mime" "github.com/livekit/protocol/livekit" "github.com/livekit/protocol/logger" @@ -976,7 +977,7 @@ func (t *MediaTrackReceiver) UpdateVideoTrack(update *livekit.UpdateLocalVideoTr t.params.Logger.Debugw("updated video track", "before", logger.Proto(trackInfo), "after", logger.Proto(clonedInfo)) } -func (t *MediaTrackReceiver) UpdateVideoSize(mimeType mime.MimeType, sizes []buffer.VideoSize) { +func (t *MediaTrackReceiver) UpdateVideoSize(mimeType mime.MimeType, sizes []codec.VideoSize) { var changed bool t.lock.Lock() trackInfo := t.TrackInfo() @@ -1062,7 +1063,7 @@ func (t *MediaTrackReceiver) GetQualityForDimension(mimeType mime.MimeType, widt trackInfo := t.TrackInfo() - var mediaSizes []buffer.VideoSize + var mediaSizes []codec.VideoSize if receiver := t.Receiver(mimeType); receiver != nil { mediaSizes = receiver.VideoSizes() } diff --git a/pkg/rtc/wrappedreceiver.go b/pkg/rtc/wrappedreceiver.go index 6623b3c59..4ff4da723 100644 --- a/pkg/rtc/wrappedreceiver.go +++ b/pkg/rtc/wrappedreceiver.go @@ -23,6 +23,7 @@ import ( "github.com/pion/webrtc/v4" "go.uber.org/atomic" + "github.com/livekit/mediatransportutil/pkg/codec" protoCodecs "github.com/livekit/protocol/codecs" "github.com/livekit/protocol/codecs/mime" "github.com/livekit/protocol/livekit" @@ -469,7 +470,7 @@ func (d *DummyReceiver) CodecState() sfu.ReceiverCodecState { return sfu.ReceiverCodecStateNormal } -func (d *DummyReceiver) VideoSizes() []buffer.VideoSize { +func (d *DummyReceiver) VideoSizes() []codec.VideoSize { if receiver := d.getReceiver(); receiver != nil { return receiver.VideoSizes() } diff --git a/pkg/sfu/buffer/buffer_base.go b/pkg/sfu/buffer/buffer_base.go index 20e3bc7c9..241950a5a 100644 --- a/pkg/sfu/buffer/buffer_base.go +++ b/pkg/sfu/buffer/buffer_base.go @@ -36,6 +36,7 @@ import ( "github.com/livekit/livekit-server/pkg/sfu/rtpstats" "github.com/livekit/livekit-server/pkg/sfu/utils" "github.com/livekit/mediatransportutil/pkg/bucket" + "github.com/livekit/mediatransportutil/pkg/codec" "github.com/livekit/mediatransportutil/pkg/nack" "github.com/livekit/protocol/codecs/mime" "github.com/livekit/protocol/livekit" @@ -79,12 +80,6 @@ type ExtPacket struct { IsBuffered bool } -// VideoSize represents video resolution -type VideoSize struct { - Width uint32 - Height uint32 -} - type BufferProvider interface { SetLogger(lgr logger.Logger) SetAudioLevelConfig(audioLevelConfig audio.AudioLevelConfig) @@ -111,7 +106,7 @@ type BufferProvider interface { OnRtcpSenderReport(fn func()) OnFpsChanged(fn func()) - OnVideoSizeChanged(fn func([]VideoSize)) + OnVideoSizeChanged(fn func([]codec.VideoSize)) OnCodecChange(fn func(webrtc.RTPCodecParameters)) OnStreamRestart(fn func(string)) @@ -192,12 +187,12 @@ type BufferBase struct { // callbacks onRtcpSenderReport func() onFpsChanged func() - onVideoSizeChanged func([]VideoSize) + onVideoSizeChanged func([]codec.VideoSize) onCodecChange func(webrtc.RTPCodecParameters) onStreamRestart func(string) // video size tracking for multiple spatial layers - currentVideoSize [DefaultMaxLayerSpatial + 1]VideoSize + currentVideoSize [DefaultMaxLayerSpatial + 1]codec.VideoSize logger logger.Logger @@ -1065,7 +1060,7 @@ func (b *BufferBase) processVideoPacket(ep *ExtPacket) error { } ep.Temporal = 0 - var videoSize []VideoSize + var videoSize []codec.VideoSize if b.ddParser != nil { ddVal, videoLayer, err := b.ddParser.Parse(ep.Packet) if err != nil { @@ -1088,7 +1083,7 @@ func (b *BufferBase) processVideoPacket(ep *ExtPacket) error { switch b.mime { case mime.MimeTypeVP8: - vp8Packet := VP8{} + vp8Packet := codec.VP8{} if err := vp8Packet.Unmarshal(ep.Packet.Payload); err != nil { b.logger.Warnw("could not unmarshal VP8 packet", err) return err @@ -1098,7 +1093,7 @@ func (b *BufferBase) processVideoPacket(ep *ExtPacket) error { ep.Temporal = int32(vp8Packet.TID) if ep.IsKeyFrame { - if sz := ExtractVP8VideoSize(&vp8Packet, ep.Packet.Payload); sz.Width > 0 && sz.Height > 0 { + if sz := codec.ExtractVP8VideoSize(&vp8Packet, ep.Packet.Payload); sz.Width > 0 && sz.Height > 0 { videoSize = append(videoSize, sz) } } @@ -1122,36 +1117,36 @@ func (b *BufferBase) processVideoPacket(ep *ExtPacket) error { Temporal: int32(vp9Packet.TID), } ep.Payload = vp9Packet - ep.IsKeyFrame = IsVP9KeyFrame(&vp9Packet, ep.Packet.Payload) + ep.IsKeyFrame = codec.IsVP9KeyFrame(&vp9Packet, ep.Packet.Payload) if ep.IsKeyFrame { for i := 0; i < len(vp9Packet.Width); i++ { - videoSize = append(videoSize, VideoSize{ + videoSize = append(videoSize, codec.VideoSize{ Width: uint32(vp9Packet.Width[i]), Height: uint32(vp9Packet.Height[i]), }) } } } else { - ep.IsKeyFrame = IsVP9KeyFrame(nil, ep.Packet.Payload) + ep.IsKeyFrame = codec.IsVP9KeyFrame(nil, ep.Packet.Payload) } case mime.MimeTypeH264: - ep.IsKeyFrame = IsH264KeyFrame(ep.Packet.Payload) + ep.IsKeyFrame = codec.IsH264KeyFrame(ep.Packet.Payload) ep.Spatial = InvalidLayerSpatial // h.264 don't have spatial scalability, reset to invalid // Check H264 key frame video size if ep.IsKeyFrame { - if sz := ExtractH264VideoSize(ep.Packet.Payload); sz.Width > 0 && sz.Height > 0 { + if sz := codec.ExtractH264VideoSize(ep.Packet.Payload); sz.Width > 0 && sz.Height > 0 { videoSize = append(videoSize, sz) } } case mime.MimeTypeAV1: - ep.IsKeyFrame = IsAV1KeyFrame(ep.Packet.Payload) + ep.IsKeyFrame = codec.IsAV1KeyFrame(ep.Packet.Payload) case mime.MimeTypeH265: - ep.IsKeyFrame = IsH265KeyFrame(ep.Packet.Payload) + ep.IsKeyFrame = codec.IsH265KeyFrame(ep.Packet.Payload) if ep.DependencyDescriptor == nil { if len(ep.Packet.Payload) < 2 { b.logger.Warnw("invalid H265 packet", nil, "payloadLen", len(ep.Packet.Payload)) @@ -1163,7 +1158,7 @@ func (b *BufferBase) processVideoPacket(ep *ExtPacket) error { ep.Spatial = InvalidLayerSpatial if ep.IsKeyFrame { - if sz := ExtractH265VideoSize(ep.Packet.Payload); sz.Width > 0 && sz.Height > 0 { + if sz := codec.ExtractH265VideoSize(ep.Packet.Payload); sz.Width > 0 && sz.Height > 0 { videoSize = append(videoSize, sz) } } @@ -1430,7 +1425,7 @@ func (b *BufferBase) OnFpsChanged(f func()) { b.Unlock() } -func (b *BufferBase) OnVideoSizeChanged(fn func([]VideoSize)) { +func (b *BufferBase) OnVideoSizeChanged(fn func([]codec.VideoSize)) { b.Lock() b.onVideoSizeChanged = fn b.Unlock() @@ -1449,7 +1444,7 @@ func (b *BufferBase) OnStreamRestart(fn func(string)) { } // checkVideoSizeChange checks if video size has changed for a specific spatial layer and fires callback -func (b *BufferBase) checkVideoSizeChange(videoSizes []VideoSize) { +func (b *BufferBase) checkVideoSizeChange(videoSizes []codec.VideoSize) { if len(videoSizes) > len(b.currentVideoSize) { b.logger.Warnw( "video size index out of range", nil, @@ -1460,7 +1455,7 @@ func (b *BufferBase) checkVideoSizeChange(videoSizes []VideoSize) { } if len(videoSizes) < len(b.currentVideoSize) { - videoSizes = append(videoSizes, make([]VideoSize, len(b.currentVideoSize)-len(videoSizes))...) + videoSizes = append(videoSizes, make([]codec.VideoSize, len(b.currentVideoSize)-len(videoSizes))...) } changed := false diff --git a/pkg/sfu/buffer/dependencydescriptorparser.go b/pkg/sfu/buffer/dependencydescriptorparser.go index 842aae777..618200aab 100644 --- a/pkg/sfu/buffer/dependencydescriptorparser.go +++ b/pkg/sfu/buffer/dependencydescriptorparser.go @@ -24,6 +24,7 @@ import ( "go.uber.org/atomic" dd "github.com/livekit/livekit-server/pkg/sfu/rtpextension/dependencydescriptor" + "github.com/livekit/mediatransportutil/pkg/codec" "github.com/livekit/mediatransportutil/pkg/utils" "github.com/livekit/protocol/logger" ) @@ -314,14 +315,14 @@ func GetActiveDecodeTargetBitmask(layer VideoLayer, decodeTargets []DependencyDe // ------------------------------------------------------------------------------ -func ExtractDependencyDescriptorVideoSize(dd *dd.DependencyDescriptor) []VideoSize { +func ExtractDependencyDescriptorVideoSize(dd *dd.DependencyDescriptor) []codec.VideoSize { if dd.AttachedStructure == nil { return nil } - videoSizes := make([]VideoSize, 0, len(dd.AttachedStructure.Resolutions)) + videoSizes := make([]codec.VideoSize, 0, len(dd.AttachedStructure.Resolutions)) for _, res := range dd.AttachedStructure.Resolutions { - videoSizes = append(videoSizes, VideoSize{Width: uint32(res.Width), Height: uint32(res.Height)}) + videoSizes = append(videoSizes, codec.VideoSize{Width: uint32(res.Width), Height: uint32(res.Height)}) } return videoSizes diff --git a/pkg/sfu/buffer/fps.go b/pkg/sfu/buffer/fps.go index 5622198f1..023e40c63 100644 --- a/pkg/sfu/buffer/fps.go +++ b/pkg/sfu/buffer/fps.go @@ -19,6 +19,7 @@ import ( "github.com/pion/rtp/codecs" + "github.com/livekit/mediatransportutil/pkg/codec" "github.com/livekit/protocol/logger" ) @@ -209,7 +210,7 @@ func (f *FrameRateCalculatorVP8) RecvPacket(ep *ExtPacket) bool { return true } - vp8, ok := ep.Payload.(VP8) + vp8, ok := ep.Payload.(codec.VP8) if !ok { f.logger.Debugw("no vp8 payload", "sn", ep.Packet.SequenceNumber) return false diff --git a/pkg/sfu/buffer/fps_test.go b/pkg/sfu/buffer/fps_test.go index e9071eb09..9c32cc5a0 100644 --- a/pkg/sfu/buffer/fps_test.go +++ b/pkg/sfu/buffer/fps_test.go @@ -21,6 +21,7 @@ import ( "github.com/stretchr/testify/require" dd "github.com/livekit/livekit-server/pkg/sfu/rtpextension/dependencydescriptor" + "github.com/livekit/mediatransportutil/pkg/codec" "github.com/livekit/protocol/logger" ) @@ -35,7 +36,7 @@ type testFrameInfo struct { func (f *testFrameInfo) toVP8() *ExtPacket { return &ExtPacket{ Packet: &rtp.Packet{Header: f.header}, - Payload: VP8{ + Payload: codec.VP8{ PictureID: f.framenumber, }, VideoLayer: VideoLayer{Spatial: InvalidLayerSpatial, Temporal: int32(f.temporal)}, diff --git a/pkg/sfu/buffer/h26xhelper.go b/pkg/sfu/buffer/h26xhelper.go deleted file mode 100644 index 70323d003..000000000 --- a/pkg/sfu/buffer/h26xhelper.go +++ /dev/null @@ -1,541 +0,0 @@ -package buffer - -import ( - "errors" - "fmt" -) - -// SPSInfo holds parsed SPS parameters -type SPSInfo struct { - ChromaFormatIDC uint - PicWidthInLumaSamples uint - PicHeightInLumaSamples uint - ConformanceWindowFlag bool - ConfWinLeftOffset uint - ConfWinRightOffset uint - ConfWinTopOffset uint - ConfWinBottomOffset uint - CodedWidth, CodedHeight uint // Raw coded resolution - DisplayWidth, DisplayHeight uint // Resolution after conformance window cropping -} - -// -------- BitReader -------- -type BitReader struct { - data []byte - pos int // bit position -} - -func NewBitReader(data []byte) *BitReader { - return &BitReader{data: data} -} - -func (br *BitReader) left() int { - return len(br.data)*8 - br.pos -} - -func (br *BitReader) ReadBits(n int) (uint, error) { - if n < 0 || br.left() < n { - return 0, errors.New("not enough bits") - } - var v uint - for range n { - bytePos := br.pos / 8 - bitPos := 7 - (br.pos % 8) - bit := (br.data[bytePos] >> bitPos) & 1 - v = (v << 1) | uint(bit) - br.pos++ - } - return v, nil -} - -func (br *BitReader) ReadFlag() (bool, error) { - b, err := br.ReadBits(1) - return b == 1, err -} - -func (br *BitReader) ReadUE() (uint, error) { - // Unsigned Exp-Golomb - zeros := 0 - for { - bit, err := br.ReadBits(1) - if err != nil { - return 0, err - } - if bit == 0 { - zeros++ - continue - } - break // hit the stop bit '1' - } - if zeros == 0 { - return 0, nil - } - info, err := br.ReadBits(zeros) - if err != nil { - return 0, err - } - return (1<= 4 && b[0] == 0x00 && b[1] == 0x00 && b[2] == 0x00 && b[3] == 0x01 { - return b[4:] - } - if len(b) >= 3 && b[0] == 0x00 && b[1] == 0x00 && b[2] == 0x01 { - return b[3:] - } - return b -} - -// removeEmulationPreventionBytes removes 0x03 after 0x0000 -func removeEmulationPreventionBytes(data []byte) []byte { - out := make([]byte, 0, len(data)) - for i := range data { - if i > 1 && data[i] == 0x03 && data[i-1] == 0x00 && data[i-2] == 0x00 { - continue - } - out = append(out, data[i]) - } - return out -} - -// parseH265SPS parses a full H.265 SPS NAL unit -func parseH265SPS(nal []byte) (*SPSInfo, error) { - // Optional start code - nal = stripStartCode(nal) - - // Remove emulation prevention bytes across the NAL - rbsp := removeEmulationPreventionBytes(nal) - - br := NewBitReader(rbsp) - - // ---- NAL header (16 bits): forbidden_zero_bit(1), nal_unit_type(6), nuh_layer_id(6), nuh_temporal_id_plus1(3) - if _, err := br.ReadBits(1); err != nil { // forbidden_zero_bit - return nil, err - } - nalUnitType, err := br.ReadBits(6) - if err != nil { - return nil, err - } - if _, err = br.ReadBits(6); err != nil { // nuh_layer_id - return nil, err - } - if _, err = br.ReadBits(3); err != nil { // nuh_temporal_id_plus1 - return nil, err - } - // 33 = SPS - if nalUnitType != 33 { - return nil, fmt.Errorf("not an HEVC SPS NAL (type=%d)", nalUnitType) - } - - // ---- sps_video_parameter_set_id u(4), sps_max_sub_layers_minus1 u(3), sps_temporal_id_nesting_flag u(1) - if _, err = br.ReadBits(4); err != nil { - return nil, err - } - maxSubLayersMinus1, err := br.ReadBits(3) - if err != nil { - return nil, err - } - if _, err = br.ReadBits(1); err != nil { - return nil, err - } - - // ---- profile_tier_level(1, max_sub_layers_minus1) - // general_profile_space u(2), general_tier_flag u(1), general_profile_idc u(5) - if _, err = br.ReadBits(2 + 1 + 5); err != nil { - return nil, err - } - // general_profile_compatibility_flags u(32) - if _, err = br.ReadBits(32); err != nil { - return nil, err - } - // general_constraint_indicator_flags u(48) - if _, err = br.ReadBits(16); err != nil { - return nil, err - } - if _, err = br.ReadBits(32); err != nil { - return nil, err - } - // general_level_idc u(8) - if _, err = br.ReadBits(8); err != nil { - return nil, err - } - - subLayerProfilePresentFlag := make([]bool, maxSubLayersMinus1) - subLayerLevelPresentFlag := make([]bool, maxSubLayersMinus1) - for i := range maxSubLayersMinus1 { - f1, err := br.ReadFlag() - if err != nil { - return nil, err - } - f2, err := br.ReadFlag() - if err != nil { - return nil, err - } - subLayerProfilePresentFlag[i] = f1 - subLayerLevelPresentFlag[i] = f2 - } - if maxSubLayersMinus1 > 0 { - // reserved_zero_2bits for i = maxSubLayersMinus1 .. 7 - for i := maxSubLayersMinus1; i < 8; i++ { - if _, err := br.ReadBits(2); err != nil { - return nil, err - } - } - } - for i := range maxSubLayersMinus1 { - if subLayerProfilePresentFlag[i] { - if _, err = br.ReadBits(2 + 1 + 5); err != nil { - return nil, err - } - if _, err = br.ReadBits(32); err != nil { - return nil, err - } - if _, err = br.ReadBits(48); err != nil { - return nil, err - } - } - if subLayerLevelPresentFlag[i] { - if _, err = br.ReadBits(8); err != nil { - return nil, err - } - } - } - - // ---- Now the core SPS fields we need - _, err = br.ReadUE() // sps_seq_parameter_set_id - if err != nil { - return nil, err - } - - chromaFormatIDC, err := br.ReadUE() - if err != nil { - return nil, err - } - if chromaFormatIDC == 3 { - // separate_colour_plane_flag u(1) - if _, err := br.ReadFlag(); err != nil { - return nil, err - } - } - - picW, err := br.ReadUE() // pic_width_in_luma_samples - if err != nil { - return nil, err - } - picH, err := br.ReadUE() // pic_height_in_luma_samples - if err != nil { - return nil, err - } - - confFlag, err := br.ReadFlag() - if err != nil { - return nil, err - } - var l, r, t, b uint - if confFlag { - if l, err = br.ReadUE(); err != nil { - return nil, err - } - if r, err = br.ReadUE(); err != nil { - return nil, err - } - if t, err = br.ReadUE(); err != nil { - return nil, err - } - if b, err = br.ReadUE(); err != nil { - return nil, err - } - } - - // crop unit size depends on chroma_format_idc - subWidthC, subHeightC := getSubWidthC(chromaFormatIDC), getSubHeightC(chromaFormatIDC) - - info := &SPSInfo{ - ChromaFormatIDC: chromaFormatIDC, - PicWidthInLumaSamples: picW, - PicHeightInLumaSamples: picH, - ConformanceWindowFlag: confFlag, - ConfWinLeftOffset: l, - ConfWinRightOffset: r, - ConfWinTopOffset: t, - ConfWinBottomOffset: b, - CodedWidth: picW, - CodedHeight: picH, - } - - if confFlag { - w := int(picW) - int(l+r)*int(subWidthC) - h := int(picH) - int(t+b)*int(subHeightC) - if w < 0 { - w = 0 - } - if h < 0 { - h = 0 - } - info.DisplayWidth = uint(w) - info.DisplayHeight = uint(h) - } else { - info.DisplayWidth = picW - info.DisplayHeight = picH - } - - return info, nil -} - -func getSubWidthC(chromaFormatIDC uint) uint { - if chromaFormatIDC == 1 || chromaFormatIDC == 2 { - return 2 - } - return 1 -} - -func getSubHeightC(chromaFormatIDC uint) uint { - if chromaFormatIDC == 1 { - return 2 - } - return 1 -} - -func ExtractH265VideoSize(payload []byte) VideoSize { - if len(payload) < 2 { - return VideoSize{} - } - nalType := (payload[0] >> 1) & 0x3F - - var spsNalu []byte - switch nalType { - case 33: // SPS - spsNalu = payload - case 48: // Aggregation Packet (AP) - // skip 2-byte header - i := 2 - for i+2 <= len(payload) { - nalSize := int(payload[i])<<8 | int(payload[i+1]) - i += 2 - if i+nalSize > len(payload) { - break - } - if nalSize == 0 { - continue - } - nalUnit := payload[i : i+nalSize] - nt := (nalUnit[0] >> 1) & 0x3F - if nt == 33 { - spsNalu = nalUnit - break - } - i += nalSize - } - } - - if len(spsNalu) > 0 { - info, err := parseH265SPS(spsNalu) - if err != nil { - return VideoSize{} - } - return VideoSize{Width: uint32(info.DisplayWidth), Height: uint32(info.DisplayHeight)} - } - - return VideoSize{} -} - -// ------------------------- H264 ------------------------- - -// parseH264SPS parses a full H.264 SPS NAL unit into SPSInfo -func parseH264SPS(nal []byte) (*SPSInfo, error) { - if len(nal) < 1 { - return nil, errors.New("empty SPS NAL") - } - nal = stripStartCode(nal) - if len(nal) < 1 { - return nil, errors.New("empty SPS NAL after stripping start code") - } - nalType := nal[0] & 0x1F - if nalType != 7 { - return nil, fmt.Errorf("not an SPS NAL (type=%d)", nalType) - } - - rbsp := removeEmulationPreventionBytes(nal[1:]) // skip NAL header - br := NewBitReader(rbsp) - - profileIDC, _ := br.ReadBits(8) - _, _ = br.ReadBits(8) // constraint flags - _, _ = br.ReadBits(8) // level_idc - _, _ = br.ReadUE() // seq_parameter_set_id - - chromaFormatIDC := uint(1) - if profileIDC == 100 || profileIDC == 110 || profileIDC == 122 || profileIDC == 244 || - profileIDC == 44 || profileIDC == 83 || profileIDC == 86 || profileIDC == 118 || profileIDC == 128 { - chromaFormatIDC, _ = br.ReadUE() - if chromaFormatIDC == 3 { - br.ReadFlag() // separate_colour_plane_flag - } - br.ReadUE() // bit_depth_luma_minus8 - br.ReadUE() // bit_depth_chroma_minus8 - br.ReadFlag() // qpprime_y_zero_transform_bypass_flag - if v, _ := br.ReadFlag(); v { // seq_scaling_matrix_present_flag - for range 8 { - br.ReadFlag() - } - } - } - - br.ReadUE() // log2_max_frame_num_minus4 - pocType, _ := br.ReadUE() - switch pocType { - case 0: - br.ReadUE() - case 1: - br.ReadFlag() - br.ReadSE() - br.ReadSE() - cnt, _ := br.ReadUE() - if cnt > 255 { - return nil, errors.New("SPS: num_ref_frames_in_pic_order_cnt_cycle too large") - } - for range cnt { - br.ReadSE() - } - } - - br.ReadUE() // max_num_ref_frames - br.ReadFlag() // gaps_in_frame_num_value_allowed_flag - - wMbs, _ := br.ReadUE() - hMapUnits, _ := br.ReadUE() - frameMbsOnly, _ := br.ReadFlag() - if !frameMbsOnly { - br.ReadFlag() // mb_adaptive_frame_field_flag - } - br.ReadFlag() // direct_8x8_inference_flag - - var cropLeft, cropRight, cropTop, cropBottom uint - if frameCropping, _ := br.ReadFlag(); frameCropping { - cropLeft, _ = br.ReadUE() - cropRight, _ = br.ReadUE() - cropTop, _ = br.ReadUE() - cropBottom, _ = br.ReadUE() - } - - width := (wMbs + 1) * 16 - height := (hMapUnits + 1) * 16 - if !frameMbsOnly { - height *= 2 - } - - subWidthC := getSubWidthC(chromaFormatIDC) - subHeightC := getSubHeightC(chromaFormatIDC) - cropUnitX := subWidthC - cropUnitY := subHeightC - if chromaFormatIDC == 0 { - cropUnitX = 1 - if !frameMbsOnly { - cropUnitY = 2 - } else { - cropUnitY = 1 - } - } else if !frameMbsOnly { - cropUnitY *= 2 - } - - info := &SPSInfo{ - ChromaFormatIDC: chromaFormatIDC, - PicWidthInLumaSamples: width, - PicHeightInLumaSamples: height, - ConformanceWindowFlag: cropLeft+cropRight+cropTop+cropBottom > 0, - ConfWinLeftOffset: cropLeft, - ConfWinRightOffset: cropRight, - ConfWinTopOffset: cropTop, - ConfWinBottomOffset: cropBottom, - CodedWidth: width, - CodedHeight: height, - DisplayWidth: width - (cropLeft+cropRight)*cropUnitX, - DisplayHeight: height - (cropTop+cropBottom)*cropUnitY, - } - - return info, nil -} - -// ExtractH264VideoSize extracts resolution from H.264 RTP payload -func ExtractH264VideoSize(payload []byte) VideoSize { - if len(payload) < 1 { - return VideoSize{} - } - - parseNAL := func(nal []byte) VideoSize { - info, err := parseH264SPS(nal) - if err != nil { - return VideoSize{} - } - return VideoSize{Width: uint32(info.DisplayWidth), Height: uint32(info.DisplayHeight)} - } - - nalType := payload[0] & 0x1F - - switch nalType { - case 7: // SPS NAL - return parseNAL(payload) - - case 28: // FU-A - if len(payload) < 2 { - return VideoSize{} - } - start := (payload[1] & 0x80) != 0 - if !start { - return VideoSize{} - } - nalHeader := (payload[0] & 0xE0) | (payload[1] & 0x1F) - sps := append([]byte{nalHeader}, payload[2:]...) - return parseNAL(sps) - - case 24, 25, 26, 27: // STAP-A/B, MTAP16, MTAP24 - offset := 1 - switch nalType { - case 25: // STAP-B has 16-bit DON - offset += 2 - case 26: // MTAP16 - offset += 3 - case 27: // MTAP24 - offset += 4 - } - - for offset+2 <= len(payload) { - naluSize := int(payload[offset])<<8 | int(payload[offset+1]) - offset += 2 - if offset+naluSize > len(payload) { - break - } - if naluSize == 0 { - continue - } - nalu := payload[offset : offset+naluSize] - if nalu[0]&0x1F == 7 { // SPS - return parseNAL(nalu) - } - offset += naluSize - } - return VideoSize{} - - default: - return VideoSize{} - } -} diff --git a/pkg/sfu/buffer/h26xhelper_test.go b/pkg/sfu/buffer/h26xhelper_test.go deleted file mode 100644 index affbe4c50..000000000 --- a/pkg/sfu/buffer/h26xhelper_test.go +++ /dev/null @@ -1,86 +0,0 @@ -package buffer - -import ( - "encoding/base64" - "testing" - "time" - - "github.com/stretchr/testify/require" -) - -func TestExtractH26xVideoSize(t *testing.T) { - type testcase struct { - payload string - width uint32 - height uint32 - isH264 bool - } - - testcases := []testcase{ - {"eAAOZ0LAH4xoBQBboB4RCNQABGjOPIA=", 1280, 720, true}, - {"eAAPZ0LAFoxoCgL3lgHhEI1AAARozjyA", 640, 360, true}, - {"eAAOZ0LADIxoFBl54B4RCNQABGjOPIA=", 320, 180, true}, - {"YAEAGkABDAP//wFgAAADALAAAAMAAAMAXQAAGwJAAC9CAQMBYAAAAwCwAAADAAADAF0AAKACgIAtFiBu5FIy5+E9C+ob1SmoCAgIH8IBBAAHRAHAcvBbJA==", 1280, 720, false}, - {"YAEAGkABDAP//wFgAAADALAAAAMAAAMAPwAAGwJAADBCAQMBYAAAAwCwAAADAAADAD8AAKAFAgFx8uIG7kUjLn4T0L6hvVKagICAgfwgEEAAB0QBwHLwWyQ=", 640, 360, false}, - {"QgEDAWAAAAMAsAAAAwAAAwA8AACgCggMHz4gM7kUhi5+E9C+ob1Q/qoI9VQT6qoK9VVBfqqqDPVVVKagICAgfwgEEA==", 320, 180, false}, - } - - for _, tc := range testcases { - payload, err := base64.StdEncoding.DecodeString(tc.payload) - require.NoError(t, err) - - var sz VideoSize - if tc.isH264 { - sz = ExtractH264VideoSize(payload) - } else { - sz = ExtractH265VideoSize(payload) - } - require.Equal(t, tc.width, sz.Width) - require.Equal(t, tc.height, sz.Height) - } -} - -func TestExtractH264VideoSize_ZeroSizeSTAPA(t *testing.T) { - payload := []byte{0x38, 0x00, 0x00} - defer func() { - if r := recover(); r != nil { - t.Fatalf("panicked: %v", r) - } - }() - _ = ExtractH264VideoSize(payload) -} - -func TestExtractH265VideoSize_ZeroSizeAP(t *testing.T) { - payload := []byte{0x61, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} - defer func() { - if r := recover(); r != nil { - t.Fatalf("panicked: %v", r) - } - }() - _ = ExtractH265VideoSize(payload) -} - -func TestParseH264SPS_EmptyAfterStripStartCode(t *testing.T) { - payload := []byte{0x1c, 0xc0, 0x00, 0x01} - defer func() { - if r := recover(); r != nil { - t.Fatalf("panicked: %v", r) - } - }() - _ = ExtractH264VideoSize(payload) -} - -func TestParseH264SPS_UnboundedPocTypeLoop(t *testing.T) { - payload := []byte{0x27, 0x08, 0x30, 0x30, 0x30, 0x41, 0x30, - 0x00, 0x00, 0x00, 0x7f, 0x27, 0x08, 0xff, 0x7f, 0xa8} - done := make(chan struct{}) - go func() { - _ = ExtractH264VideoSize(payload) - close(done) - }() - select { - case <-done: - case <-time.After(3 * time.Second): - t.Fatal("ExtractH264VideoSize hung — CPU exhaustion") - } -} diff --git a/pkg/sfu/buffer/helpers.go b/pkg/sfu/buffer/helpers.go deleted file mode 100644 index 41c516ba4..000000000 --- a/pkg/sfu/buffer/helpers.go +++ /dev/null @@ -1,479 +0,0 @@ -// Copyright 2023 LiveKit, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package buffer - -import ( - "encoding/binary" - "errors" - - "github.com/pion/rtp/codecs" - - "github.com/livekit/protocol/logger" -) - -var ( - errShortPacket = errors.New("packet is not large enough") - errNilPacket = errors.New("invalid nil packet") - errInvalidPacket = errors.New("invalid packet") -) - -// VP8 is a helper to get temporal data from VP8 packet header -/* - VP8 Payload Descriptor - 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7 - +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ - |X|R|N|S|R| PID | (REQUIRED) |X|R|N|S|R| PID | (REQUIRED) - +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ - X: |I|L|T|K| RSV | (OPTIONAL) X: |I|L|T|K| RSV | (OPTIONAL) - +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ - I: |M| PictureID | (OPTIONAL) I: |M| PictureID | (OPTIONAL) - +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ - L: | TL0PICIDX | (OPTIONAL) | PictureID | - +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ - T/K:|TID|Y| KEYIDX | (OPTIONAL) L: | TL0PICIDX | (OPTIONAL) - +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ - T/K:|TID|Y| KEYIDX | (OPTIONAL) - +-+-+-+-+-+-+-+-+ -*/ -type VP8 struct { - FirstByte byte - S bool - - I bool - M bool - PictureID uint16 /* 7 or 15 bits, picture ID */ - - L bool - TL0PICIDX uint8 /* 8 bits temporal level zero index */ - - // Optional Header If either of the T or K bits are set to 1, - // the TID/Y/KEYIDX extension field MUST be present. - T bool - TID uint8 /* 2 bits temporal layer idx */ - Y bool - - K bool - KEYIDX uint8 /* 5 bits of key frame idx */ - - HeaderSize int - - // IsKeyFrame is a helper to detect if current packet is a keyframe - IsKeyFrame bool -} - -// Unmarshal parses the passed byte slice and stores the result in the VP8 this method is called upon -func (v *VP8) Unmarshal(payload []byte) error { - if payload == nil { - return errNilPacket - } - - payloadLen := len(payload) - if payloadLen < 1 { - return errShortPacket - } - - idx := 0 - v.FirstByte = payload[idx] - v.S = payload[idx]&0x10 > 0 - // Check for extended bit control - if payload[idx]&0x80 > 0 { - idx++ - if payloadLen < idx+1 { - return errShortPacket - } - v.I = payload[idx]&0x80 > 0 - v.L = payload[idx]&0x40 > 0 - v.T = payload[idx]&0x20 > 0 - v.K = payload[idx]&0x10 > 0 - if v.L && !v.T { - return errInvalidPacket - } - - if v.I { - idx++ - if payloadLen < idx+1 { - return errShortPacket - } - pid := payload[idx] & 0x7f - // if m is 1, then Picture ID is 15 bits - v.M = payload[idx]&0x80 > 0 - if v.M { - idx++ - if payloadLen < idx+1 { - return errShortPacket - } - v.PictureID = binary.BigEndian.Uint16([]byte{pid, payload[idx]}) - } else { - v.PictureID = uint16(pid) - } - } - - if v.L { - idx++ - if payloadLen < idx+1 { - return errShortPacket - } - v.TL0PICIDX = payload[idx] - } - - if v.T || v.K { - idx++ - if payloadLen < idx+1 { - return errShortPacket - } - - if v.T { - v.TID = (payload[idx] & 0xc0) >> 6 - v.Y = (payload[idx] & 0x20) > 0 - } - - if v.K { - v.KEYIDX = payload[idx] & 0x1f - } - } - idx++ - if payloadLen < idx+1 { - return errShortPacket - } - - // Check is packet is a keyframe by looking at P bit in vp8 payload - v.IsKeyFrame = payload[idx]&0x01 == 0 && v.S - } else { - idx++ - if payloadLen < idx+1 { - return errShortPacket - } - // Check is packet is a keyframe by looking at P bit in vp8 payload - v.IsKeyFrame = payload[idx]&0x01 == 0 && v.S - } - v.HeaderSize = idx - return nil -} - -func (v VP8) Marshal() ([]byte, error) { - var buf [8]byte - n, err := v.MarshalTo(buf[:]) - if err != nil { - return nil, err - } - return buf[:n], err -} - -func (v VP8) MarshalTo(buf []byte) (int, error) { - if len(buf) < v.HeaderSize { - return 0, errShortPacket - } - - idx := 0 - buf[idx] = v.FirstByte - if v.I || v.L || v.T || v.K { - buf[idx] |= 0x80 // X bit - idx++ - - xpos := idx - xval := byte(0) - - idx++ - if v.I { - xval |= (1 << 7) - if v.M { - buf[idx] = 0x80 | byte((v.PictureID>>8)&0x7f) - buf[idx+1] = byte(v.PictureID & 0xff) - idx += 2 - } else { - buf[idx] = byte(v.PictureID) - idx++ - } - } - - if v.L { - xval |= (1 << 6) - buf[idx] = v.TL0PICIDX - idx++ - } - - if v.T || v.K { - buf[idx] = 0 - if v.T { - xval |= (1 << 5) - buf[idx] = v.TID << 6 - if v.Y { - buf[idx] |= (1 << 5) - } - } - - if v.K { - xval |= (1 << 4) - buf[idx] |= v.KEYIDX & 0x1f - } - idx++ - } - - buf[xpos] = xval - } else { - buf[idx] &^= 0x80 // X bit - idx++ - } - - return idx, nil -} - -// ------------------------------------- - -func VPxPictureIdSizeDiff(mBit1 bool, mBit2 bool) int { - if mBit1 == mBit2 { - return 0 - } - - if mBit1 { - return 1 - } - - return -1 -} - -// ------------------------------------- - -// IsH264KeyFrame detects if h264 payload is a keyframe -// this code was taken from https://github.com/jech/galene/blob/codecs/rtpconn/rtpreader.go#L45 -// all credits belongs to Juliusz Chroboczek @jech and the awesome Galene SFU -func IsH264KeyFrame(payload []byte) bool { - if len(payload) < 1 { - return false - } - nalu := payload[0] & 0x1F - if nalu == 0 { - // reserved - return false - } else if nalu <= 23 { - // simple NALU - return nalu == 7 - } else if nalu == 24 || nalu == 25 || nalu == 26 || nalu == 27 { - // STAP-A, STAP-B, MTAP16 or MTAP24 - i := 1 - if nalu == 25 || nalu == 26 || nalu == 27 { - // skip DON - i += 2 - } - for i < len(payload) { - if i+2 > len(payload) { - return false - } - length := uint16(payload[i])<<8 | - uint16(payload[i+1]) - i += 2 - if i+int(length) > len(payload) { - return false - } - offset := 0 - switch nalu { - case 26: - offset = 3 - case 27: - offset = 4 - } - if offset >= int(length) { - return false - } - n := payload[i+offset] & 0x1F - if n == 7 { - return true - } else if n >= 24 { - // is this legal? - logger.Debugw("Non-simple NALU within a STAP") - } - i += int(length) - } - if i == len(payload) { - return false - } - return false - } else if nalu == 28 || nalu == 29 { - // FU-A or FU-B - if len(payload) < 2 { - return false - } - if (payload[1] & 0x80) == 0 { - // not a starting fragment - return false - } - return payload[1]&0x1F == 7 - } - return false -} - -// ------------------------------------- - -// IsVP9KeyFrame detects if vp9 payload is a keyframe -// taken from https://github.com/jech/galene/blob/master/codecs/codecs.go -// all credits belongs to Juliusz Chroboczek @jech and the awesome Galene SFU -func IsVP9KeyFrame(vp9 *codecs.VP9Packet, payload []byte) bool { - if vp9 == nil { - vp9 = &codecs.VP9Packet{} - _, err := vp9.Unmarshal(payload) - if err != nil || len(vp9.Payload) < 1 { - return false - } - } - - if !vp9.B { - return false - } - - if (vp9.Payload[0] & 0xc0) != 0x80 { - return false - } - - profile := (vp9.Payload[0] >> 4) & 0x3 - if profile != 3 { - return (vp9.Payload[0] & 0xC) == 0 - } - return (vp9.Payload[0] & 0x6) == 0 -} - -// ------------------------------------- - -// IsAV1KeyFrame detects if av1 payload is a keyframe -// taken from https://github.com/jech/galene/blob/master/codecs/codecs.go -// all credits belongs to Juliusz Chroboczek @jech and the awesome Galene SFU -func IsAV1KeyFrame(payload []byte) bool { - if len(payload) < 2 { - return false - } - // Z=0, N=1 - if (payload[0] & 0x88) != 0x08 { - return false - } - w := (payload[0] & 0x30) >> 4 - - getObu := func(data []byte, last bool) ([]byte, int, bool) { - if last { - return data, len(data), false - } - offset := 0 - length := 0 - for { - if len(data) <= offset { - return nil, offset, offset > 0 - } - if offset >= 4 { - return nil, offset, true - } - l := data[offset] - length |= int(l&0x7f) << (offset * 7) - offset++ - if (l & 0x80) == 0 { - break - } - } - if len(data) < offset+length { - return data[offset:], len(data), true - } - return data[offset : offset+length], offset + length, false - } - offset := 1 - i := 0 - for { - obu, length, truncated := - getObu(payload[offset:], int(w) == i+1) - if len(obu) < 1 { - return false - } - tpe := (obu[0] & 0x38) >> 3 - switch i { - case 0: - // OBU_SEQUENCE_HEADER - if tpe != 1 { - return false - } - default: - // OBU_FRAME_HEADER or OBU_FRAME - if tpe == 3 || tpe == 6 { - if len(obu) < 2 { - return false - } - // show_existing_frame == 0 - if (obu[1] & 0x80) != 0 { - return false - } - // frame_type == KEY_FRAME - return (obu[1] & 0x60) == 0 - } - } - if truncated || i >= int(w) { - // the first frame header is in a second - // packet, give up. - return false - } - offset += length - i++ - } -} - -func IsH265KeyFrame(payload []byte) (kf bool) { - if len(payload) < 2 { - return false - } - naluType := (payload[0] & 0x7E) >> 1 - switch naluType { - case 33, 34: - return true - case 48: // AP - idx := 2 - for idx < len(payload)-2 { - // TODO: check the DONL field (controlled by sprop-max-don-diff) - size := binary.BigEndian.Uint16(payload[idx:]) - idx += 2 - if idx >= len(payload) { - return false - } - naluType = (payload[idx] & 0x7E) >> 1 - if naluType == 33 || naluType == 34 { - return true - } - idx += int(size) - } - return false - - case 49: // FU - if len(payload) < 3 { - return false - } - naluType = (payload[2] & 0x7E) >> 1 - return naluType == 33 || naluType == 34 - default: - return false - } -} - -// ExtractVP8VideoSize extracts video resolution from VP8 key frame -func ExtractVP8VideoSize(vp8Packet *VP8, payload []byte) VideoSize { - if !vp8Packet.IsKeyFrame || len(payload) < vp8Packet.HeaderSize+10 { - return VideoSize{} - } - - vp8Payload := payload[vp8Packet.HeaderSize:] - - // Check for VP8 start code - if len(vp8Payload) < 10 || vp8Payload[3] != 0x9D || vp8Payload[4] != 0x01 || vp8Payload[5] != 0x2A { - return VideoSize{} - } - - // Read width and height from bytes 6-9 - width := uint32(vp8Payload[6]) | (uint32(vp8Payload[7]) << 8) - height := uint32(vp8Payload[8]) | (uint32(vp8Payload[9]) << 8) - - return VideoSize{width & 0x3FFF, height & 0x3FFF} -} diff --git a/pkg/sfu/buffer/helpers_test.go b/pkg/sfu/buffer/helpers_test.go deleted file mode 100644 index 378bfbebf..000000000 --- a/pkg/sfu/buffer/helpers_test.go +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright 2023 LiveKit, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package buffer - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func TestVP8Helper_Unmarshal(t *testing.T) { - type args struct { - payload []byte - } - tests := []struct { - name string - args args - wantErr bool - checkTemporal bool - temporalSupport bool - checkKeyFrame bool - keyFrame bool - checkPictureID bool - pictureID uint16 - checkTlzIdx bool - tlzIdx uint8 - checkTempID bool - temporalID uint8 - }{ - { - name: "Empty or nil payload must return error", - args: args{payload: []byte{}}, - wantErr: true, - }, - { - name: "Temporal must be supported by setting T bit to 1", - args: args{payload: []byte{0xff, 0x20, 0x1, 0x2, 0x3, 0x4}}, - checkTemporal: true, - temporalSupport: true, - }, - { - name: "Picture must be ID 7 bits by setting M bit to 0 and present by I bit set to 1", - args: args{payload: []byte{0xff, 0xff, 0x11, 0x2, 0x3, 0x4}}, - checkPictureID: true, - pictureID: 17, - }, - { - name: "Picture ID must be 15 bits by setting M bit to 1 and present by I bit set to 1", - args: args{payload: []byte{0xff, 0xff, 0x92, 0x67, 0x3, 0x4, 0x5}}, - checkPictureID: true, - pictureID: 4711, - }, - { - name: "Temporal level zero index must be present if L set to 1", - args: args{payload: []byte{0xff, 0xff, 0xff, 0xfd, 0xb4, 0x4, 0x5}}, - checkTlzIdx: true, - tlzIdx: 180, - }, - { - name: "Temporal index must be present and used if T bit set to 1", - args: args{payload: []byte{0xff, 0xff, 0xff, 0xfd, 0xb4, 0x9f, 0x5, 0x6}}, - checkTempID: true, - temporalID: 2, - }, - { - name: "Check if packet is a keyframe by looking at P bit set to 0", - args: args{payload: []byte{0xff, 0xff, 0xff, 0xfd, 0xb4, 0x9f, 0x94, 0x1}}, - checkKeyFrame: true, - keyFrame: true, - }, - } - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - p := &VP8{} - if err := p.Unmarshal(tt.args.payload); (err != nil) != tt.wantErr { - t.Errorf("Unmarshal() error = %v, wantErr %v", err, tt.wantErr) - } - if tt.checkTemporal { - require.Equal(t, tt.temporalSupport, p.T) - } - if tt.checkKeyFrame { - require.Equal(t, tt.keyFrame, p.IsKeyFrame) - } - if tt.checkPictureID { - require.Equal(t, tt.pictureID, p.PictureID) - } - if tt.checkTlzIdx { - require.Equal(t, tt.tlzIdx, p.TL0PICIDX) - } - if tt.checkTempID { - require.Equal(t, tt.temporalID, p.TID) - } - }) - } -} - -// ------------------------------------------ diff --git a/pkg/sfu/codecmunger/vp8.go b/pkg/sfu/codecmunger/vp8.go index ab8357cce..f6f45e1e6 100644 --- a/pkg/sfu/codecmunger/vp8.go +++ b/pkg/sfu/codecmunger/vp8.go @@ -17,6 +17,7 @@ package codecmunger import ( "github.com/elliotchance/orderedmap/v3" + "github.com/livekit/mediatransportutil/pkg/codec" "github.com/livekit/protocol/livekit" "github.com/livekit/protocol/logger" @@ -103,7 +104,7 @@ func (v *VP8) SeedState(seed any) { } func (v *VP8) SetLast(extPkt *buffer.ExtPacket) { - vp8, ok := extPkt.Payload.(buffer.VP8) + vp8, ok := extPkt.Payload.(codec.VP8) if !ok { return } @@ -128,7 +129,7 @@ func (v *VP8) SetLast(extPkt *buffer.ExtPacket) { } func (v *VP8) UpdateOffsets(extPkt *buffer.ExtPacket) { - vp8, ok := extPkt.Payload.(buffer.VP8) + vp8, ok := extPkt.Payload.(codec.VP8) if !ok { return } @@ -153,7 +154,7 @@ func (v *VP8) UpdateOffsets(extPkt *buffer.ExtPacket) { } func (v *VP8) UpdateAndGet(extPkt *buffer.ExtPacket, snOutOfOrder bool, snHasGap bool, maxTemporalLayer int32) (int, []byte, error) { - vp8, ok := extPkt.Payload.(buffer.VP8) + vp8, ok := extPkt.Payload.(codec.VP8) if !ok { return 0, nil, ErrNotVP8 } @@ -174,7 +175,7 @@ func (v *VP8) UpdateAndGet(extPkt *buffer.ExtPacket, snOutOfOrder bool, snHasGap // when it reaches a certain size. mungedPictureId := uint16((extPictureId - pictureIdOffset) & 0x7fff) - vp8Packet := buffer.VP8{ + vp8Packet := codec.VP8{ FirstByte: vp8.FirstByte, I: vp8.I, M: mungedPictureId > 127, @@ -187,7 +188,7 @@ func (v *VP8) UpdateAndGet(extPkt *buffer.ExtPacket, snOutOfOrder bool, snHasGap K: vp8.K, KEYIDX: vp8.KEYIDX - v.keyIdxOffset, IsKeyFrame: vp8.IsKeyFrame, - HeaderSize: vp8.HeaderSize + buffer.VPxPictureIdSizeDiff(mungedPictureId > 127, vp8.M), + HeaderSize: vp8.HeaderSize + codec.VPxPictureIdSizeDiff(mungedPictureId > 127, vp8.M), } vp8HeaderBytes, err := vp8Packet.Marshal() if err != nil { @@ -281,7 +282,7 @@ func (v *VP8) UpdateAndGet(extPkt *buffer.ExtPacket, snOutOfOrder bool, snHasGap v.lastTl0PicIdx = mungedTl0PicIdx v.lastKeyIdx = mungedKeyIdx - vp8Packet := buffer.VP8{ + vp8Packet := codec.VP8{ FirstByte: vp8.FirstByte, I: vp8.I, M: mungedPictureId > 127, @@ -294,7 +295,7 @@ func (v *VP8) UpdateAndGet(extPkt *buffer.ExtPacket, snOutOfOrder bool, snHasGap K: vp8.K, KEYIDX: mungedKeyIdx, IsKeyFrame: vp8.IsKeyFrame, - HeaderSize: vp8.HeaderSize + buffer.VPxPictureIdSizeDiff(mungedPictureId > 127, vp8.M), + HeaderSize: vp8.HeaderSize + codec.VPxPictureIdSizeDiff(mungedPictureId > 127, vp8.M), } vp8HeaderBytes, err := vp8Packet.Marshal() if err != nil { @@ -346,7 +347,7 @@ func (v *VP8) UpdateAndGetPadding(newPicture bool) ([]byte, error) { v.keyIdxOffset -= uint8(offset) } - vp8Packet := &buffer.VP8{ + vp8Packet := &codec.VP8{ FirstByte: 0x10, // partition 0, start of VP8 Partition, reference frame I: v.pictureIdUsed, M: pictureId > 127, diff --git a/pkg/sfu/codecmunger/vp8_test.go b/pkg/sfu/codecmunger/vp8_test.go index 31d33dd65..2c5714bed 100644 --- a/pkg/sfu/codecmunger/vp8_test.go +++ b/pkg/sfu/codecmunger/vp8_test.go @@ -20,9 +20,9 @@ import ( "github.com/stretchr/testify/require" + "github.com/livekit/mediatransportutil/pkg/codec" "github.com/livekit/protocol/logger" - "github.com/livekit/livekit-server/pkg/sfu/buffer" "github.com/livekit/livekit-server/pkg/sfu/testutils" ) @@ -52,7 +52,7 @@ func TestSetLast(t *testing.T) { Timestamp: 0xabcdef, SSRC: 0x12345678, } - vp8 := &buffer.VP8{ + vp8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -102,7 +102,7 @@ func TestUpdateOffsets(t *testing.T) { Timestamp: 0xabcdef, SSRC: 0x12345678, } - vp8 := &buffer.VP8{ + vp8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -125,7 +125,7 @@ func TestUpdateOffsets(t *testing.T) { Timestamp: 0xabcdef, SSRC: 0x87654321, } - vp8 = &buffer.VP8{ + vp8 = &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -172,7 +172,7 @@ func TestOutOfOrderPictureId(t *testing.T) { Timestamp: 0xabcdef, SSRC: 0x12345678, } - vp8 := &buffer.VP8{ + vp8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -205,7 +205,7 @@ func TestOutOfOrderPictureId(t *testing.T) { vp8.PictureID = 13469 extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - expectedVP8 := &buffer.VP8{ + expectedVP8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -244,7 +244,7 @@ func TestOutOfOrderPictureId(t *testing.T) { vp8.PictureID = 13468 extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - expectedVP8 = &buffer.VP8{ + expectedVP8 = &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -275,7 +275,7 @@ func TestTemporalLayerFiltering(t *testing.T) { Timestamp: 0xabcdef, SSRC: 0x12345678, } - vp8 := &buffer.VP8{ + vp8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -341,7 +341,7 @@ func TestGapInSequenceNumberSamePicture(t *testing.T) { SSRC: 0x12345678, PayloadSize: 33, } - vp8 := &buffer.VP8{ + vp8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -359,7 +359,7 @@ func TestGapInSequenceNumberSamePicture(t *testing.T) { extPkt, _ := testutils.GetTestExtPacketVP8(params, vp8) v.SetLast(extPkt) - expectedVP8 := &buffer.VP8{ + expectedVP8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -382,7 +382,7 @@ func TestGapInSequenceNumberSamePicture(t *testing.T) { require.Equal(t, marshalledVP8, buf) // telling there is a gap in sequence number will add pictures to missing picture cache - expectedVP8 = &buffer.VP8{ + expectedVP8 = &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -418,7 +418,7 @@ func TestUpdateAndGetPadding(t *testing.T) { SSRC: 0x12345678, PayloadSize: 20, } - vp8 := &buffer.VP8{ + vp8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -440,7 +440,7 @@ func TestUpdateAndGetPadding(t *testing.T) { // getting padding with repeat of last picture buf, err := v.UpdateAndGetPadding(false) require.NoError(t, err) - expectedVP8 := buffer.VP8{ + expectedVP8 := codec.VP8{ FirstByte: 16, I: true, M: true, @@ -462,7 +462,7 @@ func TestUpdateAndGetPadding(t *testing.T) { // getting padding with new picture buf, err = v.UpdateAndGetPadding(true) require.NoError(t, err) - expectedVP8 = buffer.VP8{ + expectedVP8 = codec.VP8{ FirstByte: 16, I: true, M: true, diff --git a/pkg/sfu/forwarder_test.go b/pkg/sfu/forwarder_test.go index 7f080c646..cca1e69be 100644 --- a/pkg/sfu/forwarder_test.go +++ b/pkg/sfu/forwarder_test.go @@ -20,6 +20,7 @@ import ( "github.com/pion/webrtc/v4" "github.com/stretchr/testify/require" + "github.com/livekit/mediatransportutil/pkg/codec" "github.com/livekit/protocol/livekit" "github.com/livekit/protocol/logger" @@ -1479,7 +1480,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { Marker: true, IsOutOfOrder: true, } - vp8 := &buffer.VP8{ + vp8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -1513,7 +1514,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { PayloadSize: 20, Marker: true, } - vp8 = &buffer.VP8{ + vp8 = &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -1551,7 +1552,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { require.Equal(t, expectedTP, actualTP) // should lock onto packet (key frame) - vp8 = &buffer.VP8{ + vp8 = &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -1567,7 +1568,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { IsKeyFrame: true, } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - expectedVP8 := &buffer.VP8{ + expectedVP8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -1648,7 +1649,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { PayloadSize: 20, } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - expectedVP8 = &buffer.VP8{ + expectedVP8 = &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -1685,7 +1686,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { SSRC: 0x12345678, PayloadSize: 20, } - vp8 = &buffer.VP8{ + vp8 = &codec.VP8{ FirstByte: 25, S: true, I: true, @@ -1702,7 +1703,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { IsKeyFrame: true, } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - expectedVP8 = &buffer.VP8{ + expectedVP8 = &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -1739,7 +1740,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { SSRC: 0x12345678, PayloadSize: 20, } - vp8 = &buffer.VP8{ + vp8 = &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -1774,7 +1775,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { SSRC: 0x12345678, PayloadSize: 20, } - vp8 = &buffer.VP8{ + vp8 = &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -1790,7 +1791,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { IsKeyFrame: false, } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - expectedVP8 = &buffer.VP8{ + expectedVP8 = &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -1871,7 +1872,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { SSRC: 0x87654321, PayloadSize: 20, } - vp8 = &buffer.VP8{ + vp8 = &codec.VP8{ FirstByte: 25, I: true, M: false, @@ -1888,7 +1889,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - expectedVP8 = &buffer.VP8{ + expectedVP8 = &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -1930,7 +1931,7 @@ func TestForwarderGetSnTsForPadding(t *testing.T) { SSRC: 0x12345678, PayloadSize: 20, } - vp8 := &buffer.VP8{ + vp8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -1997,7 +1998,7 @@ func TestForwarderGetSnTsForBlankFrames(t *testing.T) { SSRC: 0x12345678, PayloadSize: 20, } - vp8 := &buffer.VP8{ + vp8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -2074,7 +2075,7 @@ func TestForwarderGetPaddingVP8(t *testing.T) { SSRC: 0x12345678, PayloadSize: 20, } - vp8 := &buffer.VP8{ + vp8 := &codec.VP8{ FirstByte: 25, I: true, M: true, @@ -2101,7 +2102,7 @@ func TestForwarderGetPaddingVP8(t *testing.T) { _, _ = f.GetTranslationParams(extPkt, 0) // getting padding with frame end needed, should repeat the last picture id - expectedVP8 := buffer.VP8{ + expectedVP8 := codec.VP8{ FirstByte: 16, I: true, M: true, @@ -2123,7 +2124,7 @@ func TestForwarderGetPaddingVP8(t *testing.T) { require.Equal(t, marshalledVP8, buf) // getting padding with no frame end needed, should get next picture id - expectedVP8 = buffer.VP8{ + expectedVP8 = codec.VP8{ FirstByte: 16, I: true, M: true, diff --git a/pkg/sfu/receiver_base.go b/pkg/sfu/receiver_base.go index e1f1a1169..d056d252a 100644 --- a/pkg/sfu/receiver_base.go +++ b/pkg/sfu/receiver_base.go @@ -27,6 +27,7 @@ import ( "go.uber.org/atomic" "github.com/livekit/mediatransportutil/pkg/bucket" + "github.com/livekit/mediatransportutil/pkg/codec" "github.com/livekit/protocol/codecs/mime" "github.com/livekit/protocol/livekit" "github.com/livekit/protocol/logger" @@ -145,7 +146,7 @@ type TrackReceiver interface { CodecState() ReceiverCodecState // VideoSizes returns the video size parsed from rtp packet for each spatial layer. - VideoSizes() []buffer.VideoSize + VideoSizes() []codec.VideoSize // closes all associated buffers and issues a resync to all attached downtracks so that // they can resync and have proper sequncing without gaps in sequence numbers / timestamps @@ -211,7 +212,7 @@ type ReceiverBase struct { trackInfo *livekit.TrackInfo videoSizeMu sync.RWMutex - videoSizes [buffer.DefaultMaxLayerSpatial + 1]buffer.VideoSize + videoSizes [buffer.DefaultMaxLayerSpatial + 1]codec.VideoSize onVideoSizeChanged func() rtt uint32 @@ -771,7 +772,7 @@ func (r *ReceiverBase) setupBuffer(buff buffer.BufferProvider, layer int32, rtt rt.ForwardRTCPSenderReport(r.params.Codec.PayloadType, layer, srData) } }) - buff.OnVideoSizeChanged(func(videoSize []buffer.VideoSize) { + buff.OnVideoSizeChanged(func(videoSize []codec.VideoSize) { r.videoSizeMu.Lock() if r.videoLayerMode == livekit.VideoLayer_MULTIPLE_SPATIAL_LAYERS_PER_STREAM { copy(r.videoSizes[:], videoSize) @@ -1248,8 +1249,8 @@ func (r *ReceiverBase) checkCodecChanged(codec webrtc.RTPCodecParameters, header } } -func (r *ReceiverBase) VideoSizes() []buffer.VideoSize { - var sizes []buffer.VideoSize +func (r *ReceiverBase) VideoSizes() []codec.VideoSize { + var sizes []codec.VideoSize r.videoSizeMu.RLock() defer r.videoSizeMu.RUnlock() for _, v := range r.videoSizes { diff --git a/pkg/sfu/testutils/data.go b/pkg/sfu/testutils/data.go index c2835493c..34b1fca62 100644 --- a/pkg/sfu/testutils/data.go +++ b/pkg/sfu/testutils/data.go @@ -21,6 +21,7 @@ import ( "github.com/pion/webrtc/v4" "github.com/livekit/livekit-server/pkg/sfu/buffer" + "github.com/livekit/mediatransportutil/pkg/codec" ) // ----------------------------------------------------------- @@ -69,7 +70,7 @@ func GetTestExtPacket(params *TestExtPacketParams) (*buffer.ExtPacket, error) { ExtTimestamp: uint64(params.TSCycles<<32) + uint64(params.Timestamp), Arrival: params.ArrivalTime.UnixNano(), Packet: &packet, - IsKeyFrame: params.IsKeyFrame, + IsKeyFrame: params.IsKeyFrame, RawPacket: raw, IsOutOfOrder: params.IsOutOfOrder, } @@ -79,7 +80,7 @@ func GetTestExtPacket(params *TestExtPacketParams) (*buffer.ExtPacket, error) { // -------------------------------------- -func GetTestExtPacketVP8(params *TestExtPacketParams, vp8 *buffer.VP8) (*buffer.ExtPacket, error) { +func GetTestExtPacketVP8(params *TestExtPacketParams, vp8 *codec.VP8) (*buffer.ExtPacket, error) { ep, err := GetTestExtPacket(params) if err != nil { return nil, err diff --git a/pkg/sfu/videolayerselector/temporallayerselector/vp8.go b/pkg/sfu/videolayerselector/temporallayerselector/vp8.go index c0a86c4f2..39480c439 100644 --- a/pkg/sfu/videolayerselector/temporallayerselector/vp8.go +++ b/pkg/sfu/videolayerselector/temporallayerselector/vp8.go @@ -16,6 +16,7 @@ package temporallayerselector import ( "github.com/livekit/livekit-server/pkg/sfu/buffer" + "github.com/livekit/mediatransportutil/pkg/codec" "github.com/livekit/protocol/logger" ) @@ -36,7 +37,7 @@ func (v *VP8) Select(extPkt *buffer.ExtPacket, current int32, target int32) (thi return } - vp8, ok := extPkt.Payload.(buffer.VP8) + vp8, ok := extPkt.Payload.(codec.VP8) if !ok { return }