diff --git a/pkg/rtc/participant_sdp.go b/pkg/rtc/participant_sdp.go index 3edb71b9e..524a487fb 100644 --- a/pkg/rtc/participant_sdp.go +++ b/pkg/rtc/participant_sdp.go @@ -131,8 +131,8 @@ func (p *ParticipantImpl) setCodecPreferencesVideoForPublisher(offer webrtc.Sess p.pendingTracksLock.RUnlock() mime = strings.ToUpper(mime) - // remove dd extension if av1 not preferred - if !strings.Contains(mime, "AV1") && !strings.Contains(mime, "VP9") { + // remove dd extension if av1/vp9 not preferred + if !strings.Contains(strings.ToLower(mime), "av1") && !strings.Contains(strings.ToLower(mime), "vp9") { for i, attr := range unmatchVideo.Attributes { if strings.Contains(attr.Value, dd.ExtensionUrl) { unmatchVideo.Attributes[i] = unmatchVideo.Attributes[len(unmatchVideo.Attributes)-1] diff --git a/pkg/sfu/buffer/buffer.go b/pkg/sfu/buffer/buffer.go index 1cfc7b2de..26fb22aca 100644 --- a/pkg/sfu/buffer/buffer.go +++ b/pkg/sfu/buffer/buffer.go @@ -10,6 +10,7 @@ import ( "github.com/gammazero/deque" "github.com/pion/rtcp" "github.com/pion/rtp" + "github.com/pion/rtp/codecs" "github.com/pion/sdp/v3" "github.com/pion/webrtc/v3" "go.uber.org/atomic" @@ -193,8 +194,17 @@ func (b *Buffer) Bind(params webrtc.RTPParameters, codec webrtc.RTPCodecCapabili case strings.HasPrefix(b.mime, "video/"): b.codecType = webrtc.RTPCodecTypeVideo b.bucket = bucket.NewBucket(b.videoPool.Get().(*[]byte)) - if b.frameRateCalculator[0] == nil && strings.EqualFold(codec.MimeType, webrtc.MimeTypeVP8) { - b.frameRateCalculator[0] = NewFrameRateCalculatorVP8(b.clockRate, b.logger) + if b.frameRateCalculator[0] == nil { + if strings.EqualFold(codec.MimeType, webrtc.MimeTypeVP8) { + b.frameRateCalculator[0] = NewFrameRateCalculatorVP8(b.clockRate, b.logger) + } + + if strings.EqualFold(codec.MimeType, webrtc.MimeTypeVP9) { + frc := NewFrameRateCalculatorVP9(b.clockRate, b.logger) + for i := range b.frameRateCalculator { + b.frameRateCalculator[i] = frc.GetFrameRateCalculatorForSpatial(int32(i)) + } + } } 
default: @@ -560,12 +570,25 @@ func (b *Buffer) getExtPacket(rtpPacket *rtp.Packet, arrivalTime int64) *ExtPack ep.Spatial = InvalidLayerSpatial // vp8 don't have spatial scalability, reset to -1 } ep.Payload = vp8Packet - case "video/h264": - ep.KeyFrame = IsH264Keyframe(rtpPacket.Payload) - case "video/av1": - ep.KeyFrame = IsAV1Keyframe(rtpPacket.Payload) case "video/vp9": - ep.KeyFrame = IsVP9Keyframe(rtpPacket.Payload) + if ep.DependencyDescriptor == nil { + var vp9Packet codecs.VP9Packet + _, err := vp9Packet.Unmarshal(rtpPacket.Payload) + if err != nil { + b.logger.Warnw("could not unmarshal VP9 packet", err) + return nil + } + ep.VideoLayer = VideoLayer{ + Spatial: int32(vp9Packet.SID), + Temporal: int32(vp9Packet.TID), + } + ep.Payload = vp9Packet + } + ep.KeyFrame = IsVP9KeyFrame(rtpPacket.Payload) + case "video/h264": + ep.KeyFrame = IsH264KeyFrame(rtpPacket.Payload) + case "video/av1": + ep.KeyFrame = IsAV1KeyFrame(rtpPacket.Payload) } if ep.KeyFrame { diff --git a/pkg/sfu/buffer/fps.go b/pkg/sfu/buffer/fps.go index ddd6fb467..f8f192227 100644 --- a/pkg/sfu/buffer/fps.go +++ b/pkg/sfu/buffer/fps.go @@ -4,6 +4,7 @@ import ( "container/list" "github.com/livekit/protocol/logger" + "github.com/pion/rtp/codecs" ) var minFramesForCalculation = [DefaultMaxLayerTemporal + 1]int{8, 15, 40} @@ -24,8 +25,9 @@ type FrameRateCalculator interface { } // ----------------------------- -// FrameRateCalculator based on PictureID in VP8 -type FrameRateCalculatorVP8 struct { + +// FrameRateCalculator based on PictureID in VPx +type frameRateCalculatorVPx struct { frameRates [DefaultMaxLayerTemporal + 1]float32 clockRate uint32 logger logger.Logger @@ -36,27 +38,21 @@ type FrameRateCalculatorVP8 struct { completed bool } -func NewFrameRateCalculatorVP8(clockRate uint32, logger logger.Logger) *FrameRateCalculatorVP8 { - return &FrameRateCalculatorVP8{ +func newFrameRateCalculatorVPx(clockRate uint32, logger logger.Logger) *frameRateCalculatorVPx { + return 
&frameRateCalculatorVPx{ clockRate: clockRate, logger: logger, } } -func (f *FrameRateCalculatorVP8) Completed() bool { +func (f *frameRateCalculatorVPx) Completed() bool { return f.completed } -func (f *FrameRateCalculatorVP8) RecvPacket(ep *ExtPacket) bool { +func (f *frameRateCalculatorVPx) RecvPacket(ep *ExtPacket, fn uint16) bool { if f.completed { return true } - vp8, ok := ep.Payload.(VP8) - if !ok { - f.logger.Debugw("no vp8 payload", "sn", ep.Packet.SequenceNumber) - return false - } - fn := vp8.PictureID if ep.Temporal >= int32(len(f.frameRates)) { f.logger.Warnw("invalid temporal layer", nil, "temporal", ep.Temporal) @@ -113,7 +109,7 @@ func (f *FrameRateCalculatorVP8) RecvPacket(ep *ExtPacket) bool { return f.calc() } -func (f *FrameRateCalculatorVP8) calc() bool { +func (f *frameRateCalculatorVPx) calc() bool { var rateCounter int for currentTemporal := int32(0); currentTemporal <= DefaultMaxLayerTemporal; currentTemporal++ { if f.frameRates[currentTemporal] > 0 { @@ -156,14 +152,13 @@ func (f *FrameRateCalculatorVP8) calc() bool { if f.frameRates[2] > 0 && f.frameRates[2] > f.frameRates[1]*3 { f.frameRates[1] = f.frameRates[2] / 2 } - f.logger.Debugw("frame rate calculated", "rate", f.frameRates) f.reset() return true } return false } -func (f *FrameRateCalculatorVP8) reset() { +func (f *frameRateCalculatorVPx) reset() { for i := range f.firstFrames { f.firstFrames[i] = nil f.secondFrames[i] = nil @@ -175,20 +170,145 @@ func (f *FrameRateCalculatorVP8) reset() { f.baseFrame = nil } -func (f *FrameRateCalculatorVP8) GetFrameRate() []float32 { +func (f *frameRateCalculatorVPx) GetFrameRate() []float32 { return f.frameRates[:] } // ----------------------------- -// FrameRateCalculator based on Dependency descriptor +// FrameRateCalculator based on PictureID in VP8 +type FrameRateCalculatorVP8 struct { + *frameRateCalculatorVPx + logger logger.Logger +} + +func NewFrameRateCalculatorVP8(clockRate uint32, logger logger.Logger) *FrameRateCalculatorVP8 { + 
return &FrameRateCalculatorVP8{ + frameRateCalculatorVPx: newFrameRateCalculatorVPx(clockRate, logger), + logger: logger, + } +} + +func (f *FrameRateCalculatorVP8) RecvPacket(ep *ExtPacket) bool { + if f.frameRateCalculatorVPx.Completed() { + return true + } + + vp8, ok := ep.Payload.(VP8) + if !ok { + f.logger.Debugw("no vp8 payload", "sn", ep.Packet.SequenceNumber) + return false + } + success := f.frameRateCalculatorVPx.RecvPacket(ep, vp8.PictureID) + + if f.frameRateCalculatorVPx.Completed() { + f.logger.Debugw("frame rate calculated", "rate", f.frameRateCalculatorVPx.GetFrameRate()) + } + + return success +} + +// ----------------------------- + +// FrameRateCalculator based on PictureID in VP9 +type FrameRateCalculatorVP9 struct { + logger logger.Logger + completed bool + + // VP9-TODO - this is assuming three spatial layers. As `completed` marker relies on all layers being finished, have to assume this. FIX. + // Maybe look at number of layers in livekit.TrackInfo and declare completed once advertised layers are measured + frameRateCalculatorsVPx [DefaultMaxLayerSpatial + 1]*frameRateCalculatorVPx +} + +func NewFrameRateCalculatorVP9(clockRate uint32, logger logger.Logger) *FrameRateCalculatorVP9 { + f := &FrameRateCalculatorVP9{ + logger: logger, + } + + for i := range f.frameRateCalculatorsVPx { + f.frameRateCalculatorsVPx[i] = newFrameRateCalculatorVPx(clockRate, logger) + } + + return f +} + +func (f *FrameRateCalculatorVP9) Completed() bool { + return f.completed +} + +func (f *FrameRateCalculatorVP9) RecvPacket(ep *ExtPacket) bool { + if f.completed { + return true + } + + vp9, ok := ep.Payload.(codecs.VP9Packet) + if !ok { + f.logger.Debugw("no vp9 payload", "sn", ep.Packet.SequenceNumber) + return false + } + + if ep.Spatial < 0 || ep.Spatial >= int32(len(f.frameRateCalculatorsVPx)) || f.frameRateCalculatorsVPx[ep.Spatial] == nil { + f.logger.Debugw("invalid spatial layer", "sn", ep.Packet.SequenceNumber, "spatial", ep.Spatial) + return false + } + 
+ success := f.frameRateCalculatorsVPx[ep.Spatial].RecvPacket(ep, vp9.PictureID) + + completed := true + for _, frc := range f.frameRateCalculatorsVPx { + if !frc.Completed() { + completed = false + break + } + } + + if completed { + f.completed = true + + var frameRates [DefaultMaxLayerSpatial + 1][]float32 + for i := range f.frameRateCalculatorsVPx { + frameRates[i] = f.frameRateCalculatorsVPx[i].GetFrameRate() + } + f.logger.Debugw("frame rate calculated", "rate", frameRates) + } + + return success +} + +func (f *FrameRateCalculatorVP9) GetFrameRateForSpatial(spatial int32) []float32 { + if spatial < 0 || spatial >= int32(len(f.frameRateCalculatorsVPx)) || f.frameRateCalculatorsVPx[spatial] == nil { + return nil + } + return f.frameRateCalculatorsVPx[spatial].GetFrameRate() +} + +func (f *FrameRateCalculatorVP9) GetFrameRateCalculatorForSpatial(spatial int32) *FrameRateCalculatorForVP9Layer { + return &FrameRateCalculatorForVP9Layer{ + FrameRateCalculatorVP9: f, + spatial: spatial, + } +} + +// ----------------------------- + +type FrameRateCalculatorForVP9Layer struct { + *FrameRateCalculatorVP9 + spatial int32 +} + +func (f *FrameRateCalculatorForVP9Layer) GetFrameRate() []float32 { + return f.FrameRateCalculatorVP9.GetFrameRateForSpatial(f.spatial) +} + +// ----------------------------------------------- + +// FrameRateCalculator based on Dependency descriptor type FrameRateCalculatorDD struct { frameRates [DefaultMaxLayerSpatial + 1][DefaultMaxLayerTemporal + 1]float32 clockRate uint32 logger logger.Logger firstFrames [DefaultMaxLayerSpatial + 1][DefaultMaxLayerTemporal + 1]*frameInfo secondFrames [DefaultMaxLayerSpatial + 1][DefaultMaxLayerTemporal + 1]*frameInfo - spatial int fnReceived [256]*frameInfo baseFrame *frameInfo completed bool @@ -385,7 +505,7 @@ func (f *FrameRateCalculatorDD) calc() bool { f.completed = true f.close() - f.logger.Debugw("frame rate calculated", "spatial", f.spatial, "rate", f.frameRates) + f.logger.Debugw("frame rate 
calculated", "rate", f.frameRates) return true } return false @@ -424,6 +544,8 @@ func (f *FrameRateCalculatorDD) GetFrameRateCalculatorForSpatial(spatial int32) } } +// ----------------------------------------------- + type FrameRateCalculatorForDDLayer struct { *FrameRateCalculatorDD spatial int32 @@ -432,3 +554,5 @@ type FrameRateCalculatorForDDLayer struct { func (f *FrameRateCalculatorForDDLayer) GetFrameRate() []float32 { return f.FrameRateCalculatorDD.GetFrameRateForSpatial(f.spatial) } + +// ----------------------------------------------- diff --git a/pkg/sfu/buffer/helpers.go b/pkg/sfu/buffer/helpers.go index 9545ab049..01bb33403 100644 --- a/pkg/sfu/buffer/helpers.go +++ b/pkg/sfu/buffer/helpers.go @@ -4,8 +4,6 @@ import ( "encoding/binary" "errors" - "github.com/pion/rtp/codecs" - "github.com/livekit/protocol/logger" ) @@ -35,22 +33,23 @@ var ( */ type VP8 struct { FirstByte byte + S bool - PictureIDPresent int - PictureID uint16 /* 8 or 16 bits, picture ID */ - MBit bool + I bool + M bool + PictureID uint16 /* 8 or 16 bits, picture ID */ - TL0PICIDXPresent int - TL0PICIDX uint8 /* 8 bits temporal level zero index */ + L bool + TL0PICIDX uint8 /* 8 bits temporal level zero index */ // Optional Header If either of the T or K bits are set to 1, // the TID/Y/KEYIDX extension field MUST be present. 
- TIDPresent int - TID uint8 /* 2 bits temporal layer idx */ - Y uint8 + T bool + TID uint8 /* 2 bits temporal layer idx */ + Y bool - KEYIDXPresent int - KEYIDX uint8 /* 5 bits of key frame idx */ + K bool + KEYIDX uint8 /* 5 bits of key frame idx */ HeaderSize int @@ -65,96 +64,94 @@ func (v *VP8) Unmarshal(payload []byte) error { } payloadLen := len(payload) - if payloadLen < 1 { return errShortPacket } idx := 0 v.FirstByte = payload[idx] - S := payload[idx]&0x10 > 0 + v.S = payload[idx]&0x10 > 0 // Check for extended bit control if payload[idx]&0x80 > 0 { idx++ if payloadLen < idx+1 { return errShortPacket } - I := payload[idx]&0x80 > 0 - L := payload[idx]&0x40 > 0 - T := payload[idx]&0x20 > 0 - K := payload[idx]&0x10 > 0 - if L && !T { + v.I = payload[idx]&0x80 > 0 + v.L = payload[idx]&0x40 > 0 + v.T = payload[idx]&0x20 > 0 + v.K = payload[idx]&0x10 > 0 + if v.L && !v.T { return errInvalidPacket } - // Check for PictureID - if I { + + if v.I { idx++ if payloadLen < idx+1 { return errShortPacket } - v.PictureIDPresent = 1 pid := payload[idx] & 0x7f - // Check if m is 1, then Picture ID is 15 bits - if payload[idx]&0x80 > 0 { + // if m is 1, then Picture ID is 15 bits + v.M = payload[idx]&0x80 > 0 + if v.M { idx++ if payloadLen < idx+1 { return errShortPacket } - v.MBit = true v.PictureID = binary.BigEndian.Uint16([]byte{pid, payload[idx]}) } else { v.PictureID = uint16(pid) } } - // Check if TL0PICIDX is present - if L { + + if v.L { idx++ if payloadLen < idx+1 { return errShortPacket } - v.TL0PICIDXPresent = 1 - - if idx >= payloadLen { - return errShortPacket - } v.TL0PICIDX = payload[idx] } - if T || K { + + if v.T || v.K { idx++ if payloadLen < idx+1 { return errShortPacket } - if T { - v.TIDPresent = 1 + + if v.T { v.TID = (payload[idx] & 0xc0) >> 6 - v.Y = (payload[idx] & 0x20) >> 5 + v.Y = (payload[idx] & 0x20) > 0 } - if K { - v.KEYIDXPresent = 1 + + if v.K { v.KEYIDX = payload[idx] & 0x1f } } - if idx >= payloadLen { - return errShortPacket - } idx++ 
if payloadLen < idx+1 { return errShortPacket } + // Check is packet is a keyframe by looking at P bit in vp8 payload - v.IsKeyFrame = payload[idx]&0x01 == 0 && S + v.IsKeyFrame = payload[idx]&0x01 == 0 && v.S } else { idx++ if payloadLen < idx+1 { return errShortPacket } // Check is packet is a keyframe by looking at P bit in vp8 payload - v.IsKeyFrame = payload[idx]&0x01 == 0 && S + v.IsKeyFrame = payload[idx]&0x01 == 0 && v.S } v.HeaderSize = idx return nil } +func (v *VP8) Marshal() ([]byte, error) { + buf := make([]byte, v.HeaderSize) + err := v.MarshalTo(buf) + return buf, err +} + func (v *VP8) MarshalTo(buf []byte) error { if len(buf) < v.HeaderSize { return errShortPacket @@ -162,13 +159,17 @@ func (v *VP8) MarshalTo(buf []byte) error { idx := 0 buf[idx] = v.FirstByte - if (v.PictureIDPresent + v.TL0PICIDXPresent + v.TIDPresent + v.KEYIDXPresent) != 0 { + if v.I || v.L || v.T || v.K { buf[idx] |= 0x80 // X bit idx++ - buf[idx] = byte(v.PictureIDPresent<<7) | byte(v.TL0PICIDXPresent<<6) | byte(v.TIDPresent<<5) | byte(v.KEYIDXPresent<<4) + + xpos := idx + xval := byte(0) + idx++ - if v.PictureIDPresent == 1 { - if v.MBit { + if v.I { + xval |= (1 << 7) + if v.M { buf[idx] = 0x80 | byte((v.PictureID>>8)&0x7f) buf[idx+1] = byte(v.PictureID & 0xff) idx += 2 @@ -177,20 +178,31 @@ func (v *VP8) MarshalTo(buf []byte) error { idx++ } } - if v.TL0PICIDXPresent == 1 { + + if v.L { + xval |= (1 << 6) buf[idx] = v.TL0PICIDX idx++ } - if v.TIDPresent == 1 || v.KEYIDXPresent == 1 { + + if v.T || v.K { buf[idx] = 0 - if v.TIDPresent == 1 { - buf[idx] = v.TID<<6 | v.Y<<5 + if v.T { + xval |= (1 << 5) + buf[idx] = v.TID << 6 + if v.Y { + buf[idx] |= (1 << 5) + } } - if v.KEYIDXPresent == 1 { + + if v.K { + xval |= (1 << 4) buf[idx] |= v.KEYIDX & 0x1f } idx++ } + + buf[xpos] = xval } else { buf[idx] &^= 0x80 // X bit idx++ @@ -199,7 +211,9 @@ func (v *VP8) MarshalTo(buf []byte) error { return nil } -func VP8PictureIdSizeDiff(mBit1 bool, mBit2 bool) int { +// 
------------------------------------- + +func VPxPictureIdSizeDiff(mBit1 bool, mBit2 bool) int { if mBit1 == mBit2 { return 0 } @@ -211,10 +225,12 @@ func VP8PictureIdSizeDiff(mBit1 bool, mBit2 bool) int { return -1 } -// IsH264Keyframe detects if h264 payload is a keyframe +// ------------------------------------- + +// IsH264KeyFrame detects if h264 payload is a keyframe // this code was taken from https://github.com/jech/galene/blob/codecs/rtpconn/rtpreader.go#L45 // all credits belongs to Juliusz Chroboczek @jech and the awesome Galene SFU -func IsH264Keyframe(payload []byte) bool { +func IsH264KeyFrame(payload []byte) bool { if len(payload) < 1 { return false } @@ -278,10 +294,65 @@ func IsH264Keyframe(payload []byte) bool { return false } -// IsAV1Keyframe detects if av1 payload is a keyframe +// ------------------------------------- + +func IsVP9KeyFrame(payload []byte) bool { + payloadLen := len(payload) + if payloadLen < 1 { + return false + } + + idx := 0 + I := payload[idx]&0x80 > 0 + P := payload[idx]&0x40 > 0 + L := payload[idx]&0x20 > 0 + F := payload[idx]&0x10 > 0 + B := payload[idx]&0x08 > 0 + + if F && !I { + return false + } + + // Check for PictureID + if I { + idx++ + if payloadLen < idx+1 { + return false + } + // Check if m is 1, then Picture ID is 15 bits + if payload[idx]&0x80 > 0 { + idx++ + if payloadLen < idx+1 { + return false + } + } + } + + // Check if TL0PICIDX is present + sid := -1 + if L { + idx++ + if payloadLen < idx+1 { + return false + } + + tid := (payload[idx] >> 5) & 0x7 + if !P && tid != 0 { + return false + } + + sid = int((payload[idx] >> 1) & 0x7) + } + + return !P && (!L || (L && sid == 0)) && B +} + +// ------------------------------------- + +// IsAV1KeyFrame detects if av1 payload is a keyframe // taken from https://github.com/jech/galene/blob/master/codecs/codecs.go // all credits belongs to Juliusz Chroboczek @jech and the awesome Galene SFU -func IsAV1Keyframe(payload []byte) bool { +func IsAV1KeyFrame(payload 
[]byte) bool { if len(payload) < 2 { return false } @@ -353,28 +424,4 @@ func IsAV1Keyframe(payload []byte) bool { } } -// IsVP9Keyframe detects if vp9 payload is a keyframe -// taken from https://github.com/jech/galene/blob/master/codecs/codecs.go -// all credits belongs to Juliusz Chroboczek @jech and the awesome Galene SFU -func IsVP9Keyframe(payload []byte) bool { - var vp9 codecs.VP9Packet - _, err := vp9.Unmarshal(payload) - if err != nil || len(vp9.Payload) < 1 { - return false - } - if !vp9.B { - return false - } - - if (vp9.Payload[0] & 0xc0) != 0x80 { - return false - } - - profile := (vp9.Payload[0] >> 4) & 0x3 - if profile != 3 { - return (vp9.Payload[0] & 0xC) == 0 - } - return (vp9.Payload[0] & 0x6) == 0 -} - // ------------------------------------- diff --git a/pkg/sfu/buffer/helpers_test.go b/pkg/sfu/buffer/helpers_test.go index d52bce5c5..6ce0ad860 100644 --- a/pkg/sfu/buffer/helpers_test.go +++ b/pkg/sfu/buffer/helpers_test.go @@ -75,7 +75,7 @@ func TestVP8Helper_Unmarshal(t *testing.T) { t.Errorf("Unmarshal() error = %v, wantErr %v", err, tt.wantErr) } if tt.checkTemporal { - require.Equal(t, tt.temporalSupport, p.TIDPresent == 1) + require.Equal(t, tt.temporalSupport, p.T) } if tt.checkKeyFrame { require.Equal(t, tt.keyFrame, p.IsKeyFrame) diff --git a/pkg/sfu/codecmunger/codecmunger.go b/pkg/sfu/codecmunger/codecmunger.go new file mode 100644 index 000000000..eec4f2437 --- /dev/null +++ b/pkg/sfu/codecmunger/codecmunger.go @@ -0,0 +1,25 @@ +package codecmunger + +import ( + "errors" + + "github.com/livekit/livekit-server/pkg/sfu/buffer" +) + +var ( + ErrNotVP8 = errors.New("not VP8") + ErrOutOfOrderVP8PictureIdCacheMiss = errors.New("out-of-order VP8 picture id not found in cache") + ErrFilteredVP8TemporalLayer = errors.New("filtered VP8 temporal layer") +) + +type CodecMunger interface { + GetState() interface{} + SeedState(state interface{}) + + SetLast(extPkt *buffer.ExtPacket) + UpdateOffsets(extPkt *buffer.ExtPacket) + + 
UpdateAndGet(extPkt *buffer.ExtPacket, snOutOfOrder bool, snHasGap bool, maxTemporal int32) ([]byte, error) + + UpdateAndGetPadding(newPicture bool) ([]byte, error) +} diff --git a/pkg/sfu/codecmunger/null.go b/pkg/sfu/codecmunger/null.go new file mode 100644 index 000000000..e6b3f00cb --- /dev/null +++ b/pkg/sfu/codecmunger/null.go @@ -0,0 +1,34 @@ +package codecmunger + +import ( + "github.com/livekit/livekit-server/pkg/sfu/buffer" + "github.com/livekit/protocol/logger" +) + +type Null struct { +} + +func NewNull(_logger logger.Logger) *Null { + return &Null{} +} + +func (n *Null) GetState() interface{} { + return nil +} + +func (n *Null) SeedState(_state interface{}) { +} + +func (n *Null) SetLast(_extPkt *buffer.ExtPacket) { +} + +func (n *Null) UpdateOffsets(_extPkt *buffer.ExtPacket) { +} + +func (n *Null) UpdateAndGet(_extPkt *buffer.ExtPacket, snOutOfOrder bool, snHasGap bool, maxTemporal int32) ([]byte, error) { + return nil, nil +} + +func (n *Null) UpdateAndGetPadding(newPicture bool) ([]byte, error) { + return nil, nil +} diff --git a/pkg/sfu/vp8munger.go b/pkg/sfu/codecmunger/vp8.go similarity index 71% rename from pkg/sfu/vp8munger.go rename to pkg/sfu/codecmunger/vp8.go index a631a6237..271dac71e 100644 --- a/pkg/sfu/vp8munger.go +++ b/pkg/sfu/codecmunger/vp8.go @@ -1,4 +1,4 @@ -package sfu +package codecmunger import ( "fmt" @@ -16,67 +16,56 @@ const ( exemptedPictureIdsThreshold = 20 ) -// VP8 munger -type TranslationParamsVP8 struct { - Header *buffer.VP8 -} - // ----------------------------------------------------------- -type VP8MungerState struct { +type VP8State struct { ExtLastPictureId int32 - PictureIdUsed int + PictureIdUsed bool LastTl0PicIdx uint8 - Tl0PicIdxUsed int - TidUsed int + Tl0PicIdxUsed bool + TidUsed bool LastKeyIdx uint8 - KeyIdxUsed int + KeyIdxUsed bool } -func (v VP8MungerState) String() string { - return fmt.Sprintf("VP8MungerState{extLastPictureId: %d, pictureIdUsed: %+v, lastTl0PicIdx: %d, tl0PicIdxUsed: %+v, tidUsed: 
%+v, lastKeyIdx: %d, keyIdxUsed: %+v)", +func (v VP8State) String() string { + return fmt.Sprintf("VP8State{extLastPictureId: %d, pictureIdUsed: %+v, lastTl0PicIdx: %d, tl0PicIdxUsed: %+v, tidUsed: %+v, lastKeyIdx: %d, keyIdxUsed: %+v)", v.ExtLastPictureId, v.PictureIdUsed, v.LastTl0PicIdx, v.Tl0PicIdxUsed, v.TidUsed, v.LastKeyIdx, v.KeyIdxUsed) } // ----------------------------------------------------------- -type VP8MungerParams struct { +type VP8 struct { + logger logger.Logger + pictureIdWrapHandler VP8PictureIdWrapHandler extLastPictureId int32 pictureIdOffset int32 - pictureIdUsed int + pictureIdUsed bool lastTl0PicIdx uint8 tl0PicIdxOffset uint8 - tl0PicIdxUsed int - tidUsed int + tl0PicIdxUsed bool + tidUsed bool lastKeyIdx uint8 keyIdxOffset uint8 - keyIdxUsed int + keyIdxUsed bool missingPictureIds *orderedmap.OrderedMap[int32, int32] droppedPictureIds *orderedmap.OrderedMap[int32, bool] exemptedPictureIds *orderedmap.OrderedMap[int32, bool] } -type VP8Munger struct { - logger logger.Logger - - VP8MungerParams -} - -func NewVP8Munger(logger logger.Logger) *VP8Munger { - return &VP8Munger{ - logger: logger, - VP8MungerParams: VP8MungerParams{ - missingPictureIds: orderedmap.NewOrderedMap[int32, int32](), - droppedPictureIds: orderedmap.NewOrderedMap[int32, bool](), - exemptedPictureIds: orderedmap.NewOrderedMap[int32, bool](), - }, +func NewVP8(logger logger.Logger) *VP8 { + return &VP8{ + logger: logger, + missingPictureIds: orderedmap.NewOrderedMap[int32, int32](), + droppedPictureIds: orderedmap.NewOrderedMap[int32, bool](), + exemptedPictureIds: orderedmap.NewOrderedMap[int32, bool](), } } -func (v *VP8Munger) GetLast() VP8MungerState { - return VP8MungerState{ +func (v *VP8) GetState() interface{} { + return VP8State{ ExtLastPictureId: v.extLastPictureId, PictureIdUsed: v.pictureIdUsed, LastTl0PicIdx: v.lastTl0PicIdx, @@ -87,57 +76,59 @@ func (v *VP8Munger) GetLast() VP8MungerState { } } -func (v *VP8Munger) SeedLast(state VP8MungerState) { - 
v.extLastPictureId = state.ExtLastPictureId - v.pictureIdUsed = state.PictureIdUsed - v.lastTl0PicIdx = state.LastTl0PicIdx - v.tl0PicIdxUsed = state.Tl0PicIdxUsed - v.tidUsed = state.TidUsed - v.lastKeyIdx = state.LastKeyIdx - v.keyIdxUsed = state.KeyIdxUsed +func (v *VP8) SeedState(seed interface{}) { + if state, ok := seed.(VP8State); ok { + v.extLastPictureId = state.ExtLastPictureId + v.pictureIdUsed = state.PictureIdUsed + v.lastTl0PicIdx = state.LastTl0PicIdx + v.tl0PicIdxUsed = state.Tl0PicIdxUsed + v.tidUsed = state.TidUsed + v.lastKeyIdx = state.LastKeyIdx + v.keyIdxUsed = state.KeyIdxUsed + } } -func (v *VP8Munger) SetLast(extPkt *buffer.ExtPacket) { +func (v *VP8) SetLast(extPkt *buffer.ExtPacket) { vp8, ok := extPkt.Payload.(buffer.VP8) if !ok { return } - v.pictureIdUsed = vp8.PictureIDPresent - if v.pictureIdUsed == 1 { - v.pictureIdWrapHandler.Init(int32(vp8.PictureID)-1, vp8.MBit) + v.pictureIdUsed = vp8.I + if v.pictureIdUsed { + v.pictureIdWrapHandler.Init(int32(vp8.PictureID)-1, vp8.M) v.extLastPictureId = int32(vp8.PictureID) } - v.tl0PicIdxUsed = vp8.TL0PICIDXPresent - if v.tl0PicIdxUsed == 1 { + v.tl0PicIdxUsed = vp8.L + if v.tl0PicIdxUsed { v.lastTl0PicIdx = vp8.TL0PICIDX } - v.tidUsed = vp8.TIDPresent + v.tidUsed = vp8.T - v.keyIdxUsed = vp8.KEYIDXPresent - if v.keyIdxUsed == 1 { + v.keyIdxUsed = vp8.K + if v.keyIdxUsed { v.lastKeyIdx = vp8.KEYIDX } } -func (v *VP8Munger) UpdateOffsets(extPkt *buffer.ExtPacket) { +func (v *VP8) UpdateOffsets(extPkt *buffer.ExtPacket) { vp8, ok := extPkt.Payload.(buffer.VP8) if !ok { return } - if v.pictureIdUsed == 1 { - v.pictureIdWrapHandler.Init(int32(vp8.PictureID)-1, vp8.MBit) + if v.pictureIdUsed { + v.pictureIdWrapHandler.Init(int32(vp8.PictureID)-1, vp8.M) v.pictureIdOffset = int32(vp8.PictureID) - v.extLastPictureId - 1 } - if v.tl0PicIdxUsed == 1 { + if v.tl0PicIdxUsed { v.tl0PicIdxOffset = vp8.TL0PICIDX - v.lastTl0PicIdx - 1 } - if v.keyIdxUsed == 1 { + if v.keyIdxUsed { v.keyIdxOffset = 
(vp8.KEYIDX - v.lastKeyIdx - 1) & 0x1f } @@ -147,16 +138,16 @@ func (v *VP8Munger) UpdateOffsets(extPkt *buffer.ExtPacket) { v.exemptedPictureIds = orderedmap.NewOrderedMap[int32, bool]() } -func (v *VP8Munger) UpdateAndGet(extPkt *buffer.ExtPacket, ordering SequenceNumberOrdering, maxTemporalLayer int32) (*TranslationParamsVP8, error) { +func (v *VP8) UpdateAndGet(extPkt *buffer.ExtPacket, snOutOfOrder bool, snHasGap bool, maxTemporalLayer int32) ([]byte, error) { vp8, ok := extPkt.Payload.(buffer.VP8) if !ok { return nil, ErrNotVP8 } - extPictureId := v.pictureIdWrapHandler.Unwrap(vp8.PictureID, vp8.MBit) + extPictureId := v.pictureIdWrapHandler.Unwrap(vp8.PictureID, vp8.M) // if out-of-order, look up missing picture id cache - if ordering == SequenceNumberOrderingOutOfOrder { + if snOutOfOrder { pictureIdOffset, ok := v.missingPictureIds.Get(extPictureId) if !ok { return nil, ErrOutOfOrderVP8PictureIdCacheMiss @@ -170,27 +161,25 @@ func (v *VP8Munger) UpdateAndGet(extPkt *buffer.ExtPacket, ordering SequenceNumb mungedPictureId := uint16((extPictureId - pictureIdOffset) & 0x7fff) vp8Packet := &buffer.VP8{ - FirstByte: vp8.FirstByte, - PictureIDPresent: vp8.PictureIDPresent, - PictureID: mungedPictureId, - MBit: mungedPictureId > 127, - TL0PICIDXPresent: vp8.TL0PICIDXPresent, - TL0PICIDX: vp8.TL0PICIDX - v.tl0PicIdxOffset, - TIDPresent: vp8.TIDPresent, - TID: vp8.TID, - Y: vp8.Y, - KEYIDXPresent: vp8.KEYIDXPresent, - KEYIDX: vp8.KEYIDX - v.keyIdxOffset, - IsKeyFrame: vp8.IsKeyFrame, - HeaderSize: vp8.HeaderSize + buffer.VP8PictureIdSizeDiff(mungedPictureId > 127, vp8.MBit), + FirstByte: vp8.FirstByte, + I: vp8.I, + M: mungedPictureId > 127, + PictureID: mungedPictureId, + L: vp8.L, + TL0PICIDX: vp8.TL0PICIDX - v.tl0PicIdxOffset, + T: vp8.T, + TID: vp8.TID, + Y: vp8.Y, + K: vp8.K, + KEYIDX: vp8.KEYIDX - v.keyIdxOffset, + IsKeyFrame: vp8.IsKeyFrame, + HeaderSize: vp8.HeaderSize + buffer.VPxPictureIdSizeDiff(mungedPictureId > 127, vp8.M), } - return 
&TranslationParamsVP8{ - Header: vp8Packet, - }, nil + return vp8Packet.Marshal() } prevMaxPictureId := v.pictureIdWrapHandler.MaxPictureId() - v.pictureIdWrapHandler.UpdateMaxPictureId(extPictureId, vp8.MBit) + v.pictureIdWrapHandler.UpdateMaxPictureId(extPictureId, vp8.M) // if there is a gap in sequence number, record possible pictures that // the missing packets can belong to in missing picture id cache. @@ -205,7 +194,7 @@ func (v *VP8Munger) UpdateAndGet(extPkt *buffer.ExtPacket, ordering SequenceNumb // it is possible to deduce that (for example by looking at previous packet's RTP marker // and check if that was the last packet of Picture 10), it could get complicated when // the gap is larger. - if ordering == SequenceNumberOrderingGap { + if snHasGap { for lostPictureId := prevMaxPictureId; lostPictureId <= extPictureId; lostPictureId++ { // Record missing only if picture id was not dropped. This is to avoid a subsequent packet of dropped frame going through. // A sequence like this @@ -229,7 +218,7 @@ func (v *VP8Munger) UpdateAndGet(extPkt *buffer.ExtPacket, ordering SequenceNumb // which layer the missing packets belong to. A layer could have multiple packets. So, keep track // of pictures that are forwarded even though they will be filterd out based on temporal layer // requirements. That allows forwarding of the complete picture. 
- if vp8.TIDPresent == 1 && vp8.TID > uint8(maxTemporalLayer) { + if vp8.T && vp8.TID > uint8(maxTemporalLayer) { v.exemptedPictureIds.Set(extPictureId, true) // trim cache if necessary for v.exemptedPictureIds.Len() > exemptedPictureIdsThreshold { @@ -238,12 +227,12 @@ func (v *VP8Munger) UpdateAndGet(extPkt *buffer.ExtPacket, ordering SequenceNumb } } } else { - if vp8.TIDPresent == 1 && vp8.TID > uint8(maxTemporalLayer) { + if vp8.T && vp8.TID > uint8(maxTemporalLayer) { // drop only if not exempted _, ok := v.exemptedPictureIds.Get(extPictureId) if !ok { // adjust only once per picture as a picture could have multiple packets - if vp8.PictureIDPresent == 1 && prevMaxPictureId != extPictureId { + if vp8.I && prevMaxPictureId != extPictureId { // keep track of dropped picture ids so that they do not get into the missing picture cache v.droppedPictureIds.Set(extPictureId, true) // trim cache if necessary @@ -275,38 +264,36 @@ func (v *VP8Munger) UpdateAndGet(extPkt *buffer.ExtPacket, ordering SequenceNumb v.lastKeyIdx = mungedKeyIdx vp8Packet := &buffer.VP8{ - FirstByte: vp8.FirstByte, - PictureIDPresent: vp8.PictureIDPresent, - PictureID: mungedPictureId, - MBit: mungedPictureId > 127, - TL0PICIDXPresent: vp8.TL0PICIDXPresent, - TL0PICIDX: mungedTl0PicIdx, - TIDPresent: vp8.TIDPresent, - TID: vp8.TID, - Y: vp8.Y, - KEYIDXPresent: vp8.KEYIDXPresent, - KEYIDX: mungedKeyIdx, - IsKeyFrame: vp8.IsKeyFrame, - HeaderSize: vp8.HeaderSize + buffer.VP8PictureIdSizeDiff(mungedPictureId > 127, vp8.MBit), + FirstByte: vp8.FirstByte, + I: vp8.I, + M: mungedPictureId > 127, + PictureID: mungedPictureId, + L: vp8.L, + TL0PICIDX: mungedTl0PicIdx, + T: vp8.T, + TID: vp8.TID, + Y: vp8.Y, + K: vp8.K, + KEYIDX: mungedKeyIdx, + IsKeyFrame: vp8.IsKeyFrame, + HeaderSize: vp8.HeaderSize + buffer.VPxPictureIdSizeDiff(mungedPictureId > 127, vp8.M), } - return &TranslationParamsVP8{ - Header: vp8Packet, - }, nil + return vp8Packet.Marshal() } -func (v *VP8Munger) 
UpdateAndGetPadding(newPicture bool) *buffer.VP8 { +func (v *VP8) UpdateAndGetPadding(newPicture bool) ([]byte, error) { offset := 0 if newPicture { offset = 1 } headerSize := 1 - if (v.pictureIdUsed + v.tl0PicIdxUsed + v.tidUsed + v.keyIdxUsed) != 0 { + if v.pictureIdUsed || v.tl0PicIdxUsed || v.tidUsed || v.keyIdxUsed { headerSize += 1 } extPictureId := v.extLastPictureId - if v.pictureIdUsed == 1 { + if v.pictureIdUsed { extPictureId = v.extLastPictureId + int32(offset) v.extLastPictureId = extPictureId v.pictureIdOffset -= int32(offset) @@ -319,44 +306,44 @@ func (v *VP8Munger) UpdateAndGetPadding(newPicture bool) *buffer.VP8 { pictureId := uint16(extPictureId & 0x7fff) tl0PicIdx := uint8(0) - if v.tl0PicIdxUsed == 1 { + if v.tl0PicIdxUsed { tl0PicIdx = v.lastTl0PicIdx + uint8(offset) v.lastTl0PicIdx = tl0PicIdx v.tl0PicIdxOffset -= uint8(offset) headerSize += 1 } - if (v.tidUsed + v.keyIdxUsed) != 0 { + if v.tidUsed || v.keyIdxUsed { headerSize += 1 } keyIdx := uint8(0) - if v.keyIdxUsed == 1 { + if v.keyIdxUsed { keyIdx = (v.lastKeyIdx + uint8(offset)) & 0x1f v.lastKeyIdx = keyIdx v.keyIdxOffset -= uint8(offset) } vp8Packet := &buffer.VP8{ - FirstByte: 0x10, // partition 0, start of VP8 Partition, reference frame - PictureIDPresent: v.pictureIdUsed, - PictureID: pictureId, - MBit: pictureId > 127, - TL0PICIDXPresent: v.tl0PicIdxUsed, - TL0PICIDX: tl0PicIdx, - TIDPresent: v.tidUsed, - TID: 0, - Y: 1, - KEYIDXPresent: v.keyIdxUsed, - KEYIDX: keyIdx, - IsKeyFrame: true, - HeaderSize: headerSize, + FirstByte: 0x10, // partition 0, start of VP8 Partition, reference frame + I: v.pictureIdUsed, + M: pictureId > 127, + PictureID: pictureId, + L: v.tl0PicIdxUsed, + TL0PICIDX: tl0PicIdx, + T: v.tidUsed, + TID: 0, + Y: true, + K: v.keyIdxUsed, + KEYIDX: keyIdx, + IsKeyFrame: true, + HeaderSize: headerSize, } - return vp8Packet + return vp8Packet.Marshal() } // for testing only -func (v *VP8Munger) PictureIdOffset(extPictureId int32) (int32, bool) { +func (v *VP8) 
PictureIdOffset(extPictureId int32) (int32, bool) { return v.missingPictureIds.Get(extPictureId) } diff --git a/pkg/sfu/vp8munger_test.go b/pkg/sfu/codecmunger/vp8_test.go similarity index 51% rename from pkg/sfu/vp8munger_test.go rename to pkg/sfu/codecmunger/vp8_test.go index 0b32fc4db..93c27086c 100644 --- a/pkg/sfu/vp8munger_test.go +++ b/pkg/sfu/codecmunger/vp8_test.go @@ -1,4 +1,4 @@ -package sfu +package codecmunger import ( "reflect" @@ -12,7 +12,7 @@ import ( "github.com/livekit/livekit-server/pkg/sfu/testutils" ) -func compare(expected *VP8Munger, actual *VP8Munger) bool { +func compare(expected *VP8, actual *VP8) bool { return reflect.DeepEqual(expected.pictureIdWrapHandler, actual.pictureIdWrapHandler) && expected.extLastPictureId == actual.extLastPictureId && expected.pictureIdOffset == actual.pictureIdOffset && @@ -26,12 +26,12 @@ func compare(expected *VP8Munger, actual *VP8Munger) bool { expected.keyIdxUsed == actual.keyIdxUsed } -func newVP8Munger() *VP8Munger { - return NewVP8Munger(logger.GetLogger()) +func newVP8() *VP8 { + return NewVP8(logger.GetLogger()) } func TestSetLast(t *testing.T) { - v := newVP8Munger() + v := newVP8() params := &testutils.TestExtPacketParams{ SequenceNumber: 23333, @@ -39,51 +39,49 @@ func TestSetLast(t *testing.T) { SSRC: 0x12345678, } vp8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 13, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 13, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } extPkt, err := testutils.GetTestExtPacketVP8(params, vp8) require.NoError(t, err) require.NotNil(t, extPkt) - expectedVP8Munger := VP8Munger{ - VP8MungerParams: VP8MungerParams{ - pictureIdWrapHandler: VP8PictureIdWrapHandler{ - maxPictureId: 13466, - maxMBit: true, 
- totalWrap: 0, - lastWrap: 0, - }, - extLastPictureId: 13467, - pictureIdOffset: 0, - pictureIdUsed: 1, - lastTl0PicIdx: 233, - tl0PicIdxOffset: 0, - tl0PicIdxUsed: 1, - tidUsed: 1, - lastKeyIdx: 23, - keyIdxOffset: 0, - keyIdxUsed: 1, + expectedVP8 := VP8{ + pictureIdWrapHandler: VP8PictureIdWrapHandler{ + maxPictureId: 13466, + maxMBit: true, + totalWrap: 0, + lastWrap: 0, }, + extLastPictureId: 13467, + pictureIdOffset: 0, + pictureIdUsed: true, + lastTl0PicIdx: 233, + tl0PicIdxOffset: 0, + tl0PicIdxUsed: true, + tidUsed: true, + lastKeyIdx: 23, + keyIdxOffset: 0, + keyIdxUsed: true, } v.SetLast(extPkt) - require.True(t, compare(&expectedVP8Munger, v)) + require.True(t, compare(&expectedVP8, v)) } func TestUpdateOffsets(t *testing.T) { - v := newVP8Munger() + v := newVP8() params := &testutils.TestExtPacketParams{ SequenceNumber: 23333, @@ -91,19 +89,19 @@ func TestUpdateOffsets(t *testing.T) { SSRC: 0x12345678, } vp8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 13, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 13, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } extPkt, _ := testutils.GetTestExtPacketVP8(params, vp8) v.SetLast(extPkt) @@ -114,48 +112,46 @@ func TestUpdateOffsets(t *testing.T) { SSRC: 0x87654321, } vp8 = &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 345, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 12, - TIDPresent: 1, - TID: 13, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 4, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 25, + I: true, + M: true, + PictureID: 345, + L: true, + TL0PICIDX: 12, + T: true, + TID: 13, + Y: true, + K: true, + KEYIDX: 4, + HeaderSize: 6, + IsKeyFrame: true, } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) 
v.UpdateOffsets(extPkt) - expectedVP8Munger := VP8Munger{ - VP8MungerParams: VP8MungerParams{ - pictureIdWrapHandler: VP8PictureIdWrapHandler{ - maxPictureId: 344, - maxMBit: true, - totalWrap: 0, - lastWrap: 0, - }, - extLastPictureId: 13467, - pictureIdOffset: 345 - 13467 - 1, - pictureIdUsed: 1, - lastTl0PicIdx: 233, - tl0PicIdxOffset: (12 - 233 - 1) & 0xff, - tl0PicIdxUsed: 1, - tidUsed: 1, - lastKeyIdx: 23, - keyIdxOffset: (4 - 23 - 1) & 0x1f, - keyIdxUsed: 1, + expectedVP8 := VP8{ + pictureIdWrapHandler: VP8PictureIdWrapHandler{ + maxPictureId: 344, + maxMBit: true, + totalWrap: 0, + lastWrap: 0, }, + extLastPictureId: 13467, + pictureIdOffset: 345 - 13467 - 1, + pictureIdUsed: true, + lastTl0PicIdx: 233, + tl0PicIdxOffset: (12 - 233 - 1) & 0xff, + tl0PicIdxUsed: true, + tidUsed: true, + lastKeyIdx: 23, + keyIdxOffset: (4 - 23 - 1) & 0x1f, + keyIdxUsed: true, } - require.True(t, compare(&expectedVP8Munger, v)) + require.True(t, compare(&expectedVP8, v)) } func TestOutOfOrderPictureId(t *testing.T) { - v := newVP8Munger() + v := newVP8() params := &testutils.TestExtPacketParams{ SequenceNumber: 23333, @@ -163,58 +159,57 @@ func TestOutOfOrderPictureId(t *testing.T) { SSRC: 0x12345678, } vp8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 1, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 1, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } extPkt, _ := testutils.GetTestExtPacketVP8(params, vp8) v.SetLast(extPkt) - v.UpdateAndGet(extPkt, SequenceNumberOrderingContiguous, 2) + v.UpdateAndGet(extPkt, false, false, 2) // out-of-order sequence number not in the missing picture id cache vp8.PictureID = 13466 extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - tp, err := v.UpdateAndGet(extPkt, 
SequenceNumberOrderingOutOfOrder, 2) + codecBytes, err := v.UpdateAndGet(extPkt, true, false, 2) require.Error(t, err) require.ErrorIs(t, err, ErrOutOfOrderVP8PictureIdCacheMiss) - require.Nil(t, tp) + require.Nil(t, codecBytes) // create a hole in picture id vp8.PictureID = 13469 extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - tpExpected := TranslationParamsVP8{ - Header: &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13469, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 1, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, - }, + expectedVP8 := &buffer.VP8{ + FirstByte: 25, + I: true, + M: true, + PictureID: 13469, + L: true, + TL0PICIDX: 233, + T: true, + TID: 1, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } - tp, err = v.UpdateAndGet(extPkt, SequenceNumberOrderingGap, 2) + marshalledVP8, err := expectedVP8.Marshal() require.NoError(t, err) - require.NotNil(t, tp) - require.Equal(t, tpExpected, *tp) + codecBytes, err = v.UpdateAndGet(extPkt, false, true, 2) + require.NoError(t, err) + require.Equal(t, marshalledVP8, codecBytes) // all three, the last, the current and the in-between should have been added to missing picture id cache value, ok := v.PictureIdOffset(13467) @@ -233,31 +228,30 @@ func TestOutOfOrderPictureId(t *testing.T) { vp8.PictureID = 13468 extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - tpExpected = TranslationParamsVP8{ - Header: &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13468, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 1, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, - }, + expectedVP8 = &buffer.VP8{ + FirstByte: 25, + I: true, + M: true, + PictureID: 13468, + L: true, + TL0PICIDX: 233, + T: true, + TID: 1, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } - tp, err = v.UpdateAndGet(extPkt, 
SequenceNumberOrderingOutOfOrder, 2) + marshalledVP8, err = expectedVP8.Marshal() require.NoError(t, err) - require.NotNil(t, tp) - require.Equal(t, tpExpected, *tp) + codecBytes, err = v.UpdateAndGet(extPkt, true, false, 2) + require.NoError(t, err) + require.Equal(t, marshalledVP8, codecBytes) } func TestTemporalLayerFiltering(t *testing.T) { - v := newVP8Munger() + v := newVP8() params := &testutils.TestExtPacketParams{ SequenceNumber: 23333, @@ -265,25 +259,25 @@ func TestTemporalLayerFiltering(t *testing.T) { SSRC: 0x12345678, } vp8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 1, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 1, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } extPkt, _ := testutils.GetTestExtPacketVP8(params, vp8) v.SetLast(extPkt) // translate - tp, err := v.UpdateAndGet(extPkt, SequenceNumberOrderingContiguous, 0) + tp, err := v.UpdateAndGet(extPkt, false, false, 0) require.Error(t, err) require.ErrorIs(t, err, ErrFilteredVP8TemporalLayer) require.Nil(t, tp) @@ -296,7 +290,7 @@ func TestTemporalLayerFiltering(t *testing.T) { params.SequenceNumber = 23334 extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - tp, err = v.UpdateAndGet(extPkt, SequenceNumberOrderingContiguous, 0) + tp, err = v.UpdateAndGet(extPkt, false, false, 0) require.Error(t, err) require.ErrorIs(t, err, ErrFilteredVP8TemporalLayer) require.Nil(t, tp) @@ -309,7 +303,7 @@ func TestTemporalLayerFiltering(t *testing.T) { params.SequenceNumber = 23337 extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) - tp, err = v.UpdateAndGet(extPkt, SequenceNumberOrderingContiguous, 0) + tp, err = v.UpdateAndGet(extPkt, false, false, 0) require.Error(t, err) require.ErrorIs(t, err, ErrFilteredVP8TemporalLayer) 
require.Nil(t, tp) @@ -319,7 +313,7 @@ func TestTemporalLayerFiltering(t *testing.T) { } func TestGapInSequenceNumberSamePicture(t *testing.T) { - v := newVP8Munger() + v := newVP8() params := &testutils.TestExtPacketParams{ SequenceNumber: 65533, @@ -328,65 +322,65 @@ func TestGapInSequenceNumberSamePicture(t *testing.T) { PayloadSize: 33, } vp8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 1, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 1, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } extPkt, _ := testutils.GetTestExtPacketVP8(params, vp8) v.SetLast(extPkt) - tpExpected := TranslationParamsVP8{ - Header: &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 1, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, - }, + expectedVP8 := &buffer.VP8{ + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 1, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } - tp, err := v.UpdateAndGet(extPkt, SequenceNumberOrderingContiguous, 2) + marshalledVP8, err := expectedVP8.Marshal() require.NoError(t, err) - require.Equal(t, tpExpected, *tp) + codecBytes, err := v.UpdateAndGet(extPkt, false, false, 2) + require.NoError(t, err) + require.Equal(t, marshalledVP8, codecBytes) // telling there is a gap in sequence number will add pictures to missing picture cache - tpExpected = TranslationParamsVP8{ - Header: &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 1, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - 
HeaderSize: 6, - IsKeyFrame: true, - }, + expectedVP8 = &buffer.VP8{ + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 1, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } - tp, err = v.UpdateAndGet(extPkt, SequenceNumberOrderingGap, 2) + marshalledVP8, err = expectedVP8.Marshal() require.NoError(t, err) - require.Equal(t, tpExpected, *tp) + codecBytes, err = v.UpdateAndGet(extPkt, false, true, 2) + require.NoError(t, err) + require.Equal(t, marshalledVP8, codecBytes) value, ok := v.PictureIdOffset(13467) require.True(t, ok) @@ -394,7 +388,7 @@ func TestGapInSequenceNumberSamePicture(t *testing.T) { } func TestUpdateAndGetPadding(t *testing.T) { - v := newVP8Munger() + v := newVP8() params := &testutils.TestExtPacketParams{ SequenceNumber: 23333, @@ -403,61 +397,67 @@ func TestUpdateAndGetPadding(t *testing.T) { PayloadSize: 20, } vp8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 13, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 13, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } extPkt, _ := testutils.GetTestExtPacketVP8(params, vp8) v.SetLast(extPkt) // getting padding with repeat of last picture - blankVP8 := v.UpdateAndGetPadding(false) + blankBytes, err := v.UpdateAndGetPadding(false) + require.NoError(t, err) expectedVP8 := buffer.VP8{ - FirstByte: 16, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 0, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 16, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 
6, + IsKeyFrame: true, } - require.Equal(t, expectedVP8, *blankVP8) + marshalledVP8, err := expectedVP8.Marshal() + require.NoError(t, err) + require.Equal(t, marshalledVP8, blankBytes) // getting padding with new picture - blankVP8 = v.UpdateAndGetPadding(true) + blankBytes, err = v.UpdateAndGetPadding(true) + require.NoError(t, err) expectedVP8 = buffer.VP8{ - FirstByte: 16, - PictureIDPresent: 1, - PictureID: 13468, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 234, - TIDPresent: 1, - TID: 0, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 24, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 16, + I: true, + M: true, + PictureID: 13468, + L: true, + TL0PICIDX: 234, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 24, + HeaderSize: 6, + IsKeyFrame: true, } - require.Equal(t, expectedVP8, *blankVP8) + marshalledVP8, err = expectedVP8.Marshal() + require.NoError(t, err) + require.Equal(t, marshalledVP8, blankBytes) } func TestVP8PictureIdWrapHandler(t *testing.T) { diff --git a/pkg/sfu/downtrack.go b/pkg/sfu/downtrack.go index 0fee2e8ca..3c75064fc 100644 --- a/pkg/sfu/downtrack.go +++ b/pkg/sfu/downtrack.go @@ -65,11 +65,7 @@ var ( ErrPaddingOnlyPacket = errors.New("padding only packet that need not be forwarded") ErrDuplicatePacket = errors.New("duplicate packet") ErrPaddingNotOnFrameBoundary = errors.New("padding cannot send on non-frame boundary") - ErrNotVP8 = errors.New("not VP8") - ErrOutOfOrderVP8PictureIdCacheMiss = errors.New("out-of-order VP8 picture id not found in cache") - ErrFilteredVP8TemporalLayer = errors.New("filtered VP8 temporal layer") ErrDownTrackAlreadyBound = errors.New("already bound") - ErrDownTrackClosed = errors.New("downtrack closed") ) var ( @@ -143,8 +139,8 @@ type DownTrackStreamAllocatorListener interface { // subscribed max video layer changed OnSubscribedLayersChanged(dt *DownTrack, layers buffer.VideoLayer) - // target video layer reached - OnTargetLayerReached(dt *DownTrack) + // stream resumed + OnResume(dt *DownTrack) // 
packet(s) sent OnPacketsSent(dt *DownTrack, size int) @@ -209,8 +205,7 @@ type DownTrack struct { connectionStats *connectionquality.ConnectionStats deltaStatsSnapshotId uint32 - // Debug info - pktsDropped atomic.Uint32 + // for throttling error logs writeIOErrors atomic.Uint32 isNACKThrottled atomic.Bool @@ -342,13 +337,14 @@ func (d *DownTrack) Bind(t webrtc.TrackLocalContext) (webrtc.RTPCodecParameters, } d.codec = codec.RTPCodecCapability - d.forwarder.DetermineCodec(d.codec) if d.onBinding != nil { d.onBinding() } d.bound.Store(true) d.bindLock.Unlock() + d.forwarder.DetermineCodec(d.codec, d.receiver.HeaderExtensions()) + d.logger.Debugw("downtrack bound") d.onBindAndConnected() @@ -551,9 +547,6 @@ func (d *DownTrack) WriteRTP(extPkt *buffer.ExtPacket, layer int32) error { tp, err := d.forwarder.GetTranslationParams(extPkt, layer) if tp.shouldDrop { - if tp.isDroppingRelevant { - d.pktsDropped.Inc() - } if err != nil { d.logger.Errorw("write rtp packet failed", err) } @@ -561,39 +554,32 @@ func (d *DownTrack) WriteRTP(extPkt *buffer.ExtPacket, layer int32) error { } payload := extPkt.Packet.Payload - if tp.vp8 != nil { + if len(tp.codecBytes) != 0 { incomingVP8, _ := extPkt.Payload.(buffer.VP8) pool = PacketFactory.Get().(*[]byte) - payload, err = d.translateVP8PacketTo(extPkt.Packet, &incomingVP8, tp.vp8.Header, pool) - if err != nil { - d.pktsDropped.Inc() - d.logger.Errorw("write rtp packet failed", err) - return err - } + payload = d.translateVP8PacketTo(extPkt.Packet, &incomingVP8, tp.codecBytes, pool) } var meta *packetMeta if d.sequencer != nil { meta = d.sequencer.push(extPkt.Packet.SequenceNumber, tp.rtp.sequenceNumber, tp.rtp.timestamp, int8(layer)) - if meta != nil && tp.vp8 != nil { - meta.packVP8(tp.vp8.Header) + if meta != nil { + meta.codecBytes = append(meta.codecBytes, tp.codecBytes...) 
} } hdr, err := d.getTranslatedRTPHeader(extPkt, tp) if err != nil { - d.pktsDropped.Inc() d.logger.Errorw("write rtp packet failed", err) return err } if meta != nil && d.dependencyDescriptorID != 0 { - meta.ddBytes = hdr.GetExtension(uint8(d.dependencyDescriptorID)) + meta.ddBytes = append(meta.ddBytes, tp.ddBytes...) } _, err = d.writeStream.WriteRTP(hdr, payload) if err != nil { - d.pktsDropped.Inc() if errors.Is(err, io.ErrClosedPipe) { writeIOErrors := d.writeIOErrors.Inc() if (writeIOErrors % 100) == 1 { @@ -611,22 +597,23 @@ func (d *DownTrack) WriteRTP(extPkt *buffer.ExtPacket, layer int32) error { d.onMaxSubscribedLayerChanged(d, layer) } - if extPkt.KeyFrame || tp.isSwitchingToTargetLayer { + if extPkt.KeyFrame { d.isNACKThrottled.Store(false) if extPkt.KeyFrame { d.rtpStats.UpdateKeyFrame(1) d.logger.Debugw("forwarding key frame", "layer", layer) } + // SVC-TODO - no need for key frame always when using SVC locked, _ := d.forwarder.CheckSync() if locked { d.stopKeyFrameRequester() } + } - if tp.isSwitchingToTargetLayer { - if sal := d.getStreamAllocatorListener(); sal != nil { - sal.OnTargetLayerReached(d) - } + if tp.isResuming { + if sal := d.getStreamAllocatorListener(); sal != nil { + sal.OnResume(d) } } @@ -1222,20 +1209,18 @@ func (d *DownTrack) writeOpusRedBlankFrame(hdr *rtp.Header, frameEndNeeded bool) } func (d *DownTrack) writeVP8BlankFrame(hdr *rtp.Header, frameEndNeeded bool) (int, error) { - blankVP8 := d.forwarder.GetPaddingVP8(frameEndNeeded) + blankVP8, err := d.forwarder.GetPadding(frameEndNeeded) + if err != nil { + return 0, err + } // 8x8 key frame // Used even when closing out a previous frame. 
Looks like receivers // do not care about content (it will probably end up being an undecodable // frame, but that should be okay as there are key frames following) - payload := make([]byte, blankVP8.HeaderSize+len(VP8KeyFrame8x8)) - vp8Header := payload[:blankVP8.HeaderSize] - err := blankVP8.MarshalTo(vp8Header) - if err != nil { - return 0, err - } - - copy(payload[blankVP8.HeaderSize:], VP8KeyFrame8x8) + payload := make([]byte, len(blankVP8)+len(VP8KeyFrame8x8)) + copy(payload[:len(blankVP8)], blankVP8) + copy(payload[len(blankVP8):], VP8KeyFrame8x8) _, err = d.writeStream.WriteRTP(hdr, payload) if err == nil { @@ -1451,13 +1436,9 @@ func (d *DownTrack) retransmitPackets(nacks []uint16) { continue } - translatedVP8 := meta.unpackVP8() + translatedVP8 := meta.codecBytes pool = PacketFactory.Get().(*[]byte) - payload, err = d.translateVP8PacketTo(&pkt, &incomingVP8, translatedVP8, pool) - if err != nil { - d.logger.Errorw("translating VP8 packet err", err) - continue - } + payload = d.translateVP8PacketTo(&pkt, &incomingVP8, translatedVP8, pool) } var extraExtensions []extensionData @@ -1533,16 +1514,11 @@ func (d *DownTrack) getTranslatedRTPHeader(extPkt *buffer.ExtPacket, tp *Transla } var extension []extensionData - if d.dependencyDescriptorID != 0 && tp.ddExtension != nil { - bytes, err := tp.ddExtension.Marshal() - if err != nil { - d.logger.Warnw("error marshalling dependency descriptor extension", err) - } else { - extension = append(extension, extensionData{ - id: uint8(d.dependencyDescriptorID), - payload: bytes, - }) - } + if d.dependencyDescriptorID != 0 && len(tp.ddBytes) != 0 { + extension = append(extension, extensionData{ + id: uint8(d.dependencyDescriptorID), + payload: tp.ddBytes, + }) } err := d.writeRTPHeaderExtensions(&hdr, extension...) 
if err != nil { @@ -1552,14 +1528,14 @@ func (d *DownTrack) getTranslatedRTPHeader(extPkt *buffer.ExtPacket, tp *Transla return &hdr, nil } -func (d *DownTrack) translateVP8PacketTo(pkt *rtp.Packet, incomingVP8 *buffer.VP8, translatedVP8 *buffer.VP8, outbuf *[]byte) ([]byte, error) { - buf := (*outbuf)[:len(pkt.Payload)+translatedVP8.HeaderSize-incomingVP8.HeaderSize] +func (d *DownTrack) translateVP8PacketTo(pkt *rtp.Packet, incomingVP8 *buffer.VP8, translatedVP8 []byte, outbuf *[]byte) []byte { + buf := (*outbuf)[:len(pkt.Payload)+len(translatedVP8)-incomingVP8.HeaderSize] srcPayload := pkt.Payload[incomingVP8.HeaderSize:] - dstPayload := buf[translatedVP8.HeaderSize:] + dstPayload := buf[len(translatedVP8):] copy(dstPayload, srcPayload) - err := translatedVP8.MarshalTo(buf[:translatedVP8.HeaderSize]) - return buf, err + copy(buf[:len(translatedVP8)], translatedVP8) + return buf } func (d *DownTrack) DebugInfo() map[string]interface{} { @@ -1572,7 +1548,6 @@ func (d *DownTrack) DebugInfo() map[string]interface{} { "TSOffset": rtpMungerParams.tsOffset, "LastMarker": rtpMungerParams.lastMarker, "LastPli": d.rtpStats.LastPli(), - "PacketsDropped": d.pktsDropped.Load(), } senderReport := d.CreateSenderReport() diff --git a/pkg/sfu/forwarder.go b/pkg/sfu/forwarder.go index 1104fb6d0..f29ad906f 100644 --- a/pkg/sfu/forwarder.go +++ b/pkg/sfu/forwarder.go @@ -12,7 +12,10 @@ import ( "github.com/livekit/protocol/logger" "github.com/livekit/livekit-server/pkg/sfu/buffer" + "github.com/livekit/livekit-server/pkg/sfu/codecmunger" dd "github.com/livekit/livekit-server/pkg/sfu/dependencydescriptor" + "github.com/livekit/livekit-server/pkg/sfu/videolayerselector" + "github.com/livekit/livekit-server/pkg/sfu/videolayerselector/temporallayerselector" ) // Forwarder @@ -94,16 +97,15 @@ var ( // ------------------------------------------------------------------- type VideoAllocationProvisional struct { - muted bool - pubMuted bool - maxPublishedLayer int32 - maxTemporalLayerSeen 
int32 - availableLayers []int32 - Bitrates Bitrates - maxLayers buffer.VideoLayer - currentLayers buffer.VideoLayer - parkedLayers buffer.VideoLayer - allocatedLayers buffer.VideoLayer + muted bool + pubMuted bool + maxSeenLayer buffer.VideoLayer + availableLayers []int32 + Bitrates Bitrates + maxLayers buffer.VideoLayer + currentLayers buffer.VideoLayer + parkedLayers buffer.VideoLayer + allocatedLayers buffer.VideoLayer } // ------------------------------------------------------------------- @@ -122,17 +124,12 @@ func (v VideoTransition) String() string { type TranslationParams struct { shouldDrop bool - isDroppingRelevant bool + isResuming bool isSwitchingToMaxLayer bool rtp *TranslationParamsRTP - vp8 *TranslationParamsVP8 - ddExtension *dd.DependencyDescriptorExtension + codecBytes []byte + ddBytes []byte marker bool - - // indicates this frame has 'Switch' decode indication for target layer - // TODO : in theory, we need check frame chain is not broken for the target - // but we don't have frame queue now, so just use decode target indication - isSwitchingToTargetLayer bool } // ------------------------------------------------------------------- @@ -140,11 +137,16 @@ type TranslationParams struct { type ForwarderState struct { Started bool RTP RTPMungerState - VP8 VP8MungerState + Codec interface{} } func (f ForwarderState) String() string { - return fmt.Sprintf("ForwarderState{started: %v, rtp: %s, vp8: %s}", f.Started, f.RTP.String(), f.VP8.String()) + codecString := "" + switch codecState := f.Codec.(type) { + case codecmunger.VP8State: + codecString = codecState.String() + } + return fmt.Sprintf("ForwarderState{started: %v, rtp: %s, codec: %s}", f.Started, f.RTP.String(), codecString) } // ------------------------------------------------------------------- @@ -159,30 +161,21 @@ type Forwarder struct { muted bool pubMuted bool - maxPublishedLayer int32 - maxTemporalLayerSeen int32 - started bool lastSSRC uint32 referenceLayerSpatial int32 - maxLayers 
buffer.VideoLayer - currentLayers buffer.VideoLayer - targetLayers buffer.VideoLayer - requestLayerSpatial int32 - parkedLayers buffer.VideoLayer // layers that can resume without key frame - parkedLayersTimer *time.Timer + parkedLayersTimer *time.Timer provisional *VideoAllocationProvisional lastAllocation VideoAllocation rtpMunger *RTPMunger - vp8Munger *VP8Munger - isTemporalSupported bool + vls videolayerselector.VideoLayerSelector - ddLayerSelector *DDVideoLayerSelector + codecMunger codecmunger.CodecMunger onParkedLayersExpired func() } @@ -196,29 +189,16 @@ func NewForwarder( kind: kind, logger: logger, getReferenceLayerRTPTimestamp: getReferenceLayerRTPTimestamp, - - maxPublishedLayer: buffer.InvalidLayerSpatial, - maxTemporalLayerSeen: buffer.InvalidLayerTemporal, - - referenceLayerSpatial: buffer.InvalidLayerSpatial, - - // start off with nothing, let streamallocator/opportunistic forwarder set the target - currentLayers: buffer.InvalidLayers, - targetLayers: buffer.InvalidLayers, - requestLayerSpatial: buffer.InvalidLayerSpatial, - parkedLayers: buffer.InvalidLayers, - - lastAllocation: VideoAllocationDefault, - - rtpMunger: NewRTPMunger(logger), + referenceLayerSpatial: buffer.InvalidLayerSpatial, + lastAllocation: VideoAllocationDefault, + rtpMunger: NewRTPMunger(logger), + vls: videolayerselector.NewNull(logger), + codecMunger: codecmunger.NewNull(logger), } if f.kind == webrtc.RTPCodecTypeVideo { - f.maxLayers = buffer.VideoLayer{Spatial: buffer.InvalidLayerSpatial, Temporal: buffer.DefaultMaxLayerTemporal} - } else { - f.maxLayers = buffer.InvalidLayers + f.vls.SetMaxTemporal(buffer.DefaultMaxLayerTemporal) } - return f } @@ -226,24 +206,26 @@ func (f *Forwarder) SetMaxPublishedLayer(maxPublishedLayer int32) { f.lock.Lock() defer f.lock.Unlock() - if maxPublishedLayer <= f.maxPublishedLayer { + existingMaxSeen := f.vls.GetMaxSeen() + if maxPublishedLayer <= existingMaxSeen.Spatial { return } - f.maxPublishedLayer = maxPublishedLayer - 
f.logger.Debugw("setting max published layer", "maxPublishedLayer", f.maxPublishedLayer) + f.vls.SetMaxSeenSpatial(maxPublishedLayer) + f.logger.Debugw("setting max published layer", "maxPublishedLayer", maxPublishedLayer) } func (f *Forwarder) SetMaxTemporalLayerSeen(maxTemporalLayerSeen int32) { f.lock.Lock() defer f.lock.Unlock() - if maxTemporalLayerSeen <= f.maxTemporalLayerSeen { + existingMaxSeen := f.vls.GetMaxSeen() + if maxTemporalLayerSeen <= existingMaxSeen.Temporal { return } - f.maxTemporalLayerSeen = maxTemporalLayerSeen - f.logger.Debugw("setting max temporal layer seen", "maxTemporalLayerSeen", f.maxTemporalLayerSeen) + f.vls.SetMaxSeenTemporal(maxTemporalLayerSeen) + f.logger.Debugw("setting max temporal layer seen", "maxTemporalLayerSeen", maxTemporalLayerSeen) } func (f *Forwarder) OnParkedLayersExpired(fn func()) { @@ -260,7 +242,7 @@ func (f *Forwarder) getOnParkedLayersExpired() func() { return f.onParkedLayersExpired } -func (f *Forwarder) DetermineCodec(codec webrtc.RTPCodecCapability) { +func (f *Forwarder) DetermineCodec(codec webrtc.RTPCodecCapability, extensions []webrtc.RTPHeaderExtensionParameter) { f.lock.Lock() defer f.lock.Unlock() @@ -271,12 +253,49 @@ func (f *Forwarder) DetermineCodec(codec webrtc.RTPCodecCapability) { switch strings.ToLower(codec.MimeType) { case "video/vp8": - f.isTemporalSupported = true - f.vp8Munger = NewVP8Munger(f.logger) - case "video/av1", "video/vp9": - // TODO : we only enable dd layer selector for av1 and vp9 now, at future we can - // enable it for vp8 too - f.ddLayerSelector = NewDDVideoLayerSelector(f.logger) + f.codecMunger = codecmunger.NewVP8(f.logger) + if f.vls != nil { + f.vls = videolayerselector.NewSimulcastFromNull(f.vls) + } else { + f.vls = videolayerselector.NewSimulcast(f.logger) + } + f.vls.SetTemporalLayerSelector(temporallayerselector.NewVP8(f.logger)) + case "video/h264": + if f.vls != nil { + f.vls = videolayerselector.NewSimulcastFromNull(f.vls) + } else { + f.vls = 
videolayerselector.NewSimulcast(f.logger) + } + case "video/vp9": + isDDAvailable := false + searchDone: + for _, ext := range extensions { + switch ext.URI { + case dd.ExtensionUrl: + isDDAvailable = true + break searchDone + } + } + if isDDAvailable { + if f.vls != nil { + f.vls = videolayerselector.NewDependencyDescriptorFromNull(f.vls) + } else { + f.vls = videolayerselector.NewDependencyDescriptor(f.logger) + } + } else { + if f.vls != nil { + f.vls = videolayerselector.NewVP9FromNull(f.vls) + } else { + f.vls = videolayerselector.NewVP9(f.logger) + } + } + case "video/av1": + // DD-TODO : we only enable dd layer selector for av1/vp9 now, in the future we can enable it for vp8 too + if f.vls != nil { + f.vls = videolayerselector.NewDependencyDescriptorFromNull(f.vls) + } else { + f.vls = videolayerselector.NewDependencyDescriptor(f.logger) + } } } @@ -291,10 +310,7 @@ func (f *Forwarder) GetState() ForwarderState { state := ForwarderState{ Started: f.started, RTP: f.rtpMunger.GetLast(), - } - - if f.vp8Munger != nil { - state.VP8 = f.vp8Munger.GetLast() + Codec: f.codecMunger.GetState(), } return state @@ -309,9 +325,7 @@ func (f *Forwarder) SeedState(state ForwarderState) { defer f.lock.Unlock() f.rtpMunger.SeedLast(state.RTP) - if f.vp8Munger != nil { - f.vp8Munger.SeedLast(state.VP8) - } + f.codecMunger.SeedState(state.Codec) f.started = true } @@ -321,7 +335,7 @@ func (f *Forwarder) Mute(muted bool) (bool, buffer.VideoLayer) { defer f.lock.Unlock() if f.muted == muted { - return false, f.maxLayers + return false, f.vls.GetMax() } f.logger.Debugw("setting forwarder mute", "muted", muted) @@ -332,7 +346,7 @@ func (f *Forwarder) Mute(muted bool) (bool, buffer.VideoLayer) { f.resyncLocked() } - return true, f.maxLayers + return true, f.vls.GetMax() } func (f *Forwarder) IsMuted() bool { @@ -347,7 +361,7 @@ func (f *Forwarder) PubMute(pubMuted bool) (bool, buffer.VideoLayer) { defer f.lock.Unlock() if f.pubMuted == pubMuted { - return false, f.maxLayers + 
return false, f.vls.GetMax() } f.logger.Debugw("setting forwarder pub mute", "pubMuted", pubMuted) @@ -362,13 +376,14 @@ func (f *Forwarder) PubMute(pubMuted bool) (bool, buffer.VideoLayer) { } else { // Do not resync on publisher mute as forwarding can continue on unmute using same layers. // On unmute, park current layers as streaming can continue without a key frame when publisher starts the stream. - if !pubMuted && f.targetLayers.IsValid() && f.currentLayers.Spatial == f.targetLayers.Spatial { - f.setupParkedLayers(f.targetLayers) - f.currentLayers = buffer.InvalidLayers + targetLayer := f.vls.GetTarget() + if !pubMuted && targetLayer.IsValid() && f.vls.GetCurrent().Spatial == targetLayer.Spatial { + f.setupParkedLayers(targetLayer) + f.vls.SetCurrent(buffer.InvalidLayers) } } - return true, f.maxLayers + return true, f.vls.GetMax() } func (f *Forwarder) IsPubMuted() bool { @@ -389,53 +404,63 @@ func (f *Forwarder) SetMaxSpatialLayer(spatialLayer int32) (bool, buffer.VideoLa f.lock.Lock() defer f.lock.Unlock() - if f.kind == webrtc.RTPCodecTypeAudio || spatialLayer == f.maxLayers.Spatial { - return false, f.maxLayers, f.currentLayers + if f.kind == webrtc.RTPCodecTypeAudio { + return false, buffer.InvalidLayers, buffer.InvalidLayers + } + + existingMax := f.vls.GetMax() + if spatialLayer == existingMax.Spatial { + return false, existingMax, f.vls.GetCurrent() } f.logger.Debugw("setting max spatial layer", "layer", spatialLayer) - f.maxLayers.Spatial = spatialLayer + f.vls.SetMaxSpatial(spatialLayer) f.clearParkedLayers() - return true, f.maxLayers, f.currentLayers + return true, f.vls.GetMax(), f.vls.GetCurrent() } func (f *Forwarder) SetMaxTemporalLayer(temporalLayer int32) (bool, buffer.VideoLayer, buffer.VideoLayer) { f.lock.Lock() defer f.lock.Unlock() - if f.kind == webrtc.RTPCodecTypeAudio || temporalLayer == f.maxLayers.Temporal { - return false, f.maxLayers, f.currentLayers + if f.kind == webrtc.RTPCodecTypeAudio { + return false, buffer.InvalidLayers, 
buffer.InvalidLayers + } + + existingMax := f.vls.GetMax() + if temporalLayer == existingMax.Temporal { + return false, existingMax, f.vls.GetCurrent() } f.logger.Debugw("setting max temporal layer", "layer", temporalLayer) - f.maxLayers.Temporal = temporalLayer + f.vls.SetMaxTemporal(temporalLayer) f.clearParkedLayers() - return true, f.maxLayers, f.currentLayers + return true, f.vls.GetMax(), f.vls.GetCurrent() } func (f *Forwarder) MaxLayers() buffer.VideoLayer { f.lock.RLock() defer f.lock.RUnlock() - return f.maxLayers + return f.vls.GetMax() } func (f *Forwarder) CurrentLayers() buffer.VideoLayer { f.lock.RLock() defer f.lock.RUnlock() - return f.currentLayers + return f.vls.GetCurrent() } func (f *Forwarder) TargetLayers() buffer.VideoLayer { f.lock.RLock() defer f.lock.RUnlock() - return f.targetLayers + return f.vls.GetTarget() } func (f *Forwarder) GetReferenceLayerSpatial() int32 { @@ -470,12 +495,11 @@ func (f *Forwarder) DistanceToDesired(availableLayers []int32, brs Bitrates) flo return getDistanceToDesired( f.muted, f.pubMuted, - f.maxPublishedLayer, - f.maxTemporalLayerSeen, + f.vls.GetMaxSeen(), availableLayers, brs, - f.targetLayers, - f.maxLayers, + f.vls.GetTarget(), + f.vls.GetMax(), ) } @@ -483,7 +507,7 @@ func (f *Forwarder) GetOptimalBandwidthNeeded(brs Bitrates) int64 { f.lock.RLock() defer f.lock.RUnlock() - return getOptimalBandwidthNeeded(f.muted, f.pubMuted, f.maxPublishedLayer, brs, f.maxLayers) + return getOptimalBandwidthNeeded(f.muted, f.pubMuted, f.vls.GetMaxSeen().Spatial, brs, f.vls.GetMax()) } func (f *Forwarder) AllocateOptimal(availableLayers []int32, brs Bitrates, allowOvershoot bool) VideoAllocation { @@ -494,14 +518,19 @@ func (f *Forwarder) AllocateOptimal(availableLayers []int32, brs Bitrates, allow return f.lastAllocation } + maxLayer := f.vls.GetMax() + maxSeenLayer := f.vls.GetMaxSeen() + parkedLayer := f.vls.GetParked() + currentLayer := f.vls.GetCurrent() + requestSpatial := f.vls.GetRequestSpatial() alloc := 
VideoAllocation{ PauseReason: VideoPauseReasonNone, Bitrates: brs, TargetLayers: buffer.InvalidLayers, - RequestLayerSpatial: f.requestLayerSpatial, - MaxLayers: f.maxLayers, + RequestLayerSpatial: requestSpatial, + MaxLayers: maxLayer, } - optimalBandwidthNeeded := getOptimalBandwidthNeeded(f.muted, f.pubMuted, f.maxPublishedLayer, brs, f.maxLayers) + optimalBandwidthNeeded := getOptimalBandwidthNeeded(f.muted, f.pubMuted, maxSeenLayer.Spatial, brs, maxLayer) if optimalBandwidthNeeded == 0 { alloc.PauseReason = VideoPauseReasonFeedDry } @@ -509,19 +538,19 @@ func (f *Forwarder) AllocateOptimal(availableLayers []int32, brs Bitrates, allow opportunisticAlloc := func() { // opportunistically latch on to anything - maxSpatial := f.maxLayers.Spatial - if allowOvershoot && f.maxPublishedLayer > maxSpatial { - maxSpatial = f.maxPublishedLayer + maxSpatial := maxLayer.Spatial + if allowOvershoot && f.vls.IsOvershootOkay() && maxSeenLayer.Spatial > maxSpatial { + maxSpatial = maxSeenLayer.Spatial } alloc.TargetLayers = buffer.VideoLayer{ - Spatial: int32(math.Min(float64(f.maxPublishedLayer), float64(maxSpatial))), - Temporal: f.maxLayers.Temporal, + Spatial: int32(math.Min(float64(maxSeenLayer.Spatial), float64(maxSpatial))), + Temporal: maxLayer.Temporal, } } switch { - case !f.maxLayers.IsValid() || f.maxPublishedLayer == buffer.InvalidLayerSpatial: - // nothing to do when max layers are not valid OR max publisher layer is invalid + case !maxLayer.IsValid() || maxSeenLayer.Spatial == buffer.InvalidLayerSpatial: + // nothing to do when max layers are not valid OR max published layer is invalid case f.muted: alloc.PauseReason = VideoPauseReasonMuted @@ -529,56 +558,59 @@ func (f *Forwarder) AllocateOptimal(availableLayers []int32, brs Bitrates, allow case f.pubMuted: alloc.PauseReason = VideoPauseReasonPubMuted // leave it at current layers for opportunistic resume - alloc.TargetLayers = f.currentLayers + alloc.TargetLayers = currentLayer alloc.RequestLayerSpatial = 
alloc.TargetLayers.Spatial - case f.parkedLayers.IsValid(): + case parkedLayer.IsValid(): // if parked on a layer, let it continue - alloc.TargetLayers = f.parkedLayers + alloc.TargetLayers = parkedLayer alloc.RequestLayerSpatial = alloc.TargetLayers.Spatial case len(availableLayers) == 0: // feed may be dry - if f.currentLayers.IsValid() { + if currentLayer.IsValid() { // let it continue at current layer if valid. // Covers the cases of // 1. mis-detection of layer stop - can continue streaming // 2. current layer resuming - can latch on when it starts - alloc.TargetLayers = f.currentLayers + alloc.TargetLayers = currentLayer alloc.RequestLayerSpatial = alloc.TargetLayers.Spatial } else { // opportunistically latch on to anything opportunisticAlloc() - alloc.RequestLayerSpatial = int32(math.Min(float64(f.maxLayers.Spatial), float64(f.maxPublishedLayer))) + alloc.RequestLayerSpatial = int32(math.Min(float64(maxLayer.Spatial), float64(maxSeenLayer.Spatial))) } default: isCurrentLayerAvailable := false - if f.currentLayers.IsValid() { + if currentLayer.IsValid() { for _, l := range availableLayers { - if l == f.currentLayers.Spatial { + if l == currentLayer.Spatial { isCurrentLayerAvailable = true break } } } - if !isCurrentLayerAvailable && f.currentLayers.IsValid() { + if !isCurrentLayerAvailable && currentLayer.IsValid() { // current layer maybe stopped, move to highest available for _, l := range availableLayers { if l > alloc.TargetLayers.Spatial { alloc.TargetLayers.Spatial = l } } - alloc.TargetLayers.Temporal = f.maxLayers.Temporal + alloc.TargetLayers.Temporal = maxLayer.Temporal alloc.RequestLayerSpatial = alloc.TargetLayers.Spatial } else { - requestLayerSpatial := int32(math.Min(float64(f.maxLayers.Spatial), float64(f.maxPublishedLayer))) - if f.currentLayers.IsValid() && requestLayerSpatial == f.requestLayerSpatial && f.currentLayers.Spatial == f.requestLayerSpatial { + requestLayerSpatial := int32(math.Min(float64(maxLayer.Spatial), 
float64(maxSeenLayer.Spatial))) + if currentLayer.IsValid() && requestLayerSpatial == requestSpatial && currentLayer.Spatial == requestSpatial { // current is locked to desired, stay there - alloc.TargetLayers = buffer.VideoLayer{Spatial: f.requestLayerSpatial, Temporal: f.maxLayers.Temporal} - alloc.RequestLayerSpatial = f.requestLayerSpatial + alloc.TargetLayers = buffer.VideoLayer{ + Spatial: requestSpatial, + Temporal: maxLayer.Temporal, + } + alloc.RequestLayerSpatial = requestSpatial } else { // opportunistically latch on to anything opportunisticAlloc() @@ -598,12 +630,11 @@ func (f *Forwarder) AllocateOptimal(availableLayers []int32, brs Bitrates, allow alloc.DistanceToDesired = getDistanceToDesired( f.muted, f.pubMuted, - f.maxPublishedLayer, - f.maxTemporalLayerSeen, + f.vls.GetMaxSeen(), availableLayers, brs, alloc.TargetLayers, - f.maxLayers, + f.vls.GetMax(), ) return f.updateAllocation(alloc, "optimal") @@ -614,15 +645,14 @@ func (f *Forwarder) ProvisionalAllocatePrepare(availableLayers []int32, Bitrates defer f.lock.Unlock() f.provisional = &VideoAllocationProvisional{ - allocatedLayers: buffer.InvalidLayers, - muted: f.muted, - pubMuted: f.pubMuted, - maxPublishedLayer: f.maxPublishedLayer, - maxTemporalLayerSeen: f.maxTemporalLayerSeen, - Bitrates: Bitrates, - maxLayers: f.maxLayers, - currentLayers: f.currentLayers, - parkedLayers: f.parkedLayers, + allocatedLayers: buffer.InvalidLayers, + muted: f.muted, + pubMuted: f.pubMuted, + maxSeenLayer: f.vls.GetMaxSeen(), + Bitrates: Bitrates, + maxLayers: f.vls.GetMax(), + currentLayers: f.vls.GetCurrent(), + parkedLayers: f.vls.GetParked(), } f.provisional.availableLayers = make([]int32, len(availableLayers)) @@ -633,7 +663,11 @@ func (f *Forwarder) ProvisionalAllocate(availableChannelCapacity int64, layers b f.lock.Lock() defer f.lock.Unlock() - if f.provisional.muted || f.provisional.pubMuted || f.provisional.maxPublishedLayer == buffer.InvalidLayerSpatial || !f.provisional.maxLayers.IsValid() || 
(!allowOvershoot && layers.GreaterThan(f.provisional.maxLayers)) { + if f.provisional.muted || + f.provisional.pubMuted || + f.provisional.maxSeenLayer.Spatial == buffer.InvalidLayerSpatial || + !f.provisional.maxLayers.IsValid() || + ((!allowOvershoot || !f.vls.IsOvershootOkay()) && layers.GreaterThan(f.provisional.maxLayers)) { return 0 } @@ -654,10 +688,11 @@ func (f *Forwarder) ProvisionalAllocate(availableChannelCapacity int64, layers b } // - // Given layer does not fit. But overshoot is allowed. + // Given layer does not fit. + // // Could be one of // 1. a layer below maximum that does not fit - // 2. a layer above maximum which may or may not fit. + // 2. a layer above maximum which may or may not fit, but overshoot is allowed. // In any of those cases, take the lowest possible layer if pause is not allowed // if !allowPause && (!f.provisional.allocatedLayers.IsValid() || !layers.GreaterThan(f.provisional.allocatedLayers)) { @@ -698,14 +733,15 @@ func (f *Forwarder) ProvisionalAllocateGetCooperativeTransition(allowOvershoot b f.provisional.allocatedLayers = f.provisional.currentLayers } return VideoTransition{ - From: f.targetLayers, + From: f.vls.GetTarget(), To: f.provisional.allocatedLayers, BandwidthDelta: 0 - f.lastAllocation.BandwidthRequested, } } // check if we should preserve current target - if f.targetLayers.IsValid() { + targetLayer := f.vls.GetTarget() + if targetLayer.IsValid() { // what is the highest that is available maximalLayers := buffer.InvalidLayers maximalBandwidthRequired := int64(0) @@ -724,22 +760,22 @@ func (f *Forwarder) ProvisionalAllocateGetCooperativeTransition(allowOvershoot b } if maximalLayers.IsValid() { - if !f.targetLayers.GreaterThan(maximalLayers) && f.provisional.Bitrates[f.targetLayers.Spatial][f.targetLayers.Temporal] != 0 { - // currently streaming and maybe wanting an upgrade (f.targetLayers <= maximalLayers), + if !targetLayer.GreaterThan(maximalLayers) && 
f.provisional.Bitrates[targetLayer.Spatial][targetLayer.Temporal] != 0 { + // currently streaming and maybe wanting an upgrade (targetLayer <= maximalLayers), // just preserve current target in the cooperative scheme of things - f.provisional.allocatedLayers = f.targetLayers + f.provisional.allocatedLayers = targetLayer return VideoTransition{ - From: f.targetLayers, - To: f.targetLayers, + From: targetLayer, + To: targetLayer, BandwidthDelta: 0, } } - if f.targetLayers.GreaterThan(maximalLayers) { - // maximalLayers < f.targetLayers, make the down move + if targetLayer.GreaterThan(maximalLayers) { + // maximalLayers < targetLayer, make the down move f.provisional.allocatedLayers = maximalLayers return VideoTransition{ - From: f.targetLayers, + From: targetLayer, To: maximalLayers, BandwidthDelta: maximalBandwidthRequired - f.lastAllocation.BandwidthRequested, } @@ -770,20 +806,19 @@ func (f *Forwarder) ProvisionalAllocateGetCooperativeTransition(allowOvershoot b return layers, bw } - targetLayers := f.targetLayers bandwidthRequired := int64(0) - if !targetLayers.IsValid() { + if !targetLayer.IsValid() { // currently not streaming, find minimal // NOTE: a layer in feed could have paused and there could be other options than going back to minimal, // but the cooperative scheme knocks things back to minimal - targetLayers, bandwidthRequired = findNextLayer( + targetLayer, bandwidthRequired = findNextLayer( 0, f.provisional.maxLayers.Spatial, 0, f.provisional.maxLayers.Temporal, ) // could not find a minimal layer, overshoot if allowed - if bandwidthRequired == 0 && f.provisional.maxLayers.IsValid() && allowOvershoot { - targetLayers, bandwidthRequired = findNextLayer( + if bandwidthRequired == 0 && f.provisional.maxLayers.IsValid() && allowOvershoot && f.vls.IsOvershootOkay() { + targetLayer, bandwidthRequired = findNextLayer( f.provisional.maxLayers.Spatial+1, buffer.DefaultMaxLayerSpatial, 0, buffer.DefaultMaxLayerTemporal, ) @@ -791,18 +826,18 @@ func (f 
*Forwarder) ProvisionalAllocateGetCooperativeTransition(allowOvershoot b } // if nothing available, just leave target at current to enable opportunistic forwarding in case current resumes - if !targetLayers.IsValid() { + if !targetLayer.IsValid() { if f.provisional.parkedLayers.IsValid() { - targetLayers = f.provisional.parkedLayers + targetLayer = f.provisional.parkedLayers } else { - targetLayers = f.provisional.currentLayers + targetLayer = f.provisional.currentLayers } } - f.provisional.allocatedLayers = targetLayers + f.provisional.allocatedLayers = targetLayer return VideoTransition{ - From: f.targetLayers, - To: targetLayers, + From: f.vls.GetTarget(), + To: targetLayer, BandwidthDelta: bandwidthRequired - f.lastAllocation.BandwidthRequested, } } @@ -826,6 +861,7 @@ func (f *Forwarder) ProvisionalAllocateGetBestWeightedTransition() VideoTransiti f.lock.Lock() defer f.lock.Unlock() + targetLayer := f.vls.GetTarget() if f.provisional.muted || f.provisional.pubMuted { f.provisional.allocatedLayers = buffer.InvalidLayers if f.provisional.pubMuted { @@ -833,7 +869,7 @@ func (f *Forwarder) ProvisionalAllocateGetBestWeightedTransition() VideoTransiti f.provisional.allocatedLayers = f.provisional.currentLayers } return VideoTransition{ - From: f.targetLayers, + From: targetLayer, To: f.provisional.allocatedLayers, BandwidthDelta: 0 - f.lastAllocation.BandwidthRequested, } @@ -862,7 +898,7 @@ func (f *Forwarder) ProvisionalAllocateGetBestWeightedTransition() VideoTransiti f.provisional.allocatedLayers = f.provisional.currentLayers } return VideoTransition{ - From: f.targetLayers, + From: targetLayer, To: f.provisional.allocatedLayers, BandwidthDelta: 0 - f.lastAllocation.BandwidthRequested, } @@ -873,20 +909,21 @@ func (f *Forwarder) ProvisionalAllocateGetBestWeightedTransition() VideoTransiti bestLayers := buffer.InvalidLayers bestBandwidthDelta := int64(0) bestValue := float32(0) - for s := int32(0); s <= f.targetLayers.Spatial; s++ { - for t := int32(0); t <= 
f.targetLayers.Temporal; t++ { - if s == f.targetLayers.Spatial && t == f.targetLayers.Temporal { + for s := int32(0); s <= targetLayer.Spatial; s++ { + for t := int32(0); t <= targetLayer.Temporal; t++ { + if s == targetLayer.Spatial && t == targetLayer.Temporal { break } BandwidthDelta := int64(math.Max(float64(0), float64(f.lastAllocation.BandwidthRequested-f.provisional.Bitrates[s][t]))) transitionCost := int32(0) - if f.targetLayers.Spatial != s { + // LK-TODO: SVC will need a different cost transition + if targetLayer.Spatial != s { transitionCost = TransitionCostSpatial } - qualityCost := (maxReachableLayerTemporal+1)*(f.targetLayers.Spatial-s) + (f.targetLayers.Temporal - t) + qualityCost := (maxReachableLayerTemporal+1)*(targetLayer.Spatial-s) + (targetLayer.Temporal - t) value := float32(0) if (transitionCost + qualityCost) != 0 { @@ -902,7 +939,7 @@ func (f *Forwarder) ProvisionalAllocateGetBestWeightedTransition() VideoTransiti f.provisional.allocatedLayers = bestLayers return VideoTransition{ - From: f.targetLayers, + From: targetLayer, To: bestLayers, BandwidthDelta: bestBandwidthDelta, } @@ -915,7 +952,7 @@ func (f *Forwarder) ProvisionalAllocateCommit() VideoAllocation { optimalBandwidthNeeded := getOptimalBandwidthNeeded( f.provisional.muted, f.provisional.pubMuted, - f.provisional.maxPublishedLayer, + f.provisional.maxSeenLayer.Spatial, f.provisional.Bitrates, f.provisional.maxLayers, ) @@ -930,8 +967,7 @@ func (f *Forwarder) ProvisionalAllocateCommit() VideoAllocation { DistanceToDesired: getDistanceToDesired( f.provisional.muted, f.provisional.pubMuted, - f.provisional.maxPublishedLayer, - f.provisional.maxTemporalLayerSeen, + f.provisional.maxSeenLayer, f.provisional.availableLayers, f.provisional.Bitrates, f.provisional.allocatedLayers, @@ -972,7 +1008,7 @@ func (f *Forwarder) ProvisionalAllocateCommit() VideoAllocation { alloc.BandwidthRequested >= getOptimalBandwidthNeeded( f.provisional.muted, f.provisional.pubMuted, - 
f.provisional.maxPublishedLayer, + f.provisional.maxSeenLayer.Spatial, f.provisional.Bitrates, f.provisional.maxLayers, ) { @@ -1004,15 +1040,18 @@ func (f *Forwarder) AllocateNextHigher(availableChannelCapacity int64, available } // if targets are still pending, don't increase - if f.targetLayers.IsValid() && f.targetLayers != f.currentLayers { + targetLayer := f.vls.GetTarget() + if targetLayer.IsValid() && targetLayer != f.vls.GetCurrent() { return f.lastAllocation, false } - optimalBandwidthNeeded := getOptimalBandwidthNeeded(f.muted, f.pubMuted, f.maxPublishedLayer, brs, f.maxLayers) + maxLayer := f.vls.GetMax() + maxSeenLayer := f.vls.GetMaxSeen() + optimalBandwidthNeeded := getOptimalBandwidthNeeded(f.muted, f.pubMuted, maxSeenLayer.Spatial, brs, maxLayer) alreadyAllocated := int64(0) - if f.targetLayers.IsValid() { - alreadyAllocated = brs[f.targetLayers.Spatial][f.targetLayers.Temporal] + if targetLayer.IsValid() { + alreadyAllocated = brs[targetLayer.Spatial][targetLayer.Temporal] } doAllocation := func( @@ -1021,38 +1060,37 @@ func (f *Forwarder) AllocateNextHigher(availableChannelCapacity int64, available ) (bool, VideoAllocation, bool) { for s := minSpatial; s <= maxSpatial; s++ { for t := minTemporal; t <= maxTemporal; t++ { - BandwidthRequested := brs[s][t] - if BandwidthRequested == 0 { + bandwidthRequested := brs[s][t] + if bandwidthRequested == 0 { continue } - if !allowOvershoot && BandwidthRequested-alreadyAllocated > availableChannelCapacity { + if (!allowOvershoot || !f.vls.IsOvershootOkay()) && bandwidthRequested-alreadyAllocated > availableChannelCapacity { // next higher available layer does not fit, return return true, f.lastAllocation, false } - targetLayers := buffer.VideoLayer{Spatial: s, Temporal: t} + newTargetLayer := buffer.VideoLayer{Spatial: s, Temporal: t} alloc := VideoAllocation{ IsDeficient: true, - BandwidthRequested: BandwidthRequested, - BandwidthDelta: BandwidthRequested - alreadyAllocated, + BandwidthRequested: 
bandwidthRequested, + BandwidthDelta: bandwidthRequested - alreadyAllocated, BandwidthNeeded: optimalBandwidthNeeded, Bitrates: brs, - TargetLayers: targetLayers, - RequestLayerSpatial: targetLayers.Spatial, - MaxLayers: f.maxLayers, + TargetLayers: newTargetLayer, + RequestLayerSpatial: newTargetLayer.Spatial, + MaxLayers: maxLayer, DistanceToDesired: getDistanceToDesired( f.muted, f.pubMuted, - f.maxPublishedLayer, - f.maxTemporalLayerSeen, + maxSeenLayer, availableLayers, brs, - targetLayers, - f.maxLayers, + newTargetLayer, + maxLayer, ), } - if targetLayers.GreaterThan(f.maxLayers) || BandwidthRequested >= optimalBandwidthNeeded { + if newTargetLayer.GreaterThan(maxLayer) || bandwidthRequested >= optimalBandwidthNeeded { alloc.IsDeficient = false } @@ -1068,10 +1106,10 @@ func (f *Forwarder) AllocateNextHigher(availableChannelCapacity int64, available boosted := false // try moving temporal layer up in currently streaming spatial layer - if f.targetLayers.IsValid() { + if targetLayer.IsValid() { done, allocation, boosted = doAllocation( - f.targetLayers.Spatial, f.targetLayers.Spatial, - f.targetLayers.Temporal+1, f.maxLayers.Temporal, + targetLayer.Spatial, targetLayer.Spatial, + targetLayer.Temporal+1, maxLayer.Temporal, ) if done { return allocation, boosted @@ -1080,16 +1118,16 @@ func (f *Forwarder) AllocateNextHigher(availableChannelCapacity int64, available // try moving spatial layer up if temporal layer move up is not available done, allocation, boosted = doAllocation( - f.targetLayers.Spatial+1, f.maxLayers.Spatial, - 0, f.maxLayers.Temporal, + targetLayer.Spatial+1, maxLayer.Spatial, + 0, maxLayer.Temporal, ) if done { return allocation, boosted } - if allowOvershoot && f.maxLayers.IsValid() { + if allowOvershoot && f.vls.IsOvershootOkay() && maxLayer.IsValid() { done, allocation, boosted = doAllocation( - f.maxLayers.Spatial+1, buffer.DefaultMaxLayerSpatial, + maxLayer.Spatial+1, buffer.DefaultMaxLayerSpatial, 0, buffer.DefaultMaxLayerTemporal, ) 
if done { @@ -1114,13 +1152,14 @@ func (f *Forwarder) GetNextHigherTransition(brs Bitrates, allowOvershoot bool) ( } // if targets are still pending, don't increase - if f.targetLayers.IsValid() && f.targetLayers != f.currentLayers { + targetLayer := f.vls.GetTarget() + if targetLayer.IsValid() && targetLayer != f.vls.GetCurrent() { return VideoTransition{}, false } alreadyAllocated := int64(0) - if f.targetLayers.IsValid() { - alreadyAllocated = brs[f.targetLayers.Spatial][f.targetLayers.Temporal] + if targetLayer.IsValid() { + alreadyAllocated = brs[targetLayer.Spatial][targetLayer.Temporal] } findNextHigher := func( @@ -1129,15 +1168,15 @@ func (f *Forwarder) GetNextHigherTransition(brs Bitrates, allowOvershoot bool) ( ) (bool, VideoTransition, bool) { for s := minSpatial; s <= maxSpatial; s++ { for t := minTemporal; t <= maxTemporal; t++ { - BandwidthRequested := brs[s][t] - if BandwidthRequested == 0 { + bandwidthRequested := brs[s][t] + if bandwidthRequested == 0 { continue } transition := VideoTransition{ - From: f.targetLayers, + From: targetLayer, To: buffer.VideoLayer{Spatial: s, Temporal: t}, - BandwidthDelta: BandwidthRequested - alreadyAllocated, + BandwidthDelta: bandwidthRequested - alreadyAllocated, } return true, transition, true @@ -1152,10 +1191,11 @@ func (f *Forwarder) GetNextHigherTransition(brs Bitrates, allowOvershoot bool) ( isAvailable := false // try moving temporal layer up in currently streaming spatial layer - if f.targetLayers.IsValid() { + maxLayer := f.vls.GetMax() + if targetLayer.IsValid() { done, transition, isAvailable = findNextHigher( - f.targetLayers.Spatial, f.targetLayers.Spatial, - f.targetLayers.Temporal+1, f.maxLayers.Temporal, + targetLayer.Spatial, targetLayer.Spatial, + targetLayer.Temporal+1, maxLayer.Temporal, ) if done { return transition, isAvailable @@ -1164,16 +1204,16 @@ func (f *Forwarder) GetNextHigherTransition(brs Bitrates, allowOvershoot bool) ( // try moving spatial layer up if temporal layer move up is 
not available done, transition, isAvailable = findNextHigher( - f.targetLayers.Spatial+1, f.maxLayers.Spatial, - 0, f.maxLayers.Temporal, + targetLayer.Spatial+1, maxLayer.Spatial, + 0, maxLayer.Temporal, ) if done { return transition, isAvailable } - if allowOvershoot && f.maxLayers.IsValid() { + if allowOvershoot && f.vls.IsOvershootOkay() && maxLayer.IsValid() { done, transition, isAvailable = findNextHigher( - f.maxLayers.Spatial+1, buffer.DefaultMaxLayerSpatial, + maxLayer.Spatial+1, buffer.DefaultMaxLayerSpatial, 0, buffer.DefaultMaxLayerTemporal, ) if done { @@ -1188,7 +1228,9 @@ func (f *Forwarder) Pause(availableLayers []int32, brs Bitrates) VideoAllocation f.lock.Lock() defer f.lock.Unlock() - optimalBandwidthNeeded := getOptimalBandwidthNeeded(f.muted, f.pubMuted, f.maxPublishedLayer, brs, f.maxLayers) + maxLayer := f.vls.GetMax() + maxSeenLayer := f.vls.GetMaxSeen() + optimalBandwidthNeeded := getOptimalBandwidthNeeded(f.muted, f.pubMuted, maxSeenLayer.Spatial, brs, maxLayer) alloc := VideoAllocation{ BandwidthRequested: 0, BandwidthDelta: 0 - f.lastAllocation.BandwidthRequested, @@ -1196,16 +1238,15 @@ func (f *Forwarder) Pause(availableLayers []int32, brs Bitrates) VideoAllocation BandwidthNeeded: optimalBandwidthNeeded, TargetLayers: buffer.InvalidLayers, RequestLayerSpatial: buffer.InvalidLayerSpatial, - MaxLayers: f.maxLayers, + MaxLayers: maxLayer, DistanceToDesired: getDistanceToDesired( f.muted, f.pubMuted, - f.maxPublishedLayer, - f.maxTemporalLayerSeen, + maxSeenLayer, availableLayers, brs, buffer.InvalidLayers, - f.maxLayers, + maxLayer, ), } @@ -1230,6 +1271,11 @@ func (f *Forwarder) Pause(availableLayers []int32, brs Bitrates) VideoAllocation } func (f *Forwarder) updateAllocation(alloc VideoAllocation, reason string) VideoAllocation { + // restrict target temporal to 0 if codec does not support temporal layers + if alloc.TargetLayers.IsValid() && strings.ToLower(f.codec.MimeType) == "video/h264" { + alloc.TargetLayers.Temporal = 0 + } + if 
alloc.IsDeficient != f.lastAllocation.IsDeficient || alloc.PauseReason != f.lastAllocation.PauseReason || alloc.TargetLayers != f.lastAllocation.TargetLayers || @@ -1243,7 +1289,7 @@ func (f *Forwarder) updateAllocation(alloc VideoAllocation, reason string) Video f.lastAllocation = alloc f.setTargetLayers(f.lastAllocation.TargetLayers, f.lastAllocation.RequestLayerSpatial) - if !f.targetLayers.IsValid() { + if !f.vls.GetTarget().IsValid() { f.resyncLocked() } @@ -1251,12 +1297,8 @@ func (f *Forwarder) updateAllocation(alloc VideoAllocation, reason string) Video } func (f *Forwarder) setTargetLayers(targetLayers buffer.VideoLayer, requestLayerSpatial int32) { - f.targetLayers = targetLayers - if f.ddLayerSelector != nil { - f.ddLayerSelector.SelectLayer(targetLayers) - } - - f.requestLayerSpatial = requestLayerSpatial + f.vls.SetTarget(targetLayers) + f.vls.SetRequestSpatial(requestLayerSpatial) } func (f *Forwarder) Resync() { @@ -1267,13 +1309,13 @@ func (f *Forwarder) Resync() { } func (f *Forwarder) resyncLocked() { - f.currentLayers = buffer.InvalidLayers + f.vls.SetCurrent(buffer.InvalidLayers) f.lastSSRC = 0 f.clearParkedLayers() } func (f *Forwarder) clearParkedLayers() { - f.parkedLayers = buffer.InvalidLayers + f.vls.SetParked(buffer.InvalidLayers) if f.parkedLayersTimer != nil { f.parkedLayersTimer.Stop() f.parkedLayersTimer = nil @@ -1283,13 +1325,14 @@ func (f *Forwarder) clearParkedLayers() { func (f *Forwarder) setupParkedLayers(parkedLayers buffer.VideoLayer) { f.clearParkedLayers() - f.parkedLayers = parkedLayers + f.vls.SetParked(parkedLayers) f.parkedLayersTimer = time.AfterFunc(ParkedLayersWaitDuration, func() { f.lock.Lock() + notify := f.vls.GetParked().IsValid() f.clearParkedLayers() f.lock.Unlock() - if onParkedLayersExpired := f.getOnParkedLayersExpired(); onParkedLayersExpired != nil { + if onParkedLayersExpired := f.getOnParkedLayersExpired(); onParkedLayersExpired != nil && notify { onParkedLayersExpired() } }) @@ -1299,8 +1342,8 @@ func 
(f *Forwarder) CheckSync() (locked bool, layer int32) { f.lock.RLock() defer f.lock.RUnlock() - layer = f.requestLayerSpatial - locked = f.requestLayerSpatial == f.currentLayers.Spatial || f.parkedLayers.IsValid() + layer = f.vls.GetRequestSpatial() + locked = layer == f.vls.GetCurrent().Spatial || f.vls.GetParked().IsValid() return } @@ -1323,8 +1366,10 @@ func (f *Forwarder) FilterRTX(nacks []uint16) (filtered []uint16, disallowedLaye // // Without the curb, when congestion hits, RTX rate could be so high that it further congests the channel. // + currentLayer := f.vls.GetCurrent() + targetLayer := f.vls.GetTarget() for layer := int32(0); layer < buffer.DefaultMaxLayerSpatial+1; layer++ { - if f.isDeficientLocked() && (f.targetLayers.Spatial < f.currentLayers.Spatial || layer > f.currentLayers.Spatial) { + if f.isDeficientLocked() && (targetLayer.Spatial < currentLayer.Spatial || layer > currentLayer.Spatial) { disallowedLayers[layer] = true } } @@ -1367,9 +1412,7 @@ func (f *Forwarder) getTranslationParamsCommon(extPkt *buffer.ExtPacket, layer i f.started = true f.referenceLayerSpatial = layer f.rtpMunger.SetLastSnTs(extPkt) - if f.vp8Munger != nil { - f.vp8Munger.SetLast(extPkt) - } + f.codecMunger.SetLast(extPkt) } else { if f.referenceLayerSpatial == buffer.InvalidLayerSpatial { // on a resume, reference layer may not be set, so only set when it is invalid @@ -1397,9 +1440,7 @@ func (f *Forwarder) getTranslationParamsCommon(extPkt *buffer.ExtPacket, layer i } f.rtpMunger.UpdateSnTsOffsets(extPkt, 1, td) - if f.vp8Munger != nil { - f.vp8Munger.UpdateOffsets(extPkt) - } + f.codecMunger.UpdateOffsets(extPkt) } f.logger.Debugw("switching feed", "from", f.lastSSRC, "to", extPkt.Packet.SSRC) @@ -1413,13 +1454,8 @@ func (f *Forwarder) getTranslationParamsCommon(extPkt *buffer.ExtPacket, layer i if err != nil { tp.shouldDrop = true if err == ErrPaddingOnlyPacket || err == ErrDuplicatePacket || err == ErrOutOfOrderSequenceNumberCacheMiss { - if err == 
ErrOutOfOrderSequenceNumberCacheMiss { - tp.isDroppingRelevant = true - } return tp, nil } - - tp.isDroppingRelevant = true return tp, err } @@ -1436,145 +1472,26 @@ func (f *Forwarder) getTranslationParamsAudio(extPkt *buffer.ExtPacket, layer in func (f *Forwarder) getTranslationParamsVideo(extPkt *buffer.ExtPacket, layer int32) (*TranslationParams, error) { tp := &TranslationParams{} - if !f.targetLayers.IsValid() { + if !f.vls.GetTarget().IsValid() { // stream is paused by streamallocator tp.shouldDrop = true return tp, nil } - if f.ddLayerSelector != nil { - if selected := f.ddLayerSelector.Select(extPkt, tp); !selected { - tp.shouldDrop = true + result := f.vls.Select(extPkt, layer) + if !result.IsSelected { + tp.shouldDrop = true + if f.started && result.IsRelevant { f.rtpMunger.UpdateAndGetSnTs(extPkt) // call to update highest incoming sequence number and other internal structures f.rtpMunger.PacketDropped(extPkt) - return tp, nil } - } - - // at this point, either - // 1. dependency description has selected the layer for forwarding OR - // 2. non-dependency deescriptor is yet to make decision, but it can potentially switch to the incoming layer and start forwarding - // - // both cases cases upgrade/downgrade to current layer under the right conditions - if f.currentLayers.Spatial != f.targetLayers.Spatial { - // Three things to check when not locked to target - // 1. Resumable layer - don't need a key frame - // 2. Opportunistic layer upgrade - needs a key frame if not using depedency descriptor - // 3. 
Need to downgrade - needs a key frame if not using dependency descriptor - found := false - if f.parkedLayers.IsValid() { - if f.parkedLayers.Spatial == layer { - f.logger.Infow( - "resuming at parked layer", - "current", f.currentLayers, - "target", f.targetLayers, - "parked", f.parkedLayers, - "feed", extPkt.Packet.SSRC, - ) - f.currentLayers = f.parkedLayers - found = true - } - } else { - if extPkt.KeyFrame || tp.isSwitchingToTargetLayer { - if layer > f.currentLayers.Spatial && layer <= f.targetLayers.Spatial { - f.logger.Infow( - "upgrading layer", - "current", f.currentLayers, - "target", f.targetLayers, - "max", f.maxLayers, - "layer", layer, - "req", f.requestLayerSpatial, - "maxPublished", f.maxPublishedLayer, - "feed", extPkt.Packet.SSRC, - ) - found = true - } - - if layer < f.currentLayers.Spatial && layer >= f.targetLayers.Spatial { - f.logger.Infow( - "downgrading layer", - "current", f.currentLayers, - "target", f.targetLayers, - "max", f.maxLayers, - "layer", layer, - "req", f.requestLayerSpatial, - "maxPublished", f.maxPublishedLayer, - "feed", extPkt.Packet.SSRC, - ) - found = true - } - - if found { - f.currentLayers.Spatial = layer - if !f.isTemporalSupported { - f.currentLayers.Temporal = f.targetLayers.Temporal - } - } - } - } - - if found { - tp.isSwitchingToTargetLayer = true - f.clearParkedLayers() - if f.currentLayers.Spatial >= f.maxLayers.Spatial { - tp.isSwitchingToMaxLayer = true - - f.logger.Infow( - "reached max layer", - "current", f.currentLayers, - "target", f.targetLayers, - "max", f.maxLayers, - "layer", layer, - "req", f.requestLayerSpatial, - "maxPublished", f.maxPublishedLayer, - "feed", extPkt.Packet.SSRC, - ) - } - - if f.currentLayers.Spatial >= f.maxLayers.Spatial || f.currentLayers.Spatial == f.maxPublishedLayer { - f.targetLayers.Spatial = f.currentLayers.Spatial - if f.ddLayerSelector != nil { - f.ddLayerSelector.SelectLayer(f.targetLayers) - } - } - } - } - - // if locked to higher than max layer due to overshoot, 
check if it can be dialed back - if f.currentLayers.Spatial > f.maxLayers.Spatial { - if layer <= f.maxLayers.Spatial && (extPkt.KeyFrame || tp.isSwitchingToTargetLayer) { - f.logger.Infow( - "adjusting overshoot", - "current", f.currentLayers, - "target", f.targetLayers, - "max", f.maxLayers, - "layer", layer, - "req", f.requestLayerSpatial, - "maxPublished", f.maxPublishedLayer, - "feed", extPkt.Packet.SSRC, - ) - f.currentLayers.Spatial = layer - - if f.currentLayers.Spatial >= f.maxLayers.Spatial { - tp.isSwitchingToMaxLayer = true - } - - if f.currentLayers.Spatial >= f.maxLayers.Spatial || f.currentLayers.Spatial == f.maxPublishedLayer { - f.targetLayers.Spatial = layer - if f.ddLayerSelector != nil { - f.ddLayerSelector.SelectLayer(f.targetLayers) - } - } - } - } - - // if we have layer selector, let it decide whether to drop or not - if f.ddLayerSelector == nil && f.currentLayers.Spatial != layer { - tp.shouldDrop = true return tp, nil } + tp.isSwitchingToMaxLayer = result.IsSwitchingToMaxSpatial + tp.isResuming = result.IsResuming + tp.marker = result.RTPMarker - if FlagPauseOnDowngrade && f.targetLayers.Spatial < f.currentLayers.Spatial && f.isDeficientLocked() { + if FlagPauseOnDowngrade && f.isDeficientLocked() && f.vls.GetTarget().Spatial < f.vls.GetCurrent().Spatial { // // If target layer is lower than both the current and // maximum subscribed layer, it is due to bandwidth @@ -1594,45 +1511,36 @@ func (f *Forwarder) getTranslationParamsVideo(extPkt *buffer.ExtPacket, layer in // To differentiate between the two cases, drop only when in DEFICIENT state. 
// tp.shouldDrop = true - tp.isDroppingRelevant = true return tp, nil } _, err := f.getTranslationParamsCommon(extPkt, layer, tp) - if tp.shouldDrop || f.vp8Munger == nil || len(extPkt.Packet.Payload) == 0 { + if tp.shouldDrop || len(extPkt.Packet.Payload) == 0 { return tp, err } - // catch up temporal layer if necessary - if f.currentLayers.Temporal != f.targetLayers.Temporal { - incomingVP8, ok := extPkt.Payload.(buffer.VP8) - if ok { - if incomingVP8.TIDPresent == 0 || incomingVP8.TID <= uint8(f.targetLayers.Temporal) { - f.currentLayers.Temporal = f.targetLayers.Temporal - } - } - } - - tpVP8, err := f.vp8Munger.UpdateAndGet(extPkt, tp.rtp.snOrdering, f.currentLayers.Temporal) + // codec specific forwarding check and any needed packet munging + codecBytes, err := f.codecMunger.UpdateAndGet( + extPkt, + tp.rtp.snOrdering == SequenceNumberOrderingOutOfOrder, + tp.rtp.snOrdering == SequenceNumberOrderingGap, + f.vls.SelectTemporal(extPkt), + ) if err != nil { tp.rtp = nil tp.shouldDrop = true - if err == ErrFilteredVP8TemporalLayer || err == ErrOutOfOrderVP8PictureIdCacheMiss { - if err == ErrFilteredVP8TemporalLayer { + if err == codecmunger.ErrFilteredVP8TemporalLayer || err == codecmunger.ErrOutOfOrderVP8PictureIdCacheMiss { + if err == codecmunger.ErrFilteredVP8TemporalLayer { // filtered temporal layer, update sequence number offset to prevent holes f.rtpMunger.PacketDropped(extPkt) } - if err == ErrOutOfOrderVP8PictureIdCacheMiss { - tp.isDroppingRelevant = true - } return tp, nil } - tp.isDroppingRelevant = true return tp, err } - tp.vp8 = tpVP8 + tp.codecBytes = codecBytes return tp, nil } @@ -1646,7 +1554,7 @@ func (f *Forwarder) GetSnTsForPadding(num int) ([]SnTs, error) { // force a frame marker as a restart of the stream will // start with a key frame which will reset the decoder. 
forceMarker := false - if !f.targetLayers.IsValid() { + if !f.vls.GetTarget().IsValid() { forceMarker = true } return f.rtpMunger.UpdateAndGetPaddingSnTs(num, 0, 0, forceMarker) @@ -1664,11 +1572,11 @@ func (f *Forwarder) GetSnTsForBlankFrames(frameRate uint32, numPackets int) ([]S return snts, frameEndNeeded, err } -func (f *Forwarder) GetPaddingVP8(frameEndNeeded bool) *buffer.VP8 { +func (f *Forwarder) GetPadding(frameEndNeeded bool) ([]byte, error) { f.lock.Lock() defer f.lock.Unlock() - return f.vp8Munger.UpdateAndGetPadding(!frameEndNeeded) + return f.codecMunger.UpdateAndGetPadding(!frameEndNeeded) } func (f *Forwarder) GetRTPMungerParams() RTPMungerParams { @@ -1706,14 +1614,13 @@ func getOptimalBandwidthNeeded(muted bool, pubMuted bool, maxPublishedLayer int3 func getDistanceToDesired( muted bool, pubMuted bool, - maxPublishedLayer int32, - maxTemporalLayerSeen int32, + maxSeenLayer buffer.VideoLayer, availableLayers []int32, brs Bitrates, targetLayers buffer.VideoLayer, maxLayers buffer.VideoLayer, ) float64 { - if muted || pubMuted || maxPublishedLayer == buffer.InvalidLayerSpatial || maxTemporalLayerSeen == buffer.InvalidLayerTemporal || !maxLayers.IsValid() { + if muted || pubMuted || !maxSeenLayer.IsValid() || !maxLayers.IsValid() { return 0.0 } @@ -1740,7 +1647,7 @@ done: for _, layer := range availableLayers { if layer > maxAvailableSpatial { maxAvailableSpatial = layer - maxAvailableTemporal = maxTemporalLayerSeen // till bit rate measurement is available, assume max seen as temporal + maxAvailableTemporal = maxSeenLayer.Temporal // till bit rate measurement is available, assume max seen as temporal } } @@ -1748,8 +1655,8 @@ done: adjustedMaxLayers.Spatial = maxAvailableSpatial } - if maxPublishedLayer < adjustedMaxLayers.Spatial { - adjustedMaxLayers.Spatial = maxPublishedLayer + if maxSeenLayer.Spatial < adjustedMaxLayers.Spatial { + adjustedMaxLayers.Spatial = maxSeenLayer.Spatial } // max available temporal is min(subscribedMax, 
temporalLayerSeenMax, availableMax) @@ -1768,8 +1675,8 @@ done: adjustedMaxLayers.Temporal = maxAvailableTemporal } - if maxTemporalLayerSeen < adjustedMaxLayers.Temporal { - adjustedMaxLayers.Temporal = maxTemporalLayerSeen + if maxSeenLayer.Temporal < adjustedMaxLayers.Temporal { + adjustedMaxLayers.Temporal = maxSeenLayer.Temporal } if !adjustedMaxLayers.IsValid() { @@ -1783,11 +1690,11 @@ done: } distance := - ((adjustedMaxLayers.Spatial - adjustedTargetLayers.Spatial) * (maxTemporalLayerSeen + 1)) + + ((adjustedMaxLayers.Spatial - adjustedTargetLayers.Spatial) * (maxSeenLayer.Temporal + 1)) + (adjustedMaxLayers.Temporal - adjustedTargetLayers.Temporal) if !targetLayers.IsValid() { distance++ } - return float64(distance) / float64(maxTemporalLayerSeen+1) + return float64(distance) / float64(maxSeenLayer.Temporal+1) } diff --git a/pkg/sfu/forwarder_test.go b/pkg/sfu/forwarder_test.go index c543824ad..697a0f573 100644 --- a/pkg/sfu/forwarder_test.go +++ b/pkg/sfu/forwarder_test.go @@ -13,13 +13,13 @@ import ( ) func disable(f *Forwarder) { - f.currentLayers = buffer.InvalidLayers - f.targetLayers = buffer.InvalidLayers + f.vls.SetCurrent(buffer.InvalidLayers) + f.vls.SetTarget(buffer.InvalidLayers) } func newForwarder(codec webrtc.RTPCodecCapability, kind webrtc.RTPCodecType) *Forwarder { f := NewForwarder(kind, logger.GetLogger(), nil) - f.DetermineCodec(codec) + f.DetermineCodec(codec, nil) return f } @@ -87,7 +87,7 @@ func TestForwarderLayersVideo(t *testing.T) { require.Equal(t, expectedLayers, f.MaxLayers()) require.Equal(t, buffer.InvalidLayers, currentLayers) - f.currentLayers = buffer.VideoLayer{Spatial: 0, Temporal: 1} + f.vls.SetCurrent(buffer.VideoLayer{Spatial: 0, Temporal: 1}) changed, maxLayers, currentLayers = f.SetMaxSpatialLayer(buffer.DefaultMaxLayerSpatial - 1) require.False(t, changed) require.Equal(t, expectedLayers, maxLayers) @@ -121,7 +121,7 @@ func TestForwarderAllocateOptimal(t *testing.T) { } // invalid max layers - f.maxLayers = 
buffer.InvalidLayers + f.vls.SetMax(buffer.InvalidLayers) expectedResult := VideoAllocation{ PauseReason: VideoPauseReasonFeedDry, BandwidthRequested: 0, @@ -195,29 +195,29 @@ func TestForwarderAllocateOptimal(t *testing.T) { f.PubMute(false) // when parked layers valid, should stay there - f.parkedLayers = buffer.VideoLayer{ + f.vls.SetParked(buffer.VideoLayer{ Spatial: 0, Temporal: 1, - } + }) expectedResult = VideoAllocation{ PauseReason: VideoPauseReasonFeedDry, BandwidthRequested: 0, BandwidthDelta: 0, Bitrates: emptyBitrates, - TargetLayers: f.parkedLayers, - RequestLayerSpatial: f.parkedLayers.Spatial, + TargetLayers: f.vls.GetParked(), + RequestLayerSpatial: f.vls.GetParked().Spatial, MaxLayers: buffer.DefaultMaxLayers, DistanceToDesired: 0, } result = f.AllocateOptimal(nil, emptyBitrates, true) require.Equal(t, expectedResult, result) require.Equal(t, expectedResult, f.lastAllocation) - require.Equal(t, f.parkedLayers, f.TargetLayers()) - f.parkedLayers = buffer.InvalidLayers + require.Equal(t, f.vls.GetParked(), f.TargetLayers()) + f.vls.SetParked(buffer.InvalidLayers) // when max layers changes, target is opportunistic, but requested spatial layer should be at max f.SetMaxTemporalLayerSeen(3) - f.maxLayers = buffer.VideoLayer{Spatial: 1, Temporal: 3} + f.vls.SetMax(buffer.VideoLayer{Spatial: 1, Temporal: 3}) expectedResult = VideoAllocation{ PauseReason: VideoPauseReasonNone, BandwidthRequested: bitrates[1][3], @@ -225,8 +225,8 @@ func TestForwarderAllocateOptimal(t *testing.T) { BandwidthNeeded: bitrates[1][3], Bitrates: bitrates, TargetLayers: buffer.DefaultMaxLayers, - RequestLayerSpatial: f.maxLayers.Spatial, - MaxLayers: f.maxLayers, + RequestLayerSpatial: f.vls.GetMax().Spatial, + MaxLayers: f.vls.GetMax(), DistanceToDesired: -1, } result = f.AllocateOptimal(nil, bitrates, true) @@ -235,7 +235,7 @@ func TestForwarderAllocateOptimal(t *testing.T) { require.Equal(t, buffer.DefaultMaxLayers, f.TargetLayers()) // reset max layers for rest of the tests 
below - f.maxLayers = buffer.DefaultMaxLayers + f.vls.SetMax(buffer.DefaultMaxLayers) // when feed is dry and current is not valid, should set up for opportunistic forwarding // NOTE: feed is dry due to availableLayers = nil, some valid bitrates may be passed in here for testing purposes only @@ -260,8 +260,8 @@ func TestForwarderAllocateOptimal(t *testing.T) { require.Equal(t, expectedResult, f.lastAllocation) require.Equal(t, expectedTargetLayers, f.TargetLayers()) - f.targetLayers = buffer.VideoLayer{Spatial: 0, Temporal: 0} // set to valid to trigger paths in tests below - f.currentLayers = buffer.VideoLayer{Spatial: 0, Temporal: 3} // set to valid to trigger paths in tests below + f.vls.SetTarget(buffer.VideoLayer{Spatial: 0, Temporal: 0}) // set to valid to trigger paths in tests below + f.vls.SetCurrent(buffer.VideoLayer{Spatial: 0, Temporal: 3}) // set to valid to trigger paths in tests below // when feed is dry and current is valid, should stay at current expectedTargetLayers = buffer.VideoLayer{ @@ -283,7 +283,7 @@ func TestForwarderAllocateOptimal(t *testing.T) { require.Equal(t, expectedResult, f.lastAllocation) require.Equal(t, expectedTargetLayers, f.TargetLayers()) - f.currentLayers = buffer.InvalidLayers + f.vls.SetCurrent(buffer.InvalidLayers) // opportunistic target if feed is not dry and current is not valid, i. e. 
not forwarding expectedResult = VideoAllocation{ @@ -303,7 +303,7 @@ func TestForwarderAllocateOptimal(t *testing.T) { require.Equal(t, buffer.DefaultMaxLayers, f.TargetLayers()) // if feed is not dry and current is not locked, should be opportunistic (with and without overshoot) - f.targetLayers = buffer.InvalidLayers + f.vls.SetTarget(buffer.InvalidLayers) expectedResult = VideoAllocation{ PauseReason: VideoPauseReasonFeedDry, BandwidthRequested: 0, @@ -318,7 +318,7 @@ func TestForwarderAllocateOptimal(t *testing.T) { require.Equal(t, expectedResult, result) require.Equal(t, expectedResult, f.lastAllocation) - f.targetLayers = buffer.InvalidLayers + f.vls.SetTarget(buffer.InvalidLayers) expectedTargetLayers = buffer.VideoLayer{ Spatial: 2, Temporal: buffer.DefaultMaxLayerTemporal, @@ -339,7 +339,7 @@ func TestForwarderAllocateOptimal(t *testing.T) { require.Equal(t, expectedResult, f.lastAllocation) // switches to highest available if feed is not dry and current is valid and current is not available - f.currentLayers = buffer.VideoLayer{Spatial: 0, Temporal: 1} + f.vls.SetCurrent(buffer.VideoLayer{Spatial: 0, Temporal: 1}) expectedTargetLayers = buffer.VideoLayer{ Spatial: 1, Temporal: buffer.DefaultMaxLayerTemporal, @@ -360,9 +360,9 @@ func TestForwarderAllocateOptimal(t *testing.T) { require.Equal(t, expectedResult, f.lastAllocation) // stays the same if feed is not dry and current is valid, available and locked - f.maxLayers = buffer.VideoLayer{Spatial: 0, Temporal: 1} - f.currentLayers = buffer.VideoLayer{Spatial: 0, Temporal: 1} - f.requestLayerSpatial = 0 + f.vls.SetMax(buffer.VideoLayer{Spatial: 0, Temporal: 1}) + f.vls.SetCurrent(buffer.VideoLayer{Spatial: 0, Temporal: 1}) + f.vls.SetRequestSpatial(0) expectedTargetLayers = buffer.VideoLayer{ Spatial: 0, Temporal: 1, @@ -374,7 +374,7 @@ func TestForwarderAllocateOptimal(t *testing.T) { Bitrates: emptyBitrates, TargetLayers: expectedTargetLayers, RequestLayerSpatial: 0, - MaxLayers: f.maxLayers, + 
MaxLayers: f.vls.GetMax(), DistanceToDesired: 0.0, } result = f.AllocateOptimal([]int32{0, 1}, emptyBitrates, true) @@ -382,9 +382,9 @@ func TestForwarderAllocateOptimal(t *testing.T) { require.Equal(t, expectedResult, f.lastAllocation) // opportunistic if feed is not dry and current is valid, but request layer has changed - f.maxLayers = buffer.VideoLayer{Spatial: 2, Temporal: 1} - f.currentLayers = buffer.VideoLayer{Spatial: 0, Temporal: 1} - f.requestLayerSpatial = 0 + f.vls.SetMax(buffer.VideoLayer{Spatial: 2, Temporal: 1}) + f.vls.SetCurrent(buffer.VideoLayer{Spatial: 0, Temporal: 1}) + f.vls.SetRequestSpatial(0) expectedTargetLayers = buffer.VideoLayer{ Spatial: 2, Temporal: 1, @@ -396,7 +396,7 @@ func TestForwarderAllocateOptimal(t *testing.T) { Bitrates: emptyBitrates, TargetLayers: expectedTargetLayers, RequestLayerSpatial: 2, - MaxLayers: f.maxLayers, + MaxLayers: f.vls.GetMax(), DistanceToDesired: -1, } result = f.AllocateOptimal([]int32{0, 1}, emptyBitrates, true) @@ -457,7 +457,7 @@ func TestForwarderProvisionalAllocate(t *testing.T) { require.Equal(t, expectedTargetLayers, f.TargetLayers()) // when nothing fits and pausing disallowed, should allocate (0, 0) - f.targetLayers = buffer.InvalidLayers + f.vls.SetTarget(buffer.InvalidLayers) f.ProvisionalAllocatePrepare(nil, bitrates) usedBitrate = f.ProvisionalAllocate(0, buffer.VideoLayer{Spatial: 0, Temporal: 0}, false, false) require.Equal(t, int64(1), usedBitrate) @@ -539,7 +539,7 @@ func TestForwarderProvisionalAllocate(t *testing.T) { {0, 0, 0, 0}, } - f.currentLayers = buffer.VideoLayer{Spatial: 0, Temporal: 2} + f.vls.SetCurrent(buffer.VideoLayer{Spatial: 0, Temporal: 2}) f.ProvisionalAllocatePrepare(nil, bitrates) // all the provisional allocations should not succeed because the feed is dry @@ -577,7 +577,7 @@ func TestForwarderProvisionalAllocate(t *testing.T) { // // Same case as above, but current is above max, so target should go to invalid // - f.currentLayers = buffer.VideoLayer{Spatial: 1, 
Temporal: 2} + f.vls.SetCurrent(buffer.VideoLayer{Spatial: 1, Temporal: 2}) f.ProvisionalAllocatePrepare(nil, bitrates) // all the provisional allocations below should not succeed because the feed is dry @@ -690,7 +690,7 @@ func TestForwarderProvisionalAllocateGetCooperativeTransition(t *testing.T) { // a higher target that is already streaming, just maintain it targetLayers := buffer.VideoLayer{Spatial: 2, Temporal: 1} - f.targetLayers = targetLayers + f.vls.SetTarget(targetLayers) f.lastAllocation.BandwidthRequested = 10 expectedTransition = VideoTransition{ From: targetLayers, @@ -719,7 +719,7 @@ func TestForwarderProvisionalAllocateGetCooperativeTransition(t *testing.T) { // from a target that has become unavailable, should switch to lower available layer targetLayers = buffer.VideoLayer{Spatial: 2, Temporal: 2} - f.targetLayers = targetLayers + f.vls.SetTarget(targetLayers) expectedTransition = VideoTransition{ From: targetLayers, To: buffer.VideoLayer{Spatial: 2, Temporal: 1}, @@ -757,7 +757,7 @@ func TestForwarderProvisionalAllocateGetCooperativeTransition(t *testing.T) { {9, 10, 0, 0}, } - f.targetLayers = buffer.InvalidLayers + f.vls.SetTarget(buffer.InvalidLayers) f.ProvisionalAllocatePrepare(nil, bitrates) // from scratch (buffer.InvalidLayers) should go to a layer past maximum as overshoot is allowed @@ -795,8 +795,8 @@ func TestForwarderProvisionalAllocateGetCooperativeTransition(t *testing.T) { {0, 0, 0, 0}, } - f.currentLayers = buffer.VideoLayer{Spatial: 0, Temporal: 2} - f.targetLayers = buffer.InvalidLayers + f.vls.SetCurrent(buffer.VideoLayer{Spatial: 0, Temporal: 2}) + f.vls.SetTarget(buffer.InvalidLayers) f.ProvisionalAllocatePrepare(nil, bitrates) // from scratch (buffer.InvalidLayers) should go to current layer @@ -854,10 +854,10 @@ func TestForwarderProvisionalAllocateGetBestWeightedTransition(t *testing.T) { f.ProvisionalAllocatePrepare(nil, bitrates) - f.targetLayers = buffer.VideoLayer{Spatial: 2, Temporal: 2} + 
f.vls.SetTarget(buffer.VideoLayer{Spatial: 2, Temporal: 2}) f.lastAllocation.BandwidthRequested = bitrates[2][2] expectedTransition := VideoTransition{ - From: f.targetLayers, + From: f.TargetLayers(), To: buffer.VideoLayer{Spatial: 2, Temporal: 0}, BandwidthDelta: 2, } @@ -894,19 +894,19 @@ func TestForwarderAllocateNextHigher(t *testing.T) { require.False(t, boosted) // if layers have not caught up, should not allocate next layer even if deficient - f.targetLayers = buffer.VideoLayer{ + f.vls.SetTarget(buffer.VideoLayer{ Spatial: 0, Temporal: 0, - } + }) result, boosted = f.AllocateNextHigher(100_000_000, nil, bitrates, false) require.Equal(t, VideoAllocationDefault, result) require.False(t, boosted) f.lastAllocation.IsDeficient = true - f.currentLayers = buffer.VideoLayer{ + f.vls.SetCurrent(buffer.VideoLayer{ Spatial: 0, Temporal: 0, - } + }) // move from (0, 0) -> (0, 1), i.e. a higher temporal layer is available in the same spatial layer expectedTargetLayers := buffer.VideoLayer{ @@ -936,7 +936,7 @@ func TestForwarderAllocateNextHigher(t *testing.T) { require.False(t, boosted) // move from (0, 1) -> (1, 0), i.e. 
a higher spatial layer is available - f.currentLayers.Temporal = 1 + f.vls.SetCurrent(buffer.VideoLayer{Spatial: f.vls.GetCurrent().Spatial, Temporal: 1}) expectedTargetLayers = buffer.VideoLayer{ Spatial: 1, Temporal: 0, @@ -959,8 +959,7 @@ func TestForwarderAllocateNextHigher(t *testing.T) { require.True(t, boosted) // next higher, move from (1, 0) -> (1, 3), still deficient though - f.currentLayers.Spatial = 1 - f.currentLayers.Temporal = 0 + f.vls.SetCurrent(buffer.VideoLayer{Spatial: 1, Temporal: 0}) expectedTargetLayers = buffer.VideoLayer{ Spatial: 1, Temporal: 3, @@ -983,7 +982,7 @@ func TestForwarderAllocateNextHigher(t *testing.T) { require.True(t, boosted) // next higher, move from (1, 3) -> (2, 1), optimal allocation - f.currentLayers.Temporal = 3 + f.vls.SetCurrent(buffer.VideoLayer{Spatial: f.vls.GetCurrent().Spatial, Temporal: 3}) expectedTargetLayers = buffer.VideoLayer{ Spatial: 2, Temporal: 1, @@ -1005,8 +1004,7 @@ func TestForwarderAllocateNextHigher(t *testing.T) { require.True(t, boosted) // ask again, should return not boosted as there is no room to go higher - f.currentLayers.Spatial = 2 - f.currentLayers.Temporal = 1 + f.vls.SetCurrent(buffer.VideoLayer{Spatial: 2, Temporal: 1}) result, boosted = f.AllocateNextHigher(100_000_000, nil, bitrates, false) require.Equal(t, expectedResult, result) require.Equal(t, expectedResult, f.lastAllocation) @@ -1066,7 +1064,7 @@ func TestForwarderAllocateNextHigher(t *testing.T) { {9, 10, 11, 12}, } - f.currentLayers = f.targetLayers + f.vls.SetCurrent(f.vls.GetTarget()) expectedTargetLayers = buffer.VideoLayer{ Spatial: 1, @@ -1227,8 +1225,7 @@ func TestForwarderGetTranslationParamsAudio(t *testing.T) { extPkt, _ = testutils.GetTestExtPacket(params) expectedTP = TranslationParams{ - shouldDrop: true, - isDroppingRelevant: true, + shouldDrop: true, } actualTP, err = f.GetTranslationParams(extPkt, 0) require.NoError(t, err) @@ -1338,21 +1335,22 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { 
Timestamp: 0xabcdef, SSRC: 0x12345678, PayloadSize: 20, + SetMarker: true, } vp8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 1, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: false, + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: false, } extPkt, _ := testutils.GetTestExtPacketVP8(params, vp8) @@ -1365,10 +1363,10 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { require.Equal(t, expectedTP, *actualTP) // although target layer matches, not a key frame, so should drop - f.targetLayers = buffer.VideoLayer{ + f.vls.SetTarget(buffer.VideoLayer{ Spatial: 0, Temporal: 1, - } + }) expectedTP = TranslationParams{ shouldDrop: true, } @@ -1376,48 +1374,50 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { require.NoError(t, err) require.Equal(t, expectedTP, *actualTP) - // should lock onto packet (target layer and key frame) + // should lock onto packet (key frame) vp8 = &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 1, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) + expectedVP8 := &buffer.VP8{ + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, + } + marshalledVP8, err := expectedVP8.Marshal() + require.NoError(t, err) expectedTP = TranslationParams{ - isSwitchingToMaxLayer: true, - 
isSwitchingToTargetLayer: true, + isSwitchingToMaxLayer: true, + isResuming: true, rtp: &TranslationParamsRTP{ snOrdering: SequenceNumberOrderingContiguous, sequenceNumber: 23333, timestamp: 0xabcdef, }, - vp8: &TranslationParamsVP8{ - Header: &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 1, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, - }, - }, + codecBytes: marshalledVP8, + marker: true, } actualTP, err = f.GetTranslationParams(extPkt, 0) require.NoError(t, err) @@ -1428,6 +1428,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { // send a duplicate, should be dropped expectedTP = TranslationParams{ shouldDrop: true, + marker: true, } actualTP, err = f.GetTranslationParams(extPkt, 0) require.NoError(t, err) @@ -1442,8 +1443,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) expectedTP = TranslationParams{ - shouldDrop: true, - isDroppingRelevant: true, + shouldDrop: true, } actualTP, err = f.GetTranslationParams(extPkt, 0) require.NoError(t, err) @@ -1471,35 +1471,36 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { PayloadSize: 20, } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) + expectedVP8 = &buffer.VP8{ + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, + } + marshalledVP8, err = expectedVP8.Marshal() + require.NoError(t, err) expectedTP = TranslationParams{ rtp: &TranslationParamsRTP{ snOrdering: SequenceNumberOrderingContiguous, sequenceNumber: 23334, timestamp: 0xabcdef, }, - vp8: &TranslationParamsVP8{ - Header: &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 1, - Y: 1, - 
KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, - }, - }, + codecBytes: marshalledVP8, } actualTP, err = f.GetTranslationParams(extPkt, 0) require.NoError(t, err) require.Equal(t, expectedTP, *actualTP) - // temporal layer higher than target, should be dropped + // temporal layer matching target, should be forwarded params = &testutils.TestExtPacketParams{ SequenceNumber: 23336, Timestamp: 0xabcdef, @@ -1507,19 +1508,72 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { PayloadSize: 20, } vp8 = &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13468, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 2, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 25, + S: true, + I: true, + M: true, + PictureID: 13468, + L: true, + TL0PICIDX: 233, + T: true, + TID: 1, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, + } + extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) + expectedVP8 = &buffer.VP8{ + FirstByte: 25, + I: true, + M: true, + PictureID: 13468, + L: true, + TL0PICIDX: 233, + T: true, + TID: 1, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, + } + marshalledVP8, err = expectedVP8.Marshal() + require.NoError(t, err) + expectedTP = TranslationParams{ + rtp: &TranslationParamsRTP{ + snOrdering: SequenceNumberOrderingContiguous, + sequenceNumber: 23335, + timestamp: 0xabcdef, + }, + codecBytes: marshalledVP8, + } + actualTP, err = f.GetTranslationParams(extPkt, 0) + require.NoError(t, err) + require.Equal(t, expectedTP, *actualTP) + + // temporal layer higher than target, should be dropped + params = &testutils.TestExtPacketParams{ + SequenceNumber: 23337, + Timestamp: 0xabcdef, + SSRC: 0x12345678, + PayloadSize: 20, + } + vp8 = &buffer.VP8{ + FirstByte: 25, + I: true, + M: true, + PictureID: 13468, + L: true, + TL0PICIDX: 233, + T: true, + TID: 2, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 
6, + IsKeyFrame: true, } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) expectedTP = TranslationParams{ @@ -1531,50 +1585,51 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { // RTP sequence number and VP8 picture id should be contiguous after dropping higher temporal layer picture params = &testutils.TestExtPacketParams{ - SequenceNumber: 23337, + SequenceNumber: 23338, Timestamp: 0xabcdef, SSRC: 0x12345678, PayloadSize: 20, } vp8 = &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13469, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 234, - TIDPresent: 1, - TID: 0, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: false, + FirstByte: 25, + I: true, + M: true, + PictureID: 13469, + L: true, + TL0PICIDX: 234, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: false, } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) + expectedVP8 = &buffer.VP8{ + FirstByte: 25, + I: true, + M: true, + PictureID: 13469, + L: true, + TL0PICIDX: 234, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: false, + } + marshalledVP8, err = expectedVP8.Marshal() + require.NoError(t, err) expectedTP = TranslationParams{ rtp: &TranslationParamsRTP{ snOrdering: SequenceNumberOrderingContiguous, - sequenceNumber: 23335, + sequenceNumber: 23336, timestamp: 0xabcdef, }, - vp8: &TranslationParamsVP8{ - Header: &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13468, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 234, - TIDPresent: 1, - TID: 0, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: false, - }, - }, + codecBytes: marshalledVP8, } actualTP, err = f.GetTranslationParams(extPkt, 0) require.NoError(t, err) @@ -1582,7 +1637,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { // padding only packet after a gap should be forwarded params = &testutils.TestExtPacketParams{ - SequenceNumber: 23339, + 
SequenceNumber: 23340, Timestamp: 0xabcdef, SSRC: 0x12345678, } @@ -1591,7 +1646,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { expectedTP = TranslationParams{ rtp: &TranslationParamsRTP{ snOrdering: SequenceNumberOrderingGap, - sequenceNumber: 23337, + sequenceNumber: 23338, timestamp: 0xabcdef, }, } @@ -1601,7 +1656,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { // out-of-order should be forwarded using cache, even if it is padding only params = &testutils.TestExtPacketParams{ - SequenceNumber: 23338, + SequenceNumber: 23339, Timestamp: 0xabcdef, SSRC: 0x12345678, } @@ -1610,7 +1665,7 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { expectedTP = TranslationParams{ rtp: &TranslationParamsRTP{ snOrdering: SequenceNumberOrderingOutOfOrder, - sequenceNumber: 23336, + sequenceNumber: 23337, timestamp: 0xabcdef, }, } @@ -1620,10 +1675,10 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { // switching SSRC (happens for new layer or new track source) // should lock onto the new source, but sequence number should be contiguous - f.targetLayers = buffer.VideoLayer{ + f.vls.SetTarget(buffer.VideoLayer{ Spatial: 1, Temporal: 1, - } + }) params = &testutils.TestExtPacketParams{ SequenceNumber: 123, @@ -1632,47 +1687,47 @@ func TestForwarderGetTranslationParamsVideo(t *testing.T) { PayloadSize: 20, } vp8 = &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 45, - MBit: false, - TL0PICIDXPresent: 1, - TL0PICIDX: 12, - TIDPresent: 1, - TID: 0, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 30, - HeaderSize: 5, - IsKeyFrame: true, + FirstByte: 25, + I: true, + M: false, + PictureID: 45, + L: true, + TL0PICIDX: 12, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 30, + HeaderSize: 5, + IsKeyFrame: true, } extPkt, _ = testutils.GetTestExtPacketVP8(params, vp8) + expectedVP8 = &buffer.VP8{ + FirstByte: 25, + I: true, + M: true, + PictureID: 13470, + L: true, + TL0PICIDX: 235, + T: true, + TID: 0, + Y: true, + K: 
true, + KEYIDX: 24, + HeaderSize: 6, + IsKeyFrame: true, + } + marshalledVP8, err = expectedVP8.Marshal() + require.NoError(t, err) expectedTP = TranslationParams{ - isSwitchingToMaxLayer: true, - isSwitchingToTargetLayer: true, + isSwitchingToMaxLayer: true, rtp: &TranslationParamsRTP{ snOrdering: SequenceNumberOrderingContiguous, - sequenceNumber: 23338, + sequenceNumber: 23339, timestamp: 0xabcdf0, }, - vp8: &TranslationParamsVP8{ - Header: &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13469, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 235, - TIDPresent: 1, - TID: 0, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 24, - HeaderSize: 6, - IsKeyFrame: true, - }, - }, + codecBytes: marshalledVP8, } actualTP, err = f.GetTranslationParams(extPkt, 1) require.NoError(t, err) @@ -1690,27 +1745,27 @@ func TestForwardGetSnTsForPadding(t *testing.T) { PayloadSize: 20, } vp8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 13, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } extPkt, _ := testutils.GetTestExtPacketVP8(params, vp8) - f.targetLayers = buffer.VideoLayer{ + f.vls.SetTarget(buffer.VideoLayer{ Spatial: 0, Temporal: 1, - } - f.currentLayers = buffer.InvalidLayers + }) + f.vls.SetCurrent(buffer.InvalidLayers) // send it through so that forwarder locks onto stream _, _ = f.GetTranslationParams(extPkt, 0) @@ -1757,27 +1812,27 @@ func TestForwardGetSnTsForBlankFrames(t *testing.T) { PayloadSize: 20, } vp8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 13, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + 
FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } extPkt, _ := testutils.GetTestExtPacketVP8(params, vp8) - f.targetLayers = buffer.VideoLayer{ + f.vls.SetTarget(buffer.VideoLayer{ Spatial: 0, Temporal: 1, - } - f.currentLayers = buffer.InvalidLayers + }) + f.vls.SetCurrent(buffer.InvalidLayers) // send it through so that forwarder locks onto stream _, _ = f.GetTranslationParams(extPkt, 0) @@ -1827,66 +1882,72 @@ func TestForwardGetPaddingVP8(t *testing.T) { PayloadSize: 20, } vp8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 13, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 25, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 13, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } extPkt, _ := testutils.GetTestExtPacketVP8(params, vp8) - f.targetLayers = buffer.VideoLayer{ + f.vls.SetTarget(buffer.VideoLayer{ Spatial: 0, Temporal: 1, - } - f.currentLayers = buffer.InvalidLayers + }) + f.vls.SetCurrent(buffer.InvalidLayers) // send it through so that forwarder locks onto stream _, _ = f.GetTranslationParams(extPkt, 0) // getting padding with frame end needed, should repeat the last picture id expectedVP8 := buffer.VP8{ - FirstByte: 16, - PictureIDPresent: 1, - PictureID: 13467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 0, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 16, + I: true, + M: true, + PictureID: 13467, + L: true, + TL0PICIDX: 233, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 23, + HeaderSize: 6, + IsKeyFrame: true, } - blankVP8 := f.GetPaddingVP8(true) - require.Equal(t, expectedVP8, *blankVP8) + blankVP8, err := 
f.GetPadding(true) + require.NoError(t, err) + marshalledVP8, err := expectedVP8.Marshal() + require.NoError(t, err) + require.Equal(t, marshalledVP8, blankVP8) // getting padding with no frame end needed, should get next picture id expectedVP8 = buffer.VP8{ - FirstByte: 16, - PictureIDPresent: 1, - PictureID: 13468, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 234, - TIDPresent: 1, - TID: 0, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 24, - HeaderSize: 6, - IsKeyFrame: true, + FirstByte: 16, + I: true, + M: true, + PictureID: 13468, + L: true, + TL0PICIDX: 234, + T: true, + TID: 0, + Y: true, + K: true, + KEYIDX: 24, + HeaderSize: 6, + IsKeyFrame: true, } - blankVP8 = f.GetPaddingVP8(false) - require.Equal(t, expectedVP8, *blankVP8) + blankVP8, err = f.GetPadding(false) + require.NoError(t, err) + marshalledVP8, err = expectedVP8.Marshal() + require.NoError(t, err) + require.Equal(t, marshalledVP8, blankVP8) } diff --git a/pkg/sfu/sequencer.go b/pkg/sfu/sequencer.go index 0fb7693af..4f8558512 100644 --- a/pkg/sfu/sequencer.go +++ b/pkg/sfu/sequencer.go @@ -6,8 +6,6 @@ import ( "time" "github.com/livekit/protocol/logger" - - "github.com/livekit/livekit-server/pkg/sfu/buffer" ) const ( @@ -51,45 +49,11 @@ type packetMeta struct { // Spatial layer of packet layer int8 // Information that differs depending on the codec - misc uint64 + codecBytes []byte // Dependency Descriptor of packet ddBytes []byte } -func (p *packetMeta) packVP8(vp8 *buffer.VP8) { - p.misc = uint64(vp8.FirstByte)<<56 | - uint64(vp8.PictureIDPresent&0x1)<<55 | - uint64(vp8.TL0PICIDXPresent&0x1)<<54 | - uint64(vp8.TIDPresent&0x1)<<53 | - uint64(vp8.KEYIDXPresent&0x1)<<52 | - uint64(btoi(vp8.MBit)&0x1)<<51 | - uint64(btoi(vp8.IsKeyFrame)&0x1)<<50 | - uint64(vp8.PictureID&0x7FFF)<<32 | - uint64(vp8.TL0PICIDX&0xFF)<<24 | - uint64(vp8.TID&0x3)<<22 | - uint64(vp8.Y&0x1)<<21 | - uint64(vp8.KEYIDX&0x1F)<<16 | - uint64(vp8.HeaderSize&0xFF)<<8 -} - -func (p *packetMeta) unpackVP8() *buffer.VP8 { - return 
&buffer.VP8{ - FirstByte: byte(p.misc >> 56), - PictureIDPresent: int((p.misc >> 55) & 0x1), - PictureID: uint16((p.misc >> 32) & 0x7FFF), - MBit: itob(int((p.misc >> 51) & 0x1)), - TL0PICIDXPresent: int((p.misc >> 54) & 0x1), - TL0PICIDX: uint8((p.misc >> 24) & 0xFF), - TIDPresent: int((p.misc >> 53) & 0x1), - TID: uint8((p.misc >> 22) & 0x3), - Y: uint8((p.misc >> 21) & 0x1), - KEYIDXPresent: int((p.misc >> 52) & 0x1), - KEYIDX: uint8((p.misc >> 16) & 0x1F), - HeaderSize: int((p.misc >> 8) & 0xFF), - IsKeyFrame: itob(int((p.misc >> 50) & 0x1)), - } -} - // Sequencer stores the packet sequence received by the down track type sequencer struct { sync.Mutex diff --git a/pkg/sfu/sequencer_test.go b/pkg/sfu/sequencer_test.go index 51cbc0eb4..776434004 100644 --- a/pkg/sfu/sequencer_test.go +++ b/pkg/sfu/sequencer_test.go @@ -8,8 +8,6 @@ import ( "github.com/stretchr/testify/require" "github.com/livekit/protocol/logger" - - "github.com/livekit/livekit-server/pkg/sfu/buffer" ) func Test_sequencer(t *testing.T) { @@ -104,82 +102,3 @@ func Test_sequencer_getNACKSeqNo(t *testing.T) { }) } } - -func Test_packetMeta_VP8(t *testing.T) { - p := &packetMeta{} - - vp8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 55467, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 13, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, - } - - p.packVP8(vp8) - - // booleans are not packed, so they will be `false` in unpacked. - // Also, TID is only two bits, so it should be modulo 3. 
- expectedVP8 := &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 55467 % 32768, - MBit: true, - TL0PICIDXPresent: 1, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 13 % 3, - Y: 1, - KEYIDXPresent: 1, - KEYIDX: 23, - HeaderSize: 6, - IsKeyFrame: true, - } - unpackedVP8 := p.unpackVP8() - require.Equal(t, expectedVP8, unpackedVP8) - - // short picture id and no TL0PICIDX - vp8 = &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 63, - MBit: false, - TL0PICIDXPresent: 0, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 2, - Y: 1, - KEYIDXPresent: 0, - KEYIDX: 23, - HeaderSize: 23, - IsKeyFrame: true, - } - - p.packVP8(vp8) - - expectedVP8 = &buffer.VP8{ - FirstByte: 25, - PictureIDPresent: 1, - PictureID: 63, - MBit: false, - TL0PICIDXPresent: 0, - TL0PICIDX: 233, - TIDPresent: 1, - TID: 2, - Y: 1, - KEYIDXPresent: 0, - KEYIDX: 23, - HeaderSize: 23, - IsKeyFrame: true, - } - unpackedVP8 = p.unpackVP8() - require.Equal(t, expectedVP8, unpackedVP8) -} diff --git a/pkg/sfu/streamallocator/streamallocator.go b/pkg/sfu/streamallocator/streamallocator.go index e5f7d5e3f..cb0e0915f 100644 --- a/pkg/sfu/streamallocator/streamallocator.go +++ b/pkg/sfu/streamallocator/streamallocator.go @@ -98,7 +98,7 @@ const ( streamAllocatorSignalPeriodicPing streamAllocatorSignalSendProbe streamAllocatorSignalProbeClusterDone - streamAllocatorSignalTargetLayerFound + streamAllocatorSignalResume ) func (s streamAllocatorSignal) String() string { @@ -117,8 +117,8 @@ func (s streamAllocatorSignal) String() string { return "SEND_PROBE" case streamAllocatorSignalProbeClusterDone: return "PROBE_CLUSTER_DONE" - case streamAllocatorSignalTargetLayerFound: - return "TARGET_LAYER_FOUND" + case streamAllocatorSignalResume: + return "RESUME" default: return fmt.Sprintf("%d", int(s)) } @@ -415,10 +415,10 @@ func (s *StreamAllocator) OnSubscribedLayersChanged(downTrack *sfu.DownTrack, la } } -// called when forwarder finds a target layer -func (s *StreamAllocator) 
OnTargetLayerReached(downTrack *sfu.DownTrack) { +// called when forwarder resumes a track +func (s *StreamAllocator) OnResume(downTrack *sfu.DownTrack) { s.postEvent(Event{ - Signal: streamAllocatorSignalTargetLayerFound, + Signal: streamAllocatorSignalResume, TrackID: livekit.TrackID(downTrack.ID()), }) } @@ -529,8 +529,8 @@ func (s *StreamAllocator) handleEvent(event *Event) { s.handleSignalSendProbe(event) case streamAllocatorSignalProbeClusterDone: s.handleSignalProbeClusterDone(event) - case streamAllocatorSignalTargetLayerFound: - s.handleSignalTargetLayerFound(event) + case streamAllocatorSignalResume: + s.handleSignalResume(event) } } @@ -630,7 +630,7 @@ func (s *StreamAllocator) handleSignalProbeClusterDone(event *Event) { s.probeEndTime = s.lastProbeStartTime.Add(queueWait) } -func (s *StreamAllocator) handleSignalTargetLayerFound(event *Event) { +func (s *StreamAllocator) handleSignalResume(event *Event) { s.videoTracksMu.Lock() track := s.videoTracks[event.TrackID] s.videoTracksMu.Unlock() diff --git a/pkg/sfu/testutils/data.go b/pkg/sfu/testutils/data.go index a3131eeee..8b243b40f 100644 --- a/pkg/sfu/testutils/data.go +++ b/pkg/sfu/testutils/data.go @@ -19,6 +19,7 @@ type TestExtPacketParams struct { PayloadSize int PaddingSize byte ArrivalTime int64 + VideoLayer buffer.VideoLayer } // ----------------------------------------------------------- @@ -44,10 +45,11 @@ func GetTestExtPacket(params *TestExtPacketParams) (*buffer.ExtPacket, error) { } ep := &buffer.ExtPacket{ - Arrival: params.ArrivalTime, - Packet: &packet, - KeyFrame: params.IsKeyFrame, - RawPacket: raw, + VideoLayer: params.VideoLayer, + Arrival: params.ArrivalTime, + Packet: &packet, + KeyFrame: params.IsKeyFrame, + RawPacket: raw, } return ep, nil diff --git a/pkg/sfu/videolayerselector.go b/pkg/sfu/videolayerselector.go deleted file mode 100644 index 1368bc870..000000000 --- a/pkg/sfu/videolayerselector.go +++ /dev/null @@ -1,202 +0,0 @@ -package sfu - -import ( - "fmt" - "sort" - - 
"github.com/livekit/livekit-server/pkg/sfu/buffer" - dd "github.com/livekit/livekit-server/pkg/sfu/dependencydescriptor" - "github.com/livekit/protocol/logger" -) - -type targetLayer struct { - Target int - Layer buffer.VideoLayer -} - -type DDVideoLayerSelector struct { - logger logger.Logger - - // DD-TODO : fields for frame chain detect - // frameNumberWrapper Uint16Wrapper - // expectKeyFrame bool - - decodeTargetLayer []targetLayer - layer buffer.VideoLayer - activeDecodeTargetsBitmask *uint32 - structure *dd.FrameDependencyStructure -} - -func NewDDVideoLayerSelector(logger logger.Logger) *DDVideoLayerSelector { - return &DDVideoLayerSelector{ - logger: logger, - layer: buffer.VideoLayer{Spatial: 2, Temporal: 2}, - } -} - -func (s *DDVideoLayerSelector) Select(expPkt *buffer.ExtPacket, tp *TranslationParams) (selected bool) { - tp.marker = expPkt.Packet.Marker - if expPkt.DependencyDescriptor == nil { - // packet don't have dependency descriptor, pass check - return true - } - - if expPkt.DependencyDescriptor.AttachedStructure != nil { - // update decode target layer and active decode targets - // DD-TODO : these targets info can be shared by all the downtracks, no need calculate in every selector - s.updateDependencyStructure(expPkt.DependencyDescriptor.AttachedStructure) - } - - // forward all packets before locking - if s.layer == buffer.InvalidLayers { - return true - } - - // DD-TODO : we don't have a rtp queue to ensure the order of packets now, - // so we don't know packet is lost/out of order, that cause us can't detect - // frame integrity, entire frame is forwareded, whether frame chain is broken. - // So use a simple check here, assume all the reference frame is forwarded and - // only check DTI of the active decode target. - // it is not effeciency, at last we need check frame chain integrity. 
- - activeDecodeTargets := expPkt.DependencyDescriptor.ActiveDecodeTargetsBitmask - if activeDecodeTargets != nil { - s.logger.Debugw("active decode targets", "activeDecodeTargets", *activeDecodeTargets) - } - - currentTarget := -1 - for _, dt := range s.decodeTargetLayer { - // find target match with selected layer - if dt.Layer.Spatial <= s.layer.Spatial && dt.Layer.Temporal <= s.layer.Temporal { - if activeDecodeTargets == nil || ((*activeDecodeTargets)&(1< maxSpatial { - maxSpatial = dt.Layer.Spatial - } - if dt.Layer.Temporal > maxTemporal { - maxTemporal = dt.Layer.Temporal - } - if dt.Layer.Spatial <= layer.Spatial && dt.Layer.Temporal <= layer.Temporal { - activeBitMask |= 1 << dt.Target - } - } - if layer.Spatial == maxSpatial && layer.Temporal == maxTemporal { - // all the decode targets are selected - s.activeDecodeTargetsBitmask = nil - } else { - s.activeDecodeTargetsBitmask = &activeBitMask - } - s.logger.Debugw("select layer ", "layer", layer, "activeDecodeTargetsBitmask", s.activeDecodeTargetsBitmask) -} - -func (s *DDVideoLayerSelector) updateDependencyStructure(structure *dd.FrameDependencyStructure) { - s.structure = structure - s.decodeTargetLayer = s.decodeTargetLayer[:0] - - for target := 0; target < structure.NumDecodeTargets; target++ { - layer := buffer.VideoLayer{Spatial: 0, Temporal: 0} - for _, t := range structure.Templates { - if t.DecodeTargetIndications[target] != dd.DecodeTargetNotPresent { - if layer.Spatial < int32(t.SpatialId) { - layer.Spatial = int32(t.SpatialId) - } - if layer.Temporal < int32(t.TemporalId) { - layer.Temporal = int32(t.TemporalId) - } - } - } - s.decodeTargetLayer = append(s.decodeTargetLayer, targetLayer{target, layer}) - } - - // sort decode target layer by spatial and temporal from high to low - sort.Slice(s.decodeTargetLayer, func(i, j int) bool { - if s.decodeTargetLayer[i].Layer.Spatial == s.decodeTargetLayer[j].Layer.Spatial { - return s.decodeTargetLayer[i].Layer.Temporal > 
s.decodeTargetLayer[j].Layer.Temporal - } - return s.decodeTargetLayer[i].Layer.Spatial > s.decodeTargetLayer[j].Layer.Spatial - }) - s.logger.Debugw(fmt.Sprintf("update decode targets: %v", s.decodeTargetLayer)) -} - -// DD-TODO : use generic wrapper when updated to go 1.18 -type Uint16Wrapper struct { - lastValue *uint16 - lastUnwrapped int32 -} - -func (w *Uint16Wrapper) Unwrap(value uint16) int32 { - if w.lastValue == nil { - w.lastValue = &value - w.lastUnwrapped = int32(value) - return int32(*w.lastValue) - } - - diff := value - *w.lastValue - w.lastUnwrapped += int32(diff) - if diff == 0x8000 && value < *w.lastValue { - w.lastUnwrapped -= 0x10000 - } else if diff > 0x8000 { - w.lastUnwrapped -= 0x10000 - } - - *w.lastValue = value - return w.lastUnwrapped -} diff --git a/pkg/sfu/videolayerselector/base.go b/pkg/sfu/videolayerselector/base.go new file mode 100644 index 000000000..29a8cdc42 --- /dev/null +++ b/pkg/sfu/videolayerselector/base.go @@ -0,0 +1,120 @@ +package videolayerselector + +import ( + "github.com/livekit/livekit-server/pkg/sfu/buffer" + "github.com/livekit/livekit-server/pkg/sfu/videolayerselector/temporallayerselector" + "github.com/livekit/protocol/logger" +) + +type Base struct { + logger logger.Logger + + tls temporallayerselector.TemporalLayerSelector + + maxLayer buffer.VideoLayer + targetLayer buffer.VideoLayer + requestSpatial int32 + maxSeenLayer buffer.VideoLayer + + parkedLayer buffer.VideoLayer + + currentLayer buffer.VideoLayer +} + +func NewBase(logger logger.Logger) *Base { + return &Base{ + logger: logger, + maxLayer: buffer.InvalidLayers, + targetLayer: buffer.InvalidLayers, // start off with nothing, let streamallocator/opportunistic forwarder set the target + requestSpatial: buffer.InvalidLayerSpatial, + maxSeenLayer: buffer.InvalidLayers, + parkedLayer: buffer.InvalidLayers, + currentLayer: buffer.InvalidLayers, + } +} + +func (b *Base) IsOvershootOkay() bool { + return false +} + +func (b *Base) 
SetTemporalLayerSelector(tls temporallayerselector.TemporalLayerSelector) { + b.tls = tls +} + +func (b *Base) SetMax(maxLayer buffer.VideoLayer) { + b.maxLayer = maxLayer +} + +func (b *Base) SetMaxSpatial(layer int32) { + b.maxLayer.Spatial = layer +} + +func (b *Base) SetMaxTemporal(layer int32) { + b.maxLayer.Temporal = layer +} + +func (b *Base) GetMax() buffer.VideoLayer { + return b.maxLayer +} + +func (b *Base) SetTarget(targetLayer buffer.VideoLayer) { + b.targetLayer = targetLayer +} + +func (b *Base) GetTarget() buffer.VideoLayer { + return b.targetLayer +} + +func (b *Base) SetRequestSpatial(layer int32) { + b.requestSpatial = layer +} + +func (b *Base) GetRequestSpatial() int32 { + return b.requestSpatial +} + +func (b *Base) SetMaxSeen(maxSeenLayer buffer.VideoLayer) { + b.maxSeenLayer = maxSeenLayer +} + +func (b *Base) SetMaxSeenSpatial(layer int32) { + b.maxSeenLayer.Spatial = layer +} + +func (b *Base) SetMaxSeenTemporal(layer int32) { + b.maxSeenLayer.Temporal = layer +} + +func (b *Base) GetMaxSeen() buffer.VideoLayer { + return b.maxSeenLayer +} + +func (b *Base) SetParked(parkedLayer buffer.VideoLayer) { + b.parkedLayer = parkedLayer +} + +func (b *Base) GetParked() buffer.VideoLayer { + return b.parkedLayer +} + +func (b *Base) SetCurrent(currentLayer buffer.VideoLayer) { + b.currentLayer = currentLayer +} + +func (b *Base) GetCurrent() buffer.VideoLayer { + return b.currentLayer +} + +func (b *Base) Select(_extPkt *buffer.ExtPacket, _layer int32) (result VideoLayerSelectorResult) { + return +} + +func (b *Base) SelectTemporal(extPkt *buffer.ExtPacket) int32 { + if b.tls != nil { + this, next := b.tls.Select(extPkt, b.currentLayer.Temporal, b.targetLayer.Temporal) + b.currentLayer.Temporal = next + return this + } + + return b.currentLayer.Temporal +} diff --git a/pkg/sfu/videolayerselector/dependencydescriptor.go b/pkg/sfu/videolayerselector/dependencydescriptor.go new file mode 100644 index 000000000..8b39049ce --- /dev/null +++ 
b/pkg/sfu/videolayerselector/dependencydescriptor.go @@ -0,0 +1,278 @@ +package videolayerselector + +import ( + "fmt" + "sort" + + "github.com/livekit/livekit-server/pkg/sfu/buffer" + dd "github.com/livekit/livekit-server/pkg/sfu/dependencydescriptor" + "github.com/livekit/protocol/logger" +) + +type decodeTarget struct { + Target int + Layer buffer.VideoLayer +} + +type DependencyDescriptor struct { + *Base + + // DD-TODO : fields for frame chain detect + // frameNumberWrapper Uint16Wrapper + // expectKeyFrame bool + + decodeTargets []decodeTarget + activeDecodeTargetsBitmask *uint32 + structure *dd.FrameDependencyStructure +} + +func NewDependencyDescriptor(logger logger.Logger) *DependencyDescriptor { + return &DependencyDescriptor{ + Base: NewBase(logger), + } +} + +func NewDependencyDescriptorFromNull(vls VideoLayerSelector) *DependencyDescriptor { + return &DependencyDescriptor{ + Base: vls.(*Null).Base, + } +} + +func (d *DependencyDescriptor) IsOvershootOkay() bool { + return false +} + +func (d *DependencyDescriptor) Select(extPkt *buffer.ExtPacket, _layer int32) (result VideoLayerSelectorResult) { + if extPkt.DependencyDescriptor == nil { + // packet don't have dependency descriptor + return + } + + if !d.currentLayer.IsValid() && !extPkt.KeyFrame { + return + } + + result.IsRelevant = true + + if extPkt.DependencyDescriptor.AttachedStructure != nil { + // update decode target layer and active decode targets + // DD-TODO : these targets info can be shared by all the downtracks, no need calculate in every selector + d.updateDependencyStructure(extPkt.DependencyDescriptor.AttachedStructure) + } + + // DD-TODO : we don't have a rtp queue to ensure the order of packets now, + // so we don't know packet is lost/out of order, that cause us can't detect + // frame integrity, entire frame is forwarded, whether frame chain is broken. + // So use a simple check here, assume all the reference frame is forwarded and + // only check DTI of the active decode target.
+ // it is not efficient, at last we need check frame chain integrity. + + activeDecodeTargets := extPkt.DependencyDescriptor.ActiveDecodeTargetsBitmask + if activeDecodeTargets != nil { + d.logger.Debugw("active decode targets", "activeDecodeTargets", *activeDecodeTargets) + } + + currentTarget := -1 + for _, dt := range d.decodeTargets { + // find target match with selected layer + if dt.Layer.Spatial <= d.targetLayer.Spatial && dt.Layer.Temporal <= d.targetLayer.Temporal { + if activeDecodeTargets == nil || ((*activeDecodeTargets)&(1< maxSpatial { + maxSpatial = dt.Layer.Spatial + } + if dt.Layer.Temporal > maxTemporal { + maxTemporal = dt.Layer.Temporal + } + if dt.Layer.Spatial <= targetLayer.Spatial && dt.Layer.Temporal <= targetLayer.Temporal { + activeBitMask |= 1 << dt.Target + } + } + if targetLayer.Spatial == maxSpatial && targetLayer.Temporal == maxTemporal { + // all the decode targets are selected + d.activeDecodeTargetsBitmask = nil + } else { + d.activeDecodeTargetsBitmask = &activeBitMask + } + d.logger.Debugw("setting target", "targetlayer", targetLayer, "activeDecodeTargetsBitmask", d.activeDecodeTargetsBitmask) +} + +func (d *DependencyDescriptor) updateDependencyStructure(structure *dd.FrameDependencyStructure) { + d.structure = structure + d.decodeTargets = d.decodeTargets[:0] + + for target := 0; target < structure.NumDecodeTargets; target++ { + layer := buffer.VideoLayer{Spatial: 0, Temporal: 0} + for _, t := range structure.Templates { + if t.DecodeTargetIndications[target] != dd.DecodeTargetNotPresent { + if layer.Spatial < int32(t.SpatialId) { + layer.Spatial = int32(t.SpatialId) + } + if layer.Temporal < int32(t.TemporalId) { + layer.Temporal = int32(t.TemporalId) + } + } + } + d.decodeTargets = append(d.decodeTargets, decodeTarget{target, layer}) + } + + // sort decode target layer by spatial and temporal from high to low + sort.Slice(d.decodeTargets, func(i, j int) bool { + return
d.decodeTargets[i].Layer.GreaterThan(d.decodeTargets[j].Layer) + }) + d.logger.Debugw(fmt.Sprintf("update decode targets: %v", d.decodeTargets)) +} + +// DD-TODO : use generic wrapper when updated to go 1.18 +type Uint16Wrapper struct { + lastValue *uint16 + lastUnwrapped int32 +} + +func (w *Uint16Wrapper) Unwrap(value uint16) int32 { + if w.lastValue == nil { + w.lastValue = &value + w.lastUnwrapped = int32(value) + return int32(*w.lastValue) + } + + diff := value - *w.lastValue + w.lastUnwrapped += int32(diff) + if diff == 0x8000 && value < *w.lastValue { + w.lastUnwrapped -= 0x10000 + } else if diff > 0x8000 { + w.lastUnwrapped -= 0x10000 + } + + *w.lastValue = value + return w.lastUnwrapped +} diff --git a/pkg/sfu/videolayerselector/null.go b/pkg/sfu/videolayerselector/null.go new file mode 100644 index 000000000..d1b87fb1b --- /dev/null +++ b/pkg/sfu/videolayerselector/null.go @@ -0,0 +1,15 @@ +package videolayerselector + +import ( + "github.com/livekit/protocol/logger" +) + +type Null struct { + *Base +} + +func NewNull(logger logger.Logger) *Null { + return &Null{ + Base: NewBase(logger), + } +} diff --git a/pkg/sfu/videolayerselector/simulcast.go b/pkg/sfu/videolayerselector/simulcast.go new file mode 100644 index 000000000..9bb1d368f --- /dev/null +++ b/pkg/sfu/videolayerselector/simulcast.go @@ -0,0 +1,143 @@ +package videolayerselector + +import ( + "github.com/livekit/livekit-server/pkg/sfu/buffer" + "github.com/livekit/protocol/logger" +) + +type Simulcast struct { + *Base +} + +func NewSimulcast(logger logger.Logger) *Simulcast { + return &Simulcast{ + Base: NewBase(logger), + } +} + +func NewSimulcastFromNull(vls VideoLayerSelector) *Simulcast { + return &Simulcast{ + Base: vls.(*Null).Base, + } +} + +func (s *Simulcast) IsOvershootOkay() bool { + return true +} + +func (s *Simulcast) Select(extPkt *buffer.ExtPacket, layer int32) (result VideoLayerSelectorResult) { + if s.currentLayer.Spatial != s.targetLayer.Spatial { + // Three things to check 
when not locked to target + // 1. Resumable layer - don't need a key frame + // 2. Opportunistic layer upgrade - needs a key frame + // 3. Need to downgrade - needs a key frame + isActive := s.currentLayer.IsValid() + found := false + if s.parkedLayer.IsValid() { + if s.parkedLayer.Spatial == layer { + s.logger.Infow( + "resuming at parked layer", + "current", s.currentLayer, + "target", s.targetLayer, + "max", s.maxLayer, + "parked", s.parkedLayer, + "req", s.requestSpatial, + "maxSeen", s.maxSeenLayer, + "feed", extPkt.Packet.SSRC, + ) + s.currentLayer = s.parkedLayer + found = true + } + } else { + if extPkt.KeyFrame { + if layer > s.currentLayer.Spatial && layer <= s.targetLayer.Spatial { + s.logger.Infow( + "upgrading layer", + "current", s.currentLayer, + "target", s.targetLayer, + "max", s.maxLayer, + "layer", layer, + "req", s.requestSpatial, + "maxSeen", s.maxSeenLayer, + "feed", extPkt.Packet.SSRC, + ) + found = true + } + + if layer < s.currentLayer.Spatial && layer >= s.targetLayer.Spatial { + s.logger.Infow( + "downgrading layer", + "current", s.currentLayer, + "target", s.targetLayer, + "max", s.maxLayer, + "layer", layer, + "req", s.requestSpatial, + "maxSeen", s.maxSeenLayer, + "feed", extPkt.Packet.SSRC, + ) + found = true + } + + if found { + s.currentLayer.Spatial = layer + s.currentLayer.Temporal = extPkt.VideoLayer.Temporal + } + } + } + + if found { + if !isActive { + result.IsResuming = true + } + s.SetParked(buffer.InvalidLayers) + if s.currentLayer.Spatial >= s.maxLayer.Spatial { + result.IsSwitchingToMaxSpatial = true + + s.logger.Infow( + "reached max layer", + "current", s.currentLayer, + "target", s.targetLayer, + "max", s.maxLayer, + "layer", layer, + "req", s.requestSpatial, + "maxSeen", s.maxSeenLayer, + "feed", extPkt.Packet.SSRC, + ) + } + + if s.currentLayer.Spatial >= s.maxLayer.Spatial || s.currentLayer.Spatial == s.maxSeenLayer.Spatial { + s.targetLayer.Spatial = s.currentLayer.Spatial + } + } + } + + // if locked to higher 
than max layer due to overshoot, check if it can be dialed back + if s.currentLayer.Spatial > s.maxLayer.Spatial { + if layer <= s.maxLayer.Spatial && extPkt.KeyFrame { + s.logger.Infow( + "adjusting overshoot", + "current", s.currentLayer, + "target", s.targetLayer, + "max", s.maxLayer, + "layer", layer, + "req", s.requestSpatial, + "maxSeen", s.maxSeenLayer, + "feed", extPkt.Packet.SSRC, + ) + s.currentLayer.Spatial = layer + + if s.currentLayer.Spatial >= s.maxLayer.Spatial { + result.IsSwitchingToMaxSpatial = true + } + + if s.currentLayer.Spatial >= s.maxLayer.Spatial || s.currentLayer.Spatial == s.maxSeenLayer.Spatial { + s.targetLayer.Spatial = layer + } + } + } + + result.RTPMarker = extPkt.Packet.Marker + result.IsSelected = layer == s.currentLayer.Spatial + result.IsRelevant = false + return +} diff --git a/pkg/sfu/videolayerselector/temporallayerselector/null.go b/pkg/sfu/videolayerselector/temporallayerselector/null.go new file mode 100644 index 000000000..d39cab106 --- /dev/null +++ b/pkg/sfu/videolayerselector/temporallayerselector/null.go @@ -0,0 +1,17 @@ +package temporallayerselector + +import ( + "github.com/livekit/livekit-server/pkg/sfu/buffer" +) + +type Null struct{} + +func NewNull() *Null { + return &Null{} +} + +func Select(_extPkt *buffer.ExtPacket, current int32, _target int32) (this int32, next int32) { + this = current + next = current + return +} diff --git a/pkg/sfu/videolayerselector/temporallayerselector/temporallayerselector.go b/pkg/sfu/videolayerselector/temporallayerselector/temporallayerselector.go new file mode 100644 index 000000000..8219aea2d --- /dev/null +++ b/pkg/sfu/videolayerselector/temporallayerselector/temporallayerselector.go @@ -0,0 +1,7 @@ +package temporallayerselector + +import "github.com/livekit/livekit-server/pkg/sfu/buffer" + +type TemporalLayerSelector interface { + Select(extPkt *buffer.ExtPacket, current int32, target int32) (this int32, next int32) +} diff --git 
a/pkg/sfu/videolayerselector/temporallayerselector/vp8.go b/pkg/sfu/videolayerselector/temporallayerselector/vp8.go new file mode 100644 index 000000000..fbf697f33 --- /dev/null +++ b/pkg/sfu/videolayerselector/temporallayerselector/vp8.go @@ -0,0 +1,42 @@ +package temporallayerselector + +import ( + "github.com/livekit/livekit-server/pkg/sfu/buffer" + "github.com/livekit/protocol/logger" +) + +type VP8 struct { + logger logger.Logger +} + +func NewVP8(logger logger.Logger) *VP8 { + return &VP8{ + logger: logger, + } +} + +func (v *VP8) Select(extPkt *buffer.ExtPacket, current int32, target int32) (this int32, next int32) { + this = current + next = current + if current == target { + return + } + + vp8, ok := extPkt.Payload.(buffer.VP8) + if !ok || !vp8.T { + return + } + + tid := int32(vp8.TID) + if current < target { + if tid > current && tid <= target && vp8.S && vp8.Y { + this = tid + next = tid + } + } else { + if tid < current && tid >= target && extPkt.Packet.Marker { + next = tid + } + } + return +} diff --git a/pkg/sfu/videolayerselector/videolayerselector.go b/pkg/sfu/videolayerselector/videolayerselector.go new file mode 100644 index 000000000..548f736dc --- /dev/null +++ b/pkg/sfu/videolayerselector/videolayerselector.go @@ -0,0 +1,46 @@ +package videolayerselector + +import ( + "github.com/livekit/livekit-server/pkg/sfu/buffer" + "github.com/livekit/livekit-server/pkg/sfu/videolayerselector/temporallayerselector" +) + +type VideoLayerSelectorResult struct { + IsSelected bool + IsRelevant bool + IsResuming bool + IsSwitchingToMaxSpatial bool + RTPMarker bool + DependencyDescriptorExtension []byte +} + +type VideoLayerSelector interface { + IsOvershootOkay() bool + + SetTemporalLayerSelector(tls temporallayerselector.TemporalLayerSelector) + + SetMax(maxLayer buffer.VideoLayer) + SetMaxSpatial(layer int32) + SetMaxTemporal(layer int32) + GetMax() buffer.VideoLayer + + SetTarget(targetLayer buffer.VideoLayer) + GetTarget() buffer.VideoLayer + + 
SetRequestSpatial(layer int32) + GetRequestSpatial() int32 + + SetMaxSeen(maxSeenLayer buffer.VideoLayer) + SetMaxSeenSpatial(layer int32) + SetMaxSeenTemporal(layer int32) + GetMaxSeen() buffer.VideoLayer + + SetParked(parkedLayer buffer.VideoLayer) + GetParked() buffer.VideoLayer + + SetCurrent(currentLayer buffer.VideoLayer) + GetCurrent() buffer.VideoLayer + + Select(extPkt *buffer.ExtPacket, layer int32) VideoLayerSelectorResult + SelectTemporal(extPkt *buffer.ExtPacket) int32 +} diff --git a/pkg/sfu/videolayerselector/vp9.go b/pkg/sfu/videolayerselector/vp9.go new file mode 100644 index 000000000..0e7783d98 --- /dev/null +++ b/pkg/sfu/videolayerselector/vp9.go @@ -0,0 +1,102 @@ +package videolayerselector + +import ( + "github.com/livekit/livekit-server/pkg/sfu/buffer" + "github.com/livekit/protocol/logger" + "github.com/pion/rtp/codecs" +) + +type VP9 struct { + *Base +} + +func NewVP9(logger logger.Logger) *VP9 { + return &VP9{ + Base: NewBase(logger), + } +} + +func NewVP9FromNull(vls VideoLayerSelector) *VP9 { + return &VP9{ + Base: vls.(*Null).Base, + } +} + +func (v *VP9) IsOvershootOkay() bool { + return false +} + +func (v *VP9) Select(extPkt *buffer.ExtPacket, _layer int32) (result VideoLayerSelectorResult) { + vp9, ok := extPkt.Payload.(codecs.VP9Packet) + if !ok { + return + } + + currentLayer := v.currentLayer + if v.currentLayer != v.targetLayer { + updatedLayer := v.currentLayer + + if !v.currentLayer.IsValid() { + if !extPkt.KeyFrame { + return + } + + updatedLayer = extPkt.VideoLayer + currentLayer = extPkt.VideoLayer + } else { + // temporal scale up/down + if v.currentLayer.Temporal < v.targetLayer.Temporal { + if extPkt.VideoLayer.Temporal > v.currentLayer.Temporal && extPkt.VideoLayer.Temporal <= v.targetLayer.Temporal && vp9.U && vp9.B { + updatedLayer.Temporal = extPkt.VideoLayer.Temporal + currentLayer.Temporal = extPkt.VideoLayer.Temporal + } + } else { + if extPkt.VideoLayer.Temporal < v.currentLayer.Temporal && 
extPkt.VideoLayer.Temporal >= v.targetLayer.Temporal && vp9.E { + updatedLayer.Temporal = extPkt.VideoLayer.Temporal + } + } + + // spatial scale up/down + if v.currentLayer.Spatial < v.targetLayer.Spatial { + if extPkt.VideoLayer.Spatial > v.currentLayer.Spatial && extPkt.VideoLayer.Spatial <= v.targetLayer.Spatial && !vp9.P && vp9.B { + updatedLayer.Spatial = extPkt.VideoLayer.Spatial + currentLayer.Spatial = extPkt.VideoLayer.Spatial + } + } else { + if extPkt.VideoLayer.Spatial < v.currentLayer.Spatial && extPkt.VideoLayer.Spatial >= v.targetLayer.Spatial && vp9.E { + updatedLayer.Spatial = extPkt.VideoLayer.Spatial + } + } + } + + if updatedLayer != v.currentLayer { + if !v.currentLayer.IsValid() && updatedLayer.IsValid() { + result.IsResuming = true + } + + if v.currentLayer.Spatial != v.maxLayer.Spatial && updatedLayer.Spatial == v.maxLayer.Spatial { + result.IsSwitchingToMaxSpatial = true + v.logger.Infow( + "reached max layer", + "current", v.currentLayer, + "target", v.targetLayer, + "max", v.maxLayer, + "layer", extPkt.VideoLayer.Spatial, + "req", v.requestSpatial, + "maxSeen", v.maxSeenLayer, + "feed", extPkt.Packet.SSRC, + ) + } + + v.currentLayer = updatedLayer + } + } + + result.RTPMarker = extPkt.Packet.Marker + if extPkt.VideoLayer.Spatial == v.currentLayer.Spatial && vp9.E { + result.RTPMarker = true + } + result.IsSelected = !extPkt.VideoLayer.GreaterThan(currentLayer) + result.IsRelevant = true + return +}