diff --git a/pkg/sfu/forwarder.go b/pkg/sfu/forwarder.go index 33c196b86..8bb21c387 100644 --- a/pkg/sfu/forwarder.go +++ b/pkg/sfu/forwarder.go @@ -1594,6 +1594,12 @@ func (f *Forwarder) getTranslationParamsAudio(extPkt *buffer.ExtPacket, layer in // should be called with lock held func (f *Forwarder) getTranslationParamsVideo(extPkt *buffer.ExtPacket, layer int32) (*TranslationParams, error) { + maybeRollback := func(isSwitching bool) { + if isSwitching { + f.vls.Rollback() + } + } + tp := &TranslationParams{} if !f.vls.GetTarget().IsValid() { @@ -1640,20 +1646,23 @@ func (f *Forwarder) getTranslationParamsVideo(extPkt *buffer.ExtPacket, layer in // To differentiate between the two cases, drop only when in DEFICIENT state. // tp.shouldDrop = true + maybeRollback(result.IsSwitching) return tp, nil } _, err := f.getTranslationParamsCommon(extPkt, layer, tp) if tp.shouldDrop || len(extPkt.Packet.Payload) == 0 { + maybeRollback(result.IsSwitching) return tp, err } // codec specific forwarding check and any needed packet munging + tl, isSwitching := f.vls.SelectTemporal(extPkt) codecBytes, err := f.codecMunger.UpdateAndGet( extPkt, tp.rtp.snOrdering == SequenceNumberOrderingOutOfOrder, tp.rtp.snOrdering == SequenceNumberOrderingGap, - f.vls.SelectTemporal(extPkt), + tl, ) if err != nil { tp.rtp = nil @@ -1663,9 +1672,11 @@ func (f *Forwarder) getTranslationParamsVideo(extPkt *buffer.ExtPacket, layer in // filtered temporal layer, update sequence number offset to prevent holes f.rtpMunger.PacketDropped(extPkt) } + maybeRollback(result.IsSwitching || isSwitching) return tp, nil } + maybeRollback(result.IsSwitching || isSwitching) return tp, err } diff --git a/pkg/sfu/videolayerselector/base.go b/pkg/sfu/videolayerselector/base.go index 534cb751b..7323fc7ae 100644 --- a/pkg/sfu/videolayerselector/base.go +++ b/pkg/sfu/videolayerselector/base.go @@ -11,25 +11,33 @@ type Base struct { tls temporallayerselector.TemporalLayerSelector - maxLayer buffer.VideoLayer - targetLayer buffer.VideoLayer + maxLayer buffer.VideoLayer + maxSeenLayer buffer.VideoLayer + + targetLayer buffer.VideoLayer + previousTargetLayer buffer.VideoLayer + requestSpatial int32 - maxSeenLayer buffer.VideoLayer - parkedLayer buffer.VideoLayer + parkedLayer buffer.VideoLayer + previousParkedLayer buffer.VideoLayer - currentLayer buffer.VideoLayer + currentLayer buffer.VideoLayer + previousLayer buffer.VideoLayer } func NewBase(logger logger.Logger) *Base { return &Base{ - logger: logger, - maxLayer: buffer.InvalidLayer, - targetLayer: buffer.InvalidLayer, // start off with nothing, let streamallocator/opportunistic forwarder set the target - requestSpatial: buffer.InvalidLayerSpatial, - maxSeenLayer: buffer.InvalidLayer, - parkedLayer: buffer.InvalidLayer, - currentLayer: buffer.InvalidLayer, + logger: logger, + maxLayer: buffer.InvalidLayer, + maxSeenLayer: buffer.InvalidLayer, + targetLayer: buffer.InvalidLayer, // start off with nothing, let streamallocator/opportunistic forwarder set the target + previousTargetLayer: buffer.InvalidLayer, + requestSpatial: buffer.InvalidLayerSpatial, + parkedLayer: buffer.InvalidLayer, + previousParkedLayer: buffer.InvalidLayer, + currentLayer: buffer.InvalidLayer, + previousLayer: buffer.InvalidLayer, } } @@ -58,6 +66,7 @@ func (b *Base) GetMax() buffer.VideoLayer { } func (b *Base) SetTarget(targetLayer buffer.VideoLayer) { + b.previousTargetLayer = targetLayer b.targetLayer = targetLayer } @@ -115,12 +124,49 @@ func (b *Base) Select(_extPkt *buffer.ExtPacket, _layer int32) (result VideoLaye return } -func (b *Base) SelectTemporal(extPkt *buffer.ExtPacket) int32 { +func (b *Base) Rollback() { + b.logger.Infow( + "rolling back", + "previous", b.previousLayer, + "current", b.currentLayer, + "previousParked", b.previousParkedLayer, + "parked", b.parkedLayer, + "previousTarget", b.previousTargetLayer, + "target", b.targetLayer, + "max", b.maxLayer, + "req", b.requestSpatial, + "maxSeen", b.maxSeenLayer, + ) + b.parkedLayer = b.previousParkedLayer + b.currentLayer = b.previousLayer + b.targetLayer = b.previousTargetLayer +} + +func (b *Base) SelectTemporal(extPkt *buffer.ExtPacket) (int32, bool) { if b.tls != nil { + isSwitching := false this, next := b.tls.Select(extPkt, b.currentLayer.Temporal, b.targetLayer.Temporal) - b.currentLayer.Temporal = next - return this + if next != b.currentLayer.Temporal { + isSwitching = true + + b.previousLayer = b.currentLayer + b.currentLayer.Temporal = next + + b.logger.Infow( + "updating temporal layer", + "previous", b.previousLayer, + "current", b.currentLayer, + "previousParked", b.previousParkedLayer, + "parked", b.parkedLayer, + "previousTarget", b.previousTargetLayer, + "target", b.targetLayer, + "max", b.maxLayer, + "req", b.requestSpatial, + "maxSeen", b.maxSeenLayer, + ) + } + return this, isSwitching } - return b.currentLayer.Temporal + return b.currentLayer.Temporal, false } diff --git a/pkg/sfu/videolayerselector/dependencydescriptor.go b/pkg/sfu/videolayerselector/dependencydescriptor.go index 3cc5fcf70..7b8c95310 100644 --- a/pkg/sfu/videolayerselector/dependencydescriptor.go +++ b/pkg/sfu/videolayerselector/dependencydescriptor.go @@ -16,8 +16,9 @@ type DependencyDescriptor struct { frameNum *utils.WrapAround[uint16, uint64] decisions *SelectorDecisionCache - activeDecodeTargetsBitmask *uint32 - structure *dede.FrameDependencyStructure + previousActiveDecodeTargetsBitmask *uint32 + activeDecodeTargetsBitmask *uint32 + structure *dede.FrameDependencyStructure chains []*FrameChain @@ -183,6 +184,7 @@ func (d *DependencyDescriptor) Select(extPkt *buffer.ExtPacket, _layer int32) (r } if d.currentLayer != highestDecodeTarget.Layer { + result.IsSwitching = true if !d.currentLayer.IsValid() { result.IsResuming = true d.logger.Infow( @@ -196,8 +198,13 @@ func (d *DependencyDescriptor) Select(extPkt *buffer.ExtPacket, _layer int32) (r "feed", extPkt.Packet.SSRC, ) } + + d.previousLayer = d.currentLayer d.currentLayer = highestDecodeTarget.Layer + + d.previousActiveDecodeTargetsBitmask = d.activeDecodeTargetsBitmask d.activeDecodeTargetsBitmask = buffer.GetActiveDecodeTargetBitmask(d.currentLayer, ddwdt.DecodeTargets) + if d.currentLayer.Spatial == d.requestSpatial { result.IsSwitchingToRequestSpatial = true } @@ -206,7 +213,9 @@ func (d *DependencyDescriptor) Select(extPkt *buffer.ExtPacket, _layer int32) (r result.MaxSpatialLayer = d.currentLayer.Spatial d.logger.Infow( "reached max layer", + "previous", d.previousLayer, "current", d.currentLayer, + "previousTarget", d.previousTargetLayer, "target", d.targetLayer, "max", d.maxLayer, "layer", fd.SpatialId, @@ -255,12 +264,10 @@ func (d *DependencyDescriptor) Select(extPkt *buffer.ExtPacket, _layer int32) (r return } -func (d *DependencyDescriptor) SetTarget(targetLayer buffer.VideoLayer) { - if targetLayer == d.targetLayer { - return - } +func (d *DependencyDescriptor) Rollback() { + d.activeDecodeTargetsBitmask = d.previousActiveDecodeTargetsBitmask - d.Base.SetTarget(targetLayer) + d.Base.Rollback() } func (d *DependencyDescriptor) updateDependencyStructure(structure *dede.FrameDependencyStructure, decodeTargets []buffer.DependencyDescriptorDecodeTarget) { diff --git a/pkg/sfu/videolayerselector/simulcast.go b/pkg/sfu/videolayerselector/simulcast.go index fe6f33659..ebb80d113 100644 --- a/pkg/sfu/videolayerselector/simulcast.go +++ b/pkg/sfu/videolayerselector/simulcast.go @@ -26,111 +26,12 @@ func (s *Simulcast) IsOvershootOkay() bool { } func (s *Simulcast) Select(extPkt *buffer.ExtPacket, layer int32) (result VideoLayerSelectorResult) { - if s.currentLayer.Spatial != s.targetLayer.Spatial { - // Three things to check when not locked to target - // 1. Resumable layer - don't need a key frame - // 2. Opportunistic layer upgrade - needs a key frame - // 3. Need to downgrade - needs a key frame - isActive := s.currentLayer.IsValid() - found := false - if s.parkedLayer.IsValid() { - if s.parkedLayer.Spatial == layer { - s.logger.Infow( - "resuming at parked layer", - "current", s.currentLayer, - "target", s.targetLayer, - "max", s.maxLayer, - "parked", s.parkedLayer, - "req", s.requestSpatial, - "maxSeen", s.maxSeenLayer, - "feed", extPkt.Packet.SSRC, - ) - s.currentLayer = s.parkedLayer - found = true - } - } else { - if extPkt.KeyFrame { - if layer > s.currentLayer.Spatial && layer <= s.targetLayer.Spatial { - s.logger.Infow( - "upgrading layer", - "current", s.currentLayer, - "target", s.targetLayer, - "max", s.maxLayer, - "layer", layer, - "req", s.requestSpatial, - "maxSeen", s.maxSeenLayer, - "feed", extPkt.Packet.SSRC, - ) - found = true - } - - if layer < s.currentLayer.Spatial && layer >= s.targetLayer.Spatial { - s.logger.Infow( - "downgrading layer", - "current", s.currentLayer, - "target", s.targetLayer, - "max", s.maxLayer, - "layer", layer, - "req", s.requestSpatial, - "maxSeen", s.maxSeenLayer, - "feed", extPkt.Packet.SSRC, - ) - found = true - } - - if found { - s.currentLayer.Spatial = layer - s.currentLayer.Temporal = extPkt.VideoLayer.Temporal - } - } + populateSwitches := func(isActive bool, reason string) { + result.IsSwitching = true + if !isActive { + result.IsResuming = true } - if found { - if !isActive { - result.IsResuming = true - } - - s.SetParked(buffer.InvalidLayer) - - if s.currentLayer.Spatial == s.requestSpatial { - result.IsSwitchingToRequestSpatial = true - } - - if s.currentLayer.Spatial >= s.maxLayer.Spatial { - result.IsSwitchingToMaxSpatial = true - result.MaxSpatialLayer = s.currentLayer.Spatial - s.logger.Infow( - "reached max layer", - "current", s.currentLayer, - "target", s.targetLayer, - "max", s.maxLayer, - "layer", layer, - "req", s.requestSpatial, - "maxSeen", s.maxSeenLayer, - "feed", extPkt.Packet.SSRC, - ) - } - - if s.currentLayer.Spatial >= s.maxLayer.Spatial || s.currentLayer.Spatial == s.maxSeenLayer.Spatial { - s.targetLayer.Spatial = s.currentLayer.Spatial - } - } - } - - // if locked to higher than max layer due to overshoot, check if it can be dialed back - if s.currentLayer.Spatial > s.maxLayer.Spatial && layer <= s.maxLayer.Spatial && extPkt.KeyFrame { - s.logger.Infow( - "adjusting overshoot", - "current", s.currentLayer, - "target", s.targetLayer, - "max", s.maxLayer, - "layer", layer, - "req", s.requestSpatial, - "maxSeen", s.maxSeenLayer, - "feed", extPkt.Packet.SSRC, - ) - s.currentLayer.Spatial = layer - if s.currentLayer.Spatial == s.requestSpatial { result.IsSwitchingToRequestSpatial = true } @@ -138,11 +39,92 @@ func (s *Simulcast) Select(extPkt *buffer.ExtPacket, layer int32) (result VideoL if s.currentLayer.Spatial >= s.maxLayer.Spatial { result.IsSwitchingToMaxSpatial = true result.MaxSpatialLayer = s.currentLayer.Spatial + if reason != "" { + reason += ", " + } + reason += "reached max layer" } + if reason != "" { + s.logger.Infow( + reason, + "previous", s.previousLayer, + "current", s.currentLayer, + "previousParked", s.previousParkedLayer, + "parked", s.parkedLayer, + "previousTarget", s.previousTargetLayer, + "target", s.targetLayer, + "max", s.maxLayer, + "layer", layer, + "req", s.requestSpatial, + "maxSeen", s.maxSeenLayer, + "feed", extPkt.Packet.SSRC, + ) + } + } + + if s.currentLayer.Spatial != s.targetLayer.Spatial { + currentLayer := s.currentLayer + + // Three things to check when not locked to target + // 1. Resumable layer - don't need a key frame + // 2. Opportunistic layer upgrade - needs a key frame + // 3. Need to downgrade - needs a key frame + isActive := s.currentLayer.IsValid() + found := false + reason := "" + if s.parkedLayer.IsValid() { + if s.parkedLayer.Spatial == layer { + reason = "resuming at parked layer" + currentLayer = s.parkedLayer + found = true + } + } else { + if extPkt.KeyFrame { + if layer > s.currentLayer.Spatial && layer <= s.targetLayer.Spatial { + reason = "upgrading layer" + found = true + } + + if layer < s.currentLayer.Spatial && layer >= s.targetLayer.Spatial { + reason = "downgrading layer" + found = true + } + + if found { + currentLayer.Spatial = layer + currentLayer.Temporal = extPkt.VideoLayer.Temporal + } + } + } + + if found { + s.previousParkedLayer = s.parkedLayer + s.parkedLayer = buffer.InvalidLayer + + s.previousLayer = s.currentLayer + s.currentLayer = currentLayer + + s.previousTargetLayer = s.targetLayer + if s.currentLayer.Spatial >= s.maxLayer.Spatial || s.currentLayer.Spatial == s.maxSeenLayer.Spatial { + s.targetLayer.Spatial = s.currentLayer.Spatial + } + + populateSwitches(isActive, reason) + } + } + + // if locked to higher than max layer due to overshoot, check if it can be dialed back + if s.currentLayer.Spatial > s.maxLayer.Spatial && layer <= s.maxLayer.Spatial && extPkt.KeyFrame { + s.previousLayer = s.currentLayer + s.currentLayer.Spatial = layer + + s.previousTargetLayer = s.targetLayer if s.currentLayer.Spatial >= s.maxLayer.Spatial || s.currentLayer.Spatial == s.maxSeenLayer.Spatial { s.targetLayer.Spatial = layer } + + populateSwitches(true, "adjusting overshoot") } result.RTPMarker = extPkt.Packet.Marker diff --git a/pkg/sfu/videolayerselector/videolayerselector.go b/pkg/sfu/videolayerselector/videolayerselector.go index b108976bf..ffbb9f42c 100644 --- a/pkg/sfu/videolayerselector/videolayerselector.go +++ b/pkg/sfu/videolayerselector/videolayerselector.go @@ -8,6 +8,7 @@ import ( type VideoLayerSelectorResult struct { IsSelected bool IsRelevant bool + IsSwitching bool IsResuming bool IsSwitchingToRequestSpatial bool IsSwitchingToMaxSpatial bool @@ -46,5 +47,6 @@ type VideoLayerSelector interface { GetCurrent() buffer.VideoLayer Select(extPkt *buffer.ExtPacket, layer int32) VideoLayerSelectorResult - SelectTemporal(extPkt *buffer.ExtPacket) int32 + SelectTemporal(extPkt *buffer.ExtPacket) (int32, bool) + Rollback() } diff --git a/pkg/sfu/videolayerselector/vp9.go b/pkg/sfu/videolayerselector/vp9.go index 4a77f3675..ed1a165b6 100644 --- a/pkg/sfu/videolayerselector/vp9.go +++ b/pkg/sfu/videolayerselector/vp9.go @@ -75,6 +75,7 @@ func (v *VP9) Select(extPkt *buffer.ExtPacket, _layer int32) (result VideoLayerS } if updatedLayer != v.currentLayer { + result.IsSwitching = true if !v.currentLayer.IsValid() && updatedLayer.IsValid() { result.IsResuming = true } @@ -89,6 +90,7 @@ func (v *VP9) Select(extPkt *buffer.ExtPacket, _layer int32) (result VideoLayerS v.logger.Infow( "reached max layer", "current", v.currentLayer, + "updated", updatedLayer, "target", v.targetLayer, "max", v.maxLayer, "layer", extPkt.VideoLayer.Spatial, @@ -98,6 +100,7 @@ func (v *VP9) Select(extPkt *buffer.ExtPacket, _layer int32) (result VideoLayerS ) } + v.previousLayer = v.currentLayer v.currentLayer = updatedLayer } }