From 8dc2c005c33cf45270324793191dc581a13b91e0 Mon Sep 17 00:00:00 2001 From: Raja Subramanian Date: Wed, 12 Jul 2023 14:12:00 +0530 Subject: [PATCH] Add ability to roll back video layer selection. (#1871) * Add ability to roll back video layer selection. Not currently useful, but it is possible to do things like not applying a layer switch if the switch point time stamp is too far back. Add ability to roll back a layer switch and invoke rollback if a packet was selected for forwarding, but a subsequent error or decision to drop the packet can rollback layer switch if that was the switching packet. In current code, the paths where a packet can be dropped after selection does not happen at switch points. So, it was okay to apply the selection unconditionally. But, adding the call to rollback in the current code also in all paths where packet is dropped after selection for consistent code flow. * separate switch for temporal layer --- pkg/sfu/forwarder.go | 13 +- pkg/sfu/videolayerselector/base.go | 78 ++++++-- .../dependencydescriptor.go | 21 +- pkg/sfu/videolayerselector/simulcast.go | 188 ++++++++---------- .../videolayerselector/videolayerselector.go | 4 +- pkg/sfu/videolayerselector/vp9.go | 3 + 6 files changed, 179 insertions(+), 128 deletions(-) diff --git a/pkg/sfu/forwarder.go b/pkg/sfu/forwarder.go index 33c196b86..8bb21c387 100644 --- a/pkg/sfu/forwarder.go +++ b/pkg/sfu/forwarder.go @@ -1594,6 +1594,12 @@ func (f *Forwarder) getTranslationParamsAudio(extPkt *buffer.ExtPacket, layer in // should be called with lock held func (f *Forwarder) getTranslationParamsVideo(extPkt *buffer.ExtPacket, layer int32) (*TranslationParams, error) { + maybeRollback := func(isSwitching bool) { + if isSwitching { + f.vls.Rollback() + } + } + tp := &TranslationParams{} if !f.vls.GetTarget().IsValid() { @@ -1640,20 +1646,23 @@ func (f *Forwarder) getTranslationParamsVideo(extPkt *buffer.ExtPacket, layer in // To differentiate between the two cases, drop only when in DEFICIENT state. // tp.shouldDrop = true + maybeRollback(result.IsSwitching) return tp, nil } _, err := f.getTranslationParamsCommon(extPkt, layer, tp) if tp.shouldDrop || len(extPkt.Packet.Payload) == 0 { + maybeRollback(result.IsSwitching) return tp, err } // codec specific forwarding check and any needed packet munging + tl, isSwitching := f.vls.SelectTemporal(extPkt) codecBytes, err := f.codecMunger.UpdateAndGet( extPkt, tp.rtp.snOrdering == SequenceNumberOrderingOutOfOrder, tp.rtp.snOrdering == SequenceNumberOrderingGap, - f.vls.SelectTemporal(extPkt), + tl, ) if err != nil { tp.rtp = nil @@ -1663,9 +1672,11 @@ func (f *Forwarder) getTranslationParamsVideo(extPkt *buffer.ExtPacket, layer in // filtered temporal layer, update sequence number offset to prevent holes f.rtpMunger.PacketDropped(extPkt) } + maybeRollback(result.IsSwitching || isSwitching) return tp, nil } + maybeRollback(result.IsSwitching || isSwitching) return tp, err } diff --git a/pkg/sfu/videolayerselector/base.go b/pkg/sfu/videolayerselector/base.go index 534cb751b..7323fc7ae 100644 --- a/pkg/sfu/videolayerselector/base.go +++ b/pkg/sfu/videolayerselector/base.go @@ -11,25 +11,33 @@ type Base struct { tls temporallayerselector.TemporalLayerSelector - maxLayer buffer.VideoLayer - targetLayer buffer.VideoLayer + maxLayer buffer.VideoLayer + maxSeenLayer buffer.VideoLayer + + targetLayer buffer.VideoLayer + previousTargetLayer buffer.VideoLayer + requestSpatial int32 - maxSeenLayer buffer.VideoLayer - parkedLayer buffer.VideoLayer + parkedLayer buffer.VideoLayer + previousParkedLayer buffer.VideoLayer - currentLayer buffer.VideoLayer + currentLayer buffer.VideoLayer + previousLayer buffer.VideoLayer } func NewBase(logger logger.Logger) *Base { return &Base{ - logger: logger, - maxLayer: buffer.InvalidLayer, - targetLayer: buffer.InvalidLayer, // start off with nothing, let streamallocator/opportunistic forwarder set the target - requestSpatial: buffer.InvalidLayerSpatial, - maxSeenLayer: buffer.InvalidLayer, - parkedLayer: buffer.InvalidLayer, - currentLayer: buffer.InvalidLayer, + logger: logger, + maxLayer: buffer.InvalidLayer, + maxSeenLayer: buffer.InvalidLayer, + targetLayer: buffer.InvalidLayer, // start off with nothing, let streamallocator/opportunistic forwarder set the target + previousTargetLayer: buffer.InvalidLayer, + requestSpatial: buffer.InvalidLayerSpatial, + parkedLayer: buffer.InvalidLayer, + previousParkedLayer: buffer.InvalidLayer, + currentLayer: buffer.InvalidLayer, + previousLayer: buffer.InvalidLayer, } } @@ -58,6 +66,7 @@ func (b *Base) GetMax() buffer.VideoLayer { } func (b *Base) SetTarget(targetLayer buffer.VideoLayer) { + b.previousTargetLayer = targetLayer b.targetLayer = targetLayer } @@ -115,12 +124,49 @@ func (b *Base) Select(_extPkt *buffer.ExtPacket, _layer int32) (result VideoLaye return } -func (b *Base) SelectTemporal(extPkt *buffer.ExtPacket) int32 { +func (b *Base) Rollback() { + b.logger.Infow( + "rolling back", + "previous", b.previousLayer, + "current", b.currentLayer, + "previousParked", b.previousParkedLayer, + "parked", b.parkedLayer, + "previousTarget", b.previousTargetLayer, + "target", b.targetLayer, + "max", b.maxLayer, + "req", b.requestSpatial, + "maxSeen", b.maxSeenLayer, + ) + b.parkedLayer = b.previousParkedLayer + b.currentLayer = b.previousLayer + b.targetLayer = b.previousTargetLayer +} + +func (b *Base) SelectTemporal(extPkt *buffer.ExtPacket) (int32, bool) { if b.tls != nil { + isSwitching := false this, next := b.tls.Select(extPkt, b.currentLayer.Temporal, b.targetLayer.Temporal) - b.currentLayer.Temporal = next - return this + if next != b.currentLayer.Temporal { + isSwitching = true + + b.previousLayer = b.currentLayer + b.currentLayer.Temporal = next + + b.logger.Infow( + "updating temporal layer", + "previous", b.previousLayer, + "current", b.currentLayer, + "previousParked", b.previousParkedLayer, + "parked", b.parkedLayer, + "previousTarget", b.previousTargetLayer, + "target", b.targetLayer, + "max", b.maxLayer, + "req", b.requestSpatial, + "maxSeen", b.maxSeenLayer, + ) + } + return this, isSwitching } - return b.currentLayer.Temporal + return b.currentLayer.Temporal, false } diff --git a/pkg/sfu/videolayerselector/dependencydescriptor.go b/pkg/sfu/videolayerselector/dependencydescriptor.go index 3cc5fcf70..7b8c95310 100644 --- a/pkg/sfu/videolayerselector/dependencydescriptor.go +++ b/pkg/sfu/videolayerselector/dependencydescriptor.go @@ -16,8 +16,9 @@ type DependencyDescriptor struct { frameNum *utils.WrapAround[uint16, uint64] decisions *SelectorDecisionCache - activeDecodeTargetsBitmask *uint32 - structure *dede.FrameDependencyStructure + previousActiveDecodeTargetsBitmask *uint32 + activeDecodeTargetsBitmask *uint32 + structure *dede.FrameDependencyStructure chains []*FrameChain @@ -183,6 +184,7 @@ func (d *DependencyDescriptor) Select(extPkt *buffer.ExtPacket, _layer int32) (r } if d.currentLayer != highestDecodeTarget.Layer { + result.IsSwitching = true if !d.currentLayer.IsValid() { result.IsResuming = true d.logger.Infow( @@ -196,8 +198,13 @@ func (d *DependencyDescriptor) Select(extPkt *buffer.ExtPacket, _layer int32) (r "feed", extPkt.Packet.SSRC, ) } + + d.previousLayer = d.currentLayer d.currentLayer = highestDecodeTarget.Layer + + d.previousActiveDecodeTargetsBitmask = d.activeDecodeTargetsBitmask d.activeDecodeTargetsBitmask = buffer.GetActiveDecodeTargetBitmask(d.currentLayer, ddwdt.DecodeTargets) + if d.currentLayer.Spatial == d.requestSpatial { result.IsSwitchingToRequestSpatial = true } @@ -206,7 +213,9 @@ func (d *DependencyDescriptor) Select(extPkt *buffer.ExtPacket, _layer int32) (r result.MaxSpatialLayer = d.currentLayer.Spatial d.logger.Infow( "reached max layer", + "previous", d.previousLayer, "current", d.currentLayer, + "previousTarget", d.previousTargetLayer, "target", d.targetLayer, "max", d.maxLayer, "layer", fd.SpatialId, @@ -255,12 +264,10 @@ func (d *DependencyDescriptor) Select(extPkt *buffer.ExtPacket, _layer int32) (r return } -func (d *DependencyDescriptor) SetTarget(targetLayer buffer.VideoLayer) { - if targetLayer == d.targetLayer { - return - } +func (d *DependencyDescriptor) Rollback() { + d.activeDecodeTargetsBitmask = d.previousActiveDecodeTargetsBitmask - d.Base.SetTarget(targetLayer) + d.Base.Rollback() } func (d *DependencyDescriptor) updateDependencyStructure(structure *dede.FrameDependencyStructure, decodeTargets []buffer.DependencyDescriptorDecodeTarget) { diff --git a/pkg/sfu/videolayerselector/simulcast.go b/pkg/sfu/videolayerselector/simulcast.go index fe6f33659..ebb80d113 100644 --- a/pkg/sfu/videolayerselector/simulcast.go +++ b/pkg/sfu/videolayerselector/simulcast.go @@ -26,111 +26,12 @@ func (s *Simulcast) IsOvershootOkay() bool { } func (s *Simulcast) Select(extPkt *buffer.ExtPacket, layer int32) (result VideoLayerSelectorResult) { - if s.currentLayer.Spatial != s.targetLayer.Spatial { - // Three things to check when not locked to target - // 1. Resumable layer - don't need a key frame - // 2. Opportunistic layer upgrade - needs a key frame - // 3. Need to downgrade - needs a key frame - isActive := s.currentLayer.IsValid() - found := false - if s.parkedLayer.IsValid() { - if s.parkedLayer.Spatial == layer { - s.logger.Infow( - "resuming at parked layer", - "current", s.currentLayer, - "target", s.targetLayer, - "max", s.maxLayer, - "parked", s.parkedLayer, - "req", s.requestSpatial, - "maxSeen", s.maxSeenLayer, - "feed", extPkt.Packet.SSRC, - ) - s.currentLayer = s.parkedLayer - found = true - } - } else { - if extPkt.KeyFrame { - if layer > s.currentLayer.Spatial && layer <= s.targetLayer.Spatial { - s.logger.Infow( - "upgrading layer", - "current", s.currentLayer, - "target", s.targetLayer, - "max", s.maxLayer, - "layer", layer, - "req", s.requestSpatial, - "maxSeen", s.maxSeenLayer, - "feed", extPkt.Packet.SSRC, - ) - found = true - } - - if layer < s.currentLayer.Spatial && layer >= s.targetLayer.Spatial { - s.logger.Infow( - "downgrading layer", - "current", s.currentLayer, - "target", s.targetLayer, - "max", s.maxLayer, - "layer", layer, - "req", s.requestSpatial, - "maxSeen", s.maxSeenLayer, - "feed", extPkt.Packet.SSRC, - ) - found = true - } - - if found { - s.currentLayer.Spatial = layer - s.currentLayer.Temporal = extPkt.VideoLayer.Temporal - } - } + populateSwitches := func(isActive bool, reason string) { + result.IsSwitching = true + if !isActive { + result.IsResuming = true } - if found { - if !isActive { - result.IsResuming = true - } - - s.SetParked(buffer.InvalidLayer) - - if s.currentLayer.Spatial == s.requestSpatial { - result.IsSwitchingToRequestSpatial = true - } - - if s.currentLayer.Spatial >= s.maxLayer.Spatial { - result.IsSwitchingToMaxSpatial = true - result.MaxSpatialLayer = s.currentLayer.Spatial - s.logger.Infow( - "reached max layer", - "current", s.currentLayer, - "target", s.targetLayer, - "max", s.maxLayer, - "layer", layer, - "req", s.requestSpatial, - "maxSeen", s.maxSeenLayer, - "feed", extPkt.Packet.SSRC, - ) - } - - if s.currentLayer.Spatial >= s.maxLayer.Spatial || s.currentLayer.Spatial == s.maxSeenLayer.Spatial { - s.targetLayer.Spatial = s.currentLayer.Spatial - } - } - } - - // if locked to higher than max layer due to overshoot, check if it can be dialed back - if s.currentLayer.Spatial > s.maxLayer.Spatial && layer <= s.maxLayer.Spatial && extPkt.KeyFrame { - s.logger.Infow( - "adjusting overshoot", - "current", s.currentLayer, - "target", s.targetLayer, - "max", s.maxLayer, - "layer", layer, - "req", s.requestSpatial, - "maxSeen", s.maxSeenLayer, - "feed", extPkt.Packet.SSRC, - ) - s.currentLayer.Spatial = layer - if s.currentLayer.Spatial == s.requestSpatial { result.IsSwitchingToRequestSpatial = true } @@ -138,11 +39,92 @@ func (s *Simulcast) Select(extPkt *buffer.ExtPacket, layer int32) (result VideoL if s.currentLayer.Spatial >= s.maxLayer.Spatial { result.IsSwitchingToMaxSpatial = true result.MaxSpatialLayer = s.currentLayer.Spatial + if reason != "" { + reason += ", " + } + reason += "reached max layer" } + if reason != "" { + s.logger.Infow( + reason, + "previous", s.previousLayer, + "current", s.currentLayer, + "previousParked", s.previousParkedLayer, + "parked", s.parkedLayer, + "previousTarget", s.previousTargetLayer, + "target", s.targetLayer, + "max", s.maxLayer, + "layer", layer, + "req", s.requestSpatial, + "maxSeen", s.maxSeenLayer, + "feed", extPkt.Packet.SSRC, + ) + } + } + + if s.currentLayer.Spatial != s.targetLayer.Spatial { + currentLayer := s.currentLayer + + // Three things to check when not locked to target + // 1. Resumable layer - don't need a key frame + // 2. Opportunistic layer upgrade - needs a key frame + // 3. Need to downgrade - needs a key frame + isActive := s.currentLayer.IsValid() + found := false + reason := "" + if s.parkedLayer.IsValid() { + if s.parkedLayer.Spatial == layer { + reason = "resuming at parked layer" + currentLayer = s.parkedLayer + found = true + } + } else { + if extPkt.KeyFrame { + if layer > s.currentLayer.Spatial && layer <= s.targetLayer.Spatial { + reason = "upgrading layer" + found = true + } + + if layer < s.currentLayer.Spatial && layer >= s.targetLayer.Spatial { + reason = "downgrading layer" + found = true + } + + if found { + currentLayer.Spatial = layer + currentLayer.Temporal = extPkt.VideoLayer.Temporal + } + } + } + + if found { + s.previousParkedLayer = s.parkedLayer + s.parkedLayer = buffer.InvalidLayer + + s.previousLayer = s.currentLayer + s.currentLayer = currentLayer + + s.previousTargetLayer = s.targetLayer + if s.currentLayer.Spatial >= s.maxLayer.Spatial || s.currentLayer.Spatial == s.maxSeenLayer.Spatial { + s.targetLayer.Spatial = s.currentLayer.Spatial + } + + populateSwitches(isActive, reason) + } + } + + // if locked to higher than max layer due to overshoot, check if it can be dialed back + if s.currentLayer.Spatial > s.maxLayer.Spatial && layer <= s.maxLayer.Spatial && extPkt.KeyFrame { + s.previousLayer = s.currentLayer + s.currentLayer.Spatial = layer + + s.previousTargetLayer = s.targetLayer if s.currentLayer.Spatial >= s.maxLayer.Spatial || s.currentLayer.Spatial == s.maxSeenLayer.Spatial { s.targetLayer.Spatial = layer } + + populateSwitches(true, "adjusting overshoot") } result.RTPMarker = extPkt.Packet.Marker diff --git a/pkg/sfu/videolayerselector/videolayerselector.go b/pkg/sfu/videolayerselector/videolayerselector.go index b108976bf..ffbb9f42c 100644 --- a/pkg/sfu/videolayerselector/videolayerselector.go +++ b/pkg/sfu/videolayerselector/videolayerselector.go @@ -8,6 +8,7 @@ import ( type VideoLayerSelectorResult struct { IsSelected bool IsRelevant bool + IsSwitching bool IsResuming bool IsSwitchingToRequestSpatial bool IsSwitchingToMaxSpatial bool @@ -46,5 +47,6 @@ type VideoLayerSelector interface { GetCurrent() buffer.VideoLayer Select(extPkt *buffer.ExtPacket, layer int32) VideoLayerSelectorResult - SelectTemporal(extPkt *buffer.ExtPacket) int32 + SelectTemporal(extPkt *buffer.ExtPacket) (int32, bool) + Rollback() } diff --git a/pkg/sfu/videolayerselector/vp9.go b/pkg/sfu/videolayerselector/vp9.go index 4a77f3675..ed1a165b6 100644 --- a/pkg/sfu/videolayerselector/vp9.go +++ b/pkg/sfu/videolayerselector/vp9.go @@ -75,6 +75,7 @@ func (v *VP9) Select(extPkt *buffer.ExtPacket, _layer int32) (result VideoLayerS } if updatedLayer != v.currentLayer { + result.IsSwitching = true if !v.currentLayer.IsValid() && updatedLayer.IsValid() { result.IsResuming = true } @@ -89,6 +90,7 @@ func (v *VP9) Select(extPkt *buffer.ExtPacket, _layer int32) (result VideoLayerS v.logger.Infow( "reached max layer", "current", v.currentLayer, + "updated", updatedLayer, "target", v.targetLayer, "max", v.maxLayer, "layer", extPkt.VideoLayer.Spatial, @@ -98,6 +100,7 @@ func (v *VP9) Select(extPkt *buffer.ExtPacket, _layer int32) (result VideoLayerS ) } + v.previousLayer = v.currentLayer v.currentLayer = updatedLayer } }