livekit/pkg/sfu/streamallocator.go

//
// Design of StreamAllocator
//
// Each participant uses one peer connection for all downstream
// traffic. It is possible that the downstream peer connection
// gets congested. In such an event, the SFU (sender on that
// peer connection) should take measures to mitigate the
// media loss and latency that would result from such congestion.
//
// This module is supposed to aggregate downstream tracks and
// drive bandwidth allocation with the goals of
// - Try to send the highest quality media
// - React as quickly as possible to mitigate congestion
//
// Setup:
// ------
// The following should be done to set up a stream allocator
// - There will be one of these per subscriber peer connection.
// Created in livekit-server/transport.go for subscriber type
// peer connections.
// - In `AddSubscribedTrack` of livekit-server/participant.go, the created
// downTrack is added to the stream allocator.
// - In `RemoveSubscribedTrack` of livekit-server/participant.go,
// the downTrack is removed from the stream allocator.
// - Both video and audio tracks are added to this module. Although the
// stream allocator does not act on audio track forwarding, audio track
// information like loss rate may be used to adjust available bandwidth.
//
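// As a rough wiring sketch (illustrative only; the livekit-server call sites
// mentioned above are the source of truth, and downTrack stands in for a
// *DownTrack obtained from the subscription path):
//
//   allocator := NewStreamAllocator()
//   allocator.Start()
//
//   // on subscribe / unsubscribe
//   allocator.AddTrack(downTrack)
//   allocator.RemoveTrack(downTrack)
//
//   // when the subscriber peer connection closes
//   allocator.Stop()
//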
// Callbacks:
// ----------
// StreamAllocator registers the following callbacks on all registered down tracks
// - OnREMB: called when down track receives RTCP REMB. Note that REMB is a
// peer connection level aggregate metric. But, it contains all the SSRCs
// used in the calculation of that REMB. So, there could be multiple
// callbacks per RTCP REMB received (one from each down track pertaining
// to one of the contained SSRCs), all with the same estimated channel capacity.
// - AddReceiverReportListener: called when the down track receives an RTCP RR (Receiver Report).
// - OnAvailableLayersChanged: called when the feeding track changes its layers.
// This could happen when the publisher throttles layers due to upstream congestion
// on its path.
// - OnSubscriptionChanged: called when a down track's settings change as a result
// of client-side requests (muting/pausing a video or limiting the maximum layer).
// - OnPacketSent: called when a media packet is forwarded by the down track. As
// this happens once per forwarded packet, processing in this callback should be
// kept to a minimum.
//
// The following may be needed depending on the StreamAllocator algorithm
// - OnBitrateUpdate: called periodically to update the bit rate at which a down track
// is forwarding. This can be used to measure any overshoot and adjust allocations
// accordingly. This may have granular information like primary bitrate, retransmitted
// bitrate and padding bitrate.
//
// State machine:
// --------------
// The most critical component. It should monitor the current state of the channel and
// take actions to provide the best user experience by striving to achieve the
// goals outlined earlier.
//
// States:
// ------
// - State_PRE_COMMIT: Before the first estimate is committed.
// Estimated channel capacity is initialized to some
// arbitrarily high value to start streaming immediately.
// Serves two purposes
// 1. Gives the bandwidth estimation algorithms data
// 2. Starts streaming as soon as a user joins. Imagine
// a user joining a room with 10 participants already
// in it. That user should start receiving streams
// from everybody as soon as possible.
// - State_STABLE: When all streams are forwarded at their optimal requested layers.
// - State_DEFICIENT: When at least one stream is not able to forward optimal requested layers.
// - State_GRATUITOUS_PROBING: When all streams are forwarded at their optimal requested layers,
// but probing for extra capacity to be prepared for cases like
// new participant joining and streaming OR an existing participant
// starting a new stream like enabling camera or screen share.
//
// Signals:
// -------
// Each state should take action based on these signals and advance the state machine based
// on the result of the action.
// - Signal_ADD_TRACK: A new track has been added.
// - Signal_REMOVE_TRACK: An existing track has been removed.
// - Signal_ESTIMATE_INCREASE: Estimated channel capacity is increasing.
// - Signal_ESTIMATE_DECREASE: Estimated channel capacity is decreasing. Note that when
// channel gets congested, it is possible to get several of these
// in a very short time window.
// - Signal_RECEIVER_REPORT: An RTCP Receiver Report received from some down track.
// - Signal_AVAILABLE_LAYERS_ADD: Available layers of publisher changed, new layer(s) available.
// - Signal_AVAILABLE_LAYERS_REMOVE: Available layers of publisher changed, some previously
// available layer(s) not available anymore.
// - Signal_SUBSCRIPTION_CHANGE: Subscription changed (mute/requested layers changed).
// - Signal_PERIODIC_PING: Periodic ping
//
// There are several interesting challenges which are documented in relevant code below.
//
package sfu
import (
"math"
"sync"
"time"
"github.com/pion/rtcp"
"github.com/pion/webrtc/v3"
)
const (
InitialChannelCapacity = 100 * 1000 * 1000 // 100 Mbps
EstimateEpsilon = 2000 // 2 kbps
BoostPct = 8
BoostMinBps = 20 * 1000 // 20 kbps
BoostMaxBps = 60 * 1000 // 60 kbps
GratuitousProbeHeadroomBps = 1 * 1000 * 1000 // if headroom is more than 1 Mbps, don't probe
GratuitousProbePct = 10
GratuitousProbeMaxBps = 300 * 1000 // 300 kbps
GratuitousProbeMinDurationMs = 500
GratuitousProbeMaxDurationMs = 600
AudioLossWeight = 0.75
VideoLossWeight = 0.25
)
type State int
const (
State_PRE_COMMIT State = iota
State_STABLE
State_DEFICIENT
State_GRATUITOUS_PROBING
)
type Signal int
const (
Signal_NONE Signal = iota
Signal_ADD_TRACK
Signal_REMOVE_TRACK
Signal_ESTIMATE_INCREASE
Signal_ESTIMATE_DECREASE
Signal_RECEIVER_REPORT
Signal_AVAILABLE_LAYERS_ADD
Signal_AVAILABLE_LAYERS_REMOVE
Signal_SUBSCRIPTION_CHANGE
Signal_PERIODIC_PING
)
type BoostMode int
const (
BoostMode_LAYER BoostMode = iota
BoostMode_BANDWIDTH
)
var (
// LK-TODO-START
// These constants will definitely require more tweaking.
// In fact, simple time-thresholded rules most probably will not be enough.
// LK-TODO-END
EstimateCommitMs = 2 * 1000 * time.Millisecond // 2 seconds
ProbeWaitMs = 5 * 1000 * time.Millisecond // 5 seconds
GratuitousProbeWaitMs = 8 * 1000 * time.Millisecond // 8 seconds
BoostWaitMs = 3 * 1000 * time.Millisecond // 3 seconds
)
// LK-TODO add logger and log interesting events
type StreamAllocator struct {
estimateMu sync.RWMutex
trackingSSRC uint32
committedChannelCapacity uint64
lastCommitTime time.Time
prevReceivedEstimate uint64
receivedEstimate uint64
lastEstimateDecreaseTime time.Time
boostMode BoostMode
boostedChannelCapacity uint64
lastBoostTime time.Time
tracksMu sync.RWMutex
tracks map[string]*Track
prober *Prober
state State
chMu sync.RWMutex
eventCh chan []Event
runningCh chan struct{}
}
type Event struct {
Signal Signal
DownTrack *DownTrack
}
func NewStreamAllocator() *StreamAllocator {
s := &StreamAllocator{
committedChannelCapacity: InitialChannelCapacity,
lastCommitTime: time.Now(),
receivedEstimate: InitialChannelCapacity,
lastEstimateDecreaseTime: time.Now(),
boostMode: BoostMode_LAYER,
tracks: make(map[string]*Track),
prober: NewProber(),
state: State_PRE_COMMIT,
eventCh: make(chan []Event, 10),
runningCh: make(chan struct{}),
}
s.prober.OnSendProbe(s.onSendProbe)
return s
}
func (s *StreamAllocator) Start() {
go s.processEvents()
go s.ping()
}
func (s *StreamAllocator) Stop() {
s.chMu.Lock()
defer s.chMu.Unlock()
close(s.runningCh)
close(s.eventCh)
}
func (s *StreamAllocator) AddTrack(downTrack *DownTrack) {
downTrack.OnREMB(s.onREMB)
downTrack.AddReceiverReportListener(s.onReceiverReport)
downTrack.OnAvailableLayersChanged(s.onAvailableLayersChanged)
downTrack.OnSubscriptionChanged(s.onSubscriptionChanged)
downTrack.OnPacketSent(s.onPacketSent)
s.tracksMu.Lock()
s.tracks[downTrack.ID()] = NewTrack(downTrack)
s.tracksMu.Unlock()
s.postEvent(Signal_ADD_TRACK, downTrack)
}
func (s *StreamAllocator) RemoveTrack(downTrack *DownTrack) {
s.tracksMu.Lock()
if _, ok := s.tracks[downTrack.ID()]; !ok {
s.tracksMu.Unlock()
return
}
delete(s.tracks, downTrack.ID())
s.tracksMu.Unlock()
s.postEvent(Signal_REMOVE_TRACK, downTrack)
}
func (s *StreamAllocator) onREMB(downTrack *DownTrack, remb *rtcp.ReceiverEstimatedMaximumBitrate) {
// the channel capacity is estimated at a peer connection level. All down tracks
// in the peer connection will end up calling this for a REMB report with
// the same estimated channel capacity. Use a tracking SSRC to lock on to
// one report. As SSRCs can be dropped over time, update the tracking SSRC as needed
//
// A couple of things to keep in mind
// - REMB reports could be sent gratuitously as a way of providing
// periodic feedback, i. e. even if the estimated capacity does not
// change, there could be REMB packets on the wire. Those gratuitous
// REMBs should not trigger anything bad.
// - As each down track will issue this callback for the same REMB packet
// from the wire, theoretically it is possible that one down track's
// callback from previous REMB comes after another down track's callback
// from the new REMB. REMBs could fire very quickly especially when
// the network is entering congestion.
// LK-TODO-START
// Need to check if the same SSRC reports can somehow race, i.e. does pion send
// RTCP dispatch for same SSRC on different threads? If not, the tracking SSRC
// should prevent racing
// LK-TODO-END
s.estimateMu.Lock()
found := false
for _, ssrc := range remb.SSRCs {
if ssrc == s.trackingSSRC {
found = true
break
}
}
if !found {
if len(remb.SSRCs) == 0 {
// LK-TODO - log about REMB without SSRCs
s.estimateMu.Unlock()
return
}
// try to lock to track which is sending this update
for _, ssrc := range remb.SSRCs {
if ssrc == downTrack.SSRC() {
s.trackingSSRC = downTrack.SSRC()
found = true
break
}
}
if !found {
s.trackingSSRC = remb.SSRCs[0]
}
}
if s.trackingSSRC != downTrack.SSRC() {
s.estimateMu.Unlock()
return
}
s.prevReceivedEstimate = s.receivedEstimate
s.receivedEstimate = uint64(remb.Bitrate)
signal := s.maybeCommitEstimate()
s.estimateMu.Unlock()
if signal != Signal_NONE {
s.postEvent(signal, nil)
}
}
// LK-TODO-START
// Receiver report stats are not used in the current implementation.
//
// The idea is to use a loss/rtt based estimator and compare against REMB like outlined here
// https://datatracker.ietf.org/doc/html/draft-ietf-rmcat-gcc-02#section-6
//
// But the implementation could get quite tricky. So, a separate PR dedicated effort for that
// is required. Something like from Chrome, but hopefully much less complicated :-)
// https://source.chromium.org/chromium/chromium/src/+/main:third_party/webrtc/modules/congestion_controller/goog_cc/loss_based_bandwidth_estimation.cc;bpv=0;bpt=1
// LK-TODO-END
func (s *StreamAllocator) onReceiverReport(downTrack *DownTrack, rr *rtcp.ReceiverReport) {
s.tracksMu.RLock()
defer s.tracksMu.RUnlock()
if track, ok := s.tracks[downTrack.ID()]; ok {
track.UpdatePacketStats(rr)
}
}
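// Illustration only, not wired up anywhere: a minimal sketch of the loss-based
// cap described in draft-ietf-rmcat-gcc-02 section 6 (referenced in the LK-TODO above).
// It assumes a smoothed loss fraction in [0, 1] is already available (for example,
// calculateLoss() below divided by 100). The resulting target would then be combined
// with the REMB-based estimate, typically by taking the minimum of the two.
func lossBasedTargetSketch(prevTargetBps uint64, lossFraction float64) uint64 {
	switch {
	case lossFraction < 0.02:
		// negligible loss: ramp up by 5%
		return uint64(float64(prevTargetBps) * 1.05)
	case lossFraction > 0.10:
		// heavy loss: back off proportionally to the loss fraction
		return uint64(float64(prevTargetBps) * (1.0 - 0.5*lossFraction))
	default:
		// 2% - 10% loss: hold the current target
		return prevTargetBps
	}
}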
// called when feeding track's simulcast layer availability changes
func (s *StreamAllocator) onAvailableLayersChanged(downTrack *DownTrack, layerAdded bool) {
// LK-TODO: Look at processing specific downtrack
if layerAdded {
s.postEvent(Signal_AVAILABLE_LAYERS_ADD, downTrack)
} else {
s.postEvent(Signal_AVAILABLE_LAYERS_REMOVE, downTrack)
}
}
// called when subscription settings change
func (s *StreamAllocator) onSubscriptionChanged(downTrack *DownTrack) {
// LK-TODO: Look at processing specific downtrack
s.postEvent(Signal_SUBSCRIPTION_CHANGE, downTrack)
}
// called when DownTrack sends a packet
func (s *StreamAllocator) onPacketSent(downTrack *DownTrack, size int) {
if downTrack.Kind() == webrtc.RTPCodecTypeAudio {
return
}
s.prober.PacketSent(size)
}
// called when prober wants to send packets
func (s *StreamAllocator) onSendProbe(bytesToSend int) int {
if bytesToSend <= 0 {
return 0
}
s.tracksMu.RLock()
defer s.tracksMu.RUnlock()
bytesSent := 0
for _, track := range s.tracks {
sent := track.WritePaddingRTP(bytesToSend)
bytesSent += sent
bytesToSend -= sent
if bytesToSend <= 0 {
break
}
}
return bytesSent
}
func (s *StreamAllocator) postEvent(signal Signal, downTrack *DownTrack) {
s.chMu.RLock()
defer s.chMu.RUnlock()
if !s.isRunning() {
return
}
s.eventCh <- []Event{Event{
Signal: signal,
DownTrack: downTrack,
}}
}
func (s *StreamAllocator) processEvents() {
for events := range s.eventCh {
if events == nil {
return
}
for _, event := range events {
s.runStateMachine(event)
}
}
}
func (s *StreamAllocator) isRunning() bool {
select {
case <-s.runningCh:
return false
default:
return true
}
}
func (s *StreamAllocator) ping() {
ticker := time.NewTicker(time.Second)
for s.isRunning() {
<-ticker.C
if !s.isRunning() {
return
}
s.estimateMu.Lock()
signal := s.maybeCommitEstimate()
s.estimateMu.Unlock()
if signal != Signal_NONE {
s.postEvent(signal, nil)
}
s.postEvent(Signal_PERIODIC_PING, nil)
}
}
// LK-TODO-START
// Typically, in a system like this, there are track priorities.
// It is either implemented as policy
// Examples:
// 1. active speaker gets hi-res, all else lo-res
// 2. screen share streams get hi-res, all else lo-res
// OR it is left up to the clients to subscribe explicitly to the quality they want.
//
// When such a policy is implemented, some of the state machine behaviour needs
// to be changed. For example, in State_DEFICIENT when Signal_ADD_TRACK, it does not
// allocate immediately (it allocates only if enough time has passed since last
// estimate decrease or since the last artificial estimate boost). But, if there is
// a policy and the added track falls in the high priority bucket, an allocation
// would be required even in State_DEFICIENT to ensure higher priority streams are
// forwarded without delay.
// LK-TODO-END
// LK-TODO-START
// A better implementation would be to handle each signal (rather than each state),
// as for a lot of signals all the states do the same thing. But, it is written the
// following way for better readability. Will do the refactor once this code is more
// stable and more people have had a chance to get familiar with it.
// LK-TODO-END
func (s *StreamAllocator) runStateMachine(event Event) {
switch s.state {
case State_PRE_COMMIT:
s.runStatePreCommit(event)
case State_STABLE:
s.runStateStable(event)
case State_DEFICIENT:
s.runStateDeficient(event)
case State_GRATUITOUS_PROBING:
s.runStateGratuitousProbing(event)
}
}
// LK-TODO-START
// Signal_ADD_TRACK is not useful. Probably can get rid of it.
// AVAILABLE_LAYERS_ADD/REMOVE should be how a starting track
// gets an allocation.
// LK-TODO-END
func (s *StreamAllocator) runStatePreCommit(event Event) {
switch event.Signal {
case Signal_ADD_TRACK:
s.allocate()
case Signal_REMOVE_TRACK:
s.allocate()
case Signal_ESTIMATE_INCREASE:
s.allocate()
case Signal_ESTIMATE_DECREASE:
s.allocate()
case Signal_RECEIVER_REPORT:
case Signal_AVAILABLE_LAYERS_ADD:
s.allocate()
case Signal_AVAILABLE_LAYERS_REMOVE:
s.allocate()
case Signal_SUBSCRIPTION_CHANGE:
s.allocate()
case Signal_PERIODIC_PING:
}
}
func (s *StreamAllocator) runStateStable(event Event) {
switch event.Signal {
case Signal_ADD_TRACK:
s.allocate()
case Signal_REMOVE_TRACK:
// LK-TODO - may want to re-calculate channel usage?
case Signal_ESTIMATE_INCREASE:
// streaming optimally, no need to do anything
case Signal_ESTIMATE_DECREASE:
s.allocate()
case Signal_RECEIVER_REPORT:
case Signal_AVAILABLE_LAYERS_ADD:
s.allocate()
case Signal_AVAILABLE_LAYERS_REMOVE:
s.allocate()
case Signal_SUBSCRIPTION_CHANGE:
s.allocate()
case Signal_PERIODIC_PING:
// if bandwidth estimate has been stable for a while, maybe gratuitously probe
s.maybeGratuitousProbe()
}
}
// LK-TODO-START
// The current implementation tries to probe using media if the allocation is not optimal.
//
// But, another option to try is using padding only to probe even when deficient.
// In the current impl, starting a new stream or moving a stream to a new layer might end up
// affecting other streams, i.e. the user starts a new stream and sees that
// affecting all the streams. Using padding only means a couple of things
// 1. If padding packets get lost, it is not an issue
// 2. From a user perspective, any degradation will appear like a network glitch and not
// a consequence of starting a new stream. That is not necessarily better (just because
// the user does not see it as a direct effect of starting a stream does not mean it is
// preferred), but it is something to keep in mind in terms of user perception.
// LK-TODO-END
func (s *StreamAllocator) runStateDeficient(event Event) {
switch event.Signal {
case Signal_ADD_TRACK:
s.maybeProbe()
case Signal_REMOVE_TRACK:
s.allocate()
case Signal_ESTIMATE_INCREASE:
// as long as estimate is increasing, keep going.
// Switch to STABLE state if estimate exceeds optimal bandwidth needed.
if s.getChannelCapacity() > s.getOptimalBandwidthUsage() {
s.resetBoost()
s.setState(State_STABLE)
}
case Signal_ESTIMATE_DECREASE:
// stop using the boosted estimate
s.resetBoost()
s.allocate()
case Signal_RECEIVER_REPORT:
case Signal_AVAILABLE_LAYERS_ADD:
s.maybeProbe()
case Signal_AVAILABLE_LAYERS_REMOVE:
s.allocate()
case Signal_SUBSCRIPTION_CHANGE:
s.allocate()
case Signal_PERIODIC_PING:
s.maybeProbe()
}
}
func (s *StreamAllocator) runStateGratuitousProbing(event Event) {
// for anything that needs a run of allocation, stop the prober as the traffic
// shape will be altered after an allocation. Although prober will take into
// account regular traffic, conservatively stop the prober before an allocation
// to avoid any self-inflicted damage
switch event.Signal {
case Signal_ADD_TRACK:
s.prober.Reset()
s.allocate()
case Signal_REMOVE_TRACK:
// LK-TODO - may want to re-calculate channel usage?
case Signal_ESTIMATE_INCREASE:
// good, got a better estimate. Prober may or may not have finished.
// Let it continue if it is still running.
case Signal_ESTIMATE_DECREASE:
// stop gratuitous probing immediately and allocate
s.prober.Reset()
s.allocate()
case Signal_RECEIVER_REPORT:
case Signal_AVAILABLE_LAYERS_ADD:
s.prober.Reset()
s.allocate()
case Signal_AVAILABLE_LAYERS_REMOVE:
s.prober.Reset()
s.allocate()
case Signal_SUBSCRIPTION_CHANGE:
s.prober.Reset()
s.allocate()
case Signal_PERIODIC_PING:
if !s.prober.IsRunning() {
// try for more
s.maybeGratuitousProbe()
// LK-TODO - log about more probing here
}
}
}
func (s *StreamAllocator) setState(state State) {
if s.state != state {
// LK-TODO log state changes
}
s.state = state
}
func (s *StreamAllocator) maybeCommitEstimate() Signal {
// commit channel capacity estimate under following rules
// 1. Abs(receivedEstimate - prevReceivedEstimate) < EstimateEpsilon => estimate stable
// 2. time.Since(lastCommitTime) > EstimateCommitMs => to catch long oscillating estimate
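// For example, consecutive REMB estimates of 1.500 Mbps and 1.501 Mbps (1 kbps apart)
// count as settled under rule 1, while a jump from 1.5 Mbps to 1.6 Mbps does not.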
if math.Abs(float64(s.receivedEstimate)-float64(s.prevReceivedEstimate)) > EstimateEpsilon {
// too large a change, wait for estimate to settle
return Signal_NONE
}
if time.Since(s.lastCommitTime) < EstimateCommitMs {
// don't commit too often
return Signal_NONE
}
if s.receivedEstimate == s.committedChannelCapacity {
// no change in estimate, no need to commit
return Signal_NONE
}
signal := Signal_ESTIMATE_INCREASE
if s.receivedEstimate < s.committedChannelCapacity {
signal = Signal_ESTIMATE_DECREASE
s.lastEstimateDecreaseTime = time.Now()
}
s.committedChannelCapacity = s.receivedEstimate
s.lastCommitTime = time.Now()
return signal
}
func (s *StreamAllocator) getChannelCapacity() uint64 {
s.estimateMu.RLock()
defer s.estimateMu.RUnlock()
return s.committedChannelCapacity
}
func (s *StreamAllocator) allocate() {
// LK-TODO-START
// Introduce some rules for allocation. Something like
// - When estimate decreases, immediately.
// Maybe have some threshold for decrease case also before triggering.
// o 5% decrease OR 200 kbps absolute decrease
// - When estimate increases
// o 10% increase - conservative in pushing more data
// o even if 10% increase, do it only once every 10/15/30 seconds
// When estimate goes up/down, there could be multiple updates. The challenge
// is to react quickly, but not too quickly.
//
// Some of the challenges here are
// - Audio packet loss in subscriber PC should be considered.
// If audio loss is too high, throttle video aggressively as
// audio quality trumps anything else in a conferencing application.
// Note that bandwidth estimation algorithms themselves
// might adjust for it and report estimated capacity.
// - Video packet loss should be taken into consideration too.
// - Especially tricky is video start/stop (either track start/stop
// or Simulcast spatial layer switching (temporal layer switching
// is fine)). That requires a key frame which is usually 10X
// the size of a P-frame. So when channel capacity goes down
// switching to a lower spatial layer could cause a temporary
// spike in bitrate exacerbating the already congested channel
// condition. This is a reason to use a Pacer in the path to
// smooth out spikes. But, a Pacer introduces significant
// overhead both in terms of memory (outgoing packets need to
// be queued) and CPU (a high frequency polling thread to drain
// the queued packets on to the wire at predictable rate)
// - Video retransmission rate should be taken into account while
// allocating to check which layer of publisher will
// fit in available bandwidth.
// - Increasing channel capacity is a tricky one. Some times,
// the bandwidth estimators will not report an increase unless
// the channel is probed with more traffic. So, may have to
// trigger an allocation if the channel is stable for a while
// and send some extra streams. Another option is to just
// send RTP padding only packets to probe the channel which
// can be done on an existing stream without re-enabling a
// stream.
// - There is also the issue of time synchronization. This makes
// debugging/simulating scenarios difficult. Basically, there
// are various kinds of delays in the system. So, when something
// really happened and when we are really responding is always
// going to be offset. So, need to be cognizant of that and
// apply necessary corrections whenever possible. For example
// o Bandwidth estimation itself takes time
// o RTCP packets could be lost
// o RTCP Receiver Report loss could have happened a while back.
// As RTCP RR is usually reported once a second or so, if there
// is loss, there is no indication whether that loss happened at
// the beginning of the window or not.
// o When layer switching, there are more round trips needed.
// A PLI has to go to the publisher and publisher has to
// generate a key frame. Another very important event
// (generation of a key frame) happens potentially 100s of ms
// after we asked for it.
// In general, just need to be aware of these and tweak allocation
// to not cause oscillations.
// LK-TODO-END
//
// LK-TODO-START
// Calculate the aggregate loss. This may or may not
// be necessary depending on the algorithm we choose. In this
// pass, we could also calculate audio & video track loss
// separately and use different rules.
//
// The loss calculation should be for the window between last
// allocation and now. The `lastPackets*` field in
// `Track` structure is used to cache the packet stats
// at the last allocation. Potentially need to think about
// giving higher weight to recent losses. So, might have
// to update the `lastPackets*` periodically even when
// there is no allocation for a long time to ensure loss calculation
// remains fresh.
// LK-TODO-END
//
s.tracksMu.RLock()
//
// Ask down tracks to adjust their forwarded layers.
// It is possible that tracks might all fit under boosted bandwidth scenarios.
// So, track the total requested bandwidth and mark the state DEFICIENT if the total is
// above the estimated channel capacity even if the optimal signal is true.
//
isOptimal := true
totalBandwidthRequested := uint64(0)
committedChannelCapacity := s.getChannelCapacity()
availableChannelCapacity := committedChannelCapacity
if availableChannelCapacity < s.boostedChannelCapacity {
availableChannelCapacity = s.boostedChannelCapacity
}
for _, track := range s.tracks {
//
// `audio` tracks will do nothing in this method.
//
// `video` tracks could do one of the following
// - no change, i. e. currently forwarding optimal available
// layer and there is enough bandwidth for that.
// - adjust layers up or down
// - mute if there is not enough capacity for any layer
// NOTE: When video switches layers, on layer switch up,
// the current layer can keep forwarding to ensure smooth
// video at the client. As layer up usually means there is
// enough bandwidth, the lower layer can keep streaming till
// the switch point for higher layer becomes available.
// But, in the other direction, higher layer forwarding should
// be stopped immediately to not further congest the channel.
//
//
bandwidthRequested, optimalBandwidthNeeded := track.AdjustAllocation(availableChannelCapacity)
totalBandwidthRequested += bandwidthRequested
if optimalBandwidthNeeded > 0 && bandwidthRequested < optimalBandwidthNeeded {
//
// Assuming this is a prioritized list of tracks
// and we are walking down in that priority order.
// Once one of those streams does not fit, set
// the availableChannelCapacity to 0 so that no
// other lower priority stream gets forwarded.
// Note that a lower priority stream may have
// a layer which might fit in the left over
// capacity. This is one type of policy
// implementation. There may be other policies
// which might allow lower priority to go through too.
// So, we need some sort of policy framework here
// to decide which streams get priority
//
availableChannelCapacity = 0
isOptimal = false
} else {
availableChannelCapacity -= bandwidthRequested
}
}
s.tracksMu.RUnlock()
if !isOptimal || totalBandwidthRequested > committedChannelCapacity {
s.setState(State_DEFICIENT)
} else {
if committedChannelCapacity != InitialChannelCapacity {
s.resetBoost()
s.setState(State_STABLE)
}
}
//
// The above loop may become a concern. In a typical conference
// kind of scenario, there are probably not that many people, so
// the number of down tracks will be limited.
//
// But, one can imagine a roomless case with a single peer
// connection between an RTC node and a relay where all the streams
// (even spanning multiple rooms) are on a single peer connection.
// In that case, I think this should mostly be disabled, i. e.
// that peer connection should be looked at as RTC node's publisher
// peer connection and any throttling mechanisms should be disabled.
//
}
func (s *StreamAllocator) getExpectedBandwidthUsage() uint64 {
s.tracksMu.RLock()
defer s.tracksMu.RUnlock()
expected := uint64(0)
for _, track := range s.tracks {
expected += track.BandwidthRequested()
}
return expected
}
func (s *StreamAllocator) getOptimalBandwidthUsage() uint64 {
s.tracksMu.RLock()
defer s.tracksMu.RUnlock()
optimal := uint64(0)
for _, track := range s.tracks {
optimal += track.BandwidthOptimal()
}
return optimal
}
// LK-TODO: unused till loss-based estimation is done, but just a sample impl of weighting audio loss higher
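// For example, with 2% audio loss and 8% video loss, the weighted loss is
// 0.75*2 + 0.25*8 = 3.5%.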
func (s *StreamAllocator) calculateLoss() float32 {
s.tracksMu.RLock()
defer s.tracksMu.RUnlock()
packetsAudio := uint32(0)
packetsLostAudio := uint32(0)
packetsVideo := uint32(0)
packetsLostVideo := uint32(0)
for _, track := range s.tracks {
kind, packets, packetsLost := track.GetPacketStats()
if kind == webrtc.RTPCodecTypeAudio {
packetsAudio += packets
packetsLostAudio += packetsLost
}
if kind == webrtc.RTPCodecTypeVideo {
packetsVideo += packets
packetsLostVideo += packetsLost
}
}
audioLossPct := float32(0.0)
if packetsAudio != 0 {
audioLossPct = (float32(packetsLostAudio) * 100.0) / float32(packetsAudio)
}
videoLossPct := float32(0.0)
if packetsVideo != 0 {
videoLossPct = (float32(packetsLostVideo) * 100.0) / float32(packetsVideo)
}
return AudioLossWeight*audioLossPct + VideoLossWeight*videoLossPct
}
func (s *StreamAllocator) maybeProbe() {
if !s.isTimeToBoost() {
return
}
switch s.boostMode {
case BoostMode_LAYER:
s.maybeBoostLayer()
case BoostMode_BANDWIDTH:
s.maybeBoostBandwidth()
}
}
func (s *StreamAllocator) maybeBoostLayer() {
s.tracksMu.RLock()
for _, track := range s.tracks {
boosted, additionalBandwidth := track.IncreaseAllocation()
if boosted {
if s.boostedChannelCapacity > s.committedChannelCapacity {
s.boostedChannelCapacity += additionalBandwidth
} else {
s.boostedChannelCapacity = s.committedChannelCapacity + additionalBandwidth
}
s.lastBoostTime = time.Now()
break
}
}
s.tracksMu.RUnlock()
}
func (s *StreamAllocator) maybeBoostBandwidth() {
// temporarily boost estimate for probing.
// Boost either the committed channel capacity or previous boost point if there is one
baseBps := s.getChannelCapacity()
if baseBps < s.boostedChannelCapacity {
baseBps = s.boostedChannelCapacity
}
boostBps := (baseBps * BoostPct) / 100
if boostBps < BoostMinBps {
boostBps = BoostMinBps
}
if boostBps > BoostMaxBps {
boostBps = BoostMaxBps
}
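// e.g. with a base of 500 kbps, boostBps = clamp(8% = 40 kbps, 20 kbps, 60 kbps) = 40 kbps,
// so the boosted channel capacity becomes 540 kbps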
s.boostedChannelCapacity = baseBps + boostBps
s.lastBoostTime = time.Now()
s.allocate()
}
func (s *StreamAllocator) isTimeToBoost() bool {
// if enough time has passed since the last estimate drop or last estimate boost,
// artificially boost estimate before allocating.
// Checking against last estimate boost prevents multiple artificial boosts
// in situations where multiple tracks become available in a short span.
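// e.g. the first boost after an estimate decrease can happen only after ProbeWaitMs
// (5 seconds) from the decrease; subsequent boosts are spaced at least BoostWaitMs
// (3 seconds) apart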
if !s.lastBoostTime.IsZero() {
return time.Since(s.lastBoostTime) > BoostWaitMs
} else {
return time.Since(s.lastEstimateDecreaseTime) > ProbeWaitMs
}
}
func (s *StreamAllocator) resetBoost() {
// use the zero value so that isTimeToBoost falls back to checking
// the time since the last estimate decrease
s.lastBoostTime = time.Time{}
s.boostedChannelCapacity = 0
}
func (s *StreamAllocator) maybeGratuitousProbe() {
if time.Since(s.lastEstimateDecreaseTime) < GratuitousProbeWaitMs {
return
}
committedChannelCapacity := s.getChannelCapacity()
expectedRateBps := s.getExpectedBandwidthUsage()
headroomBps := committedChannelCapacity - expectedRateBps
if headroomBps > GratuitousProbeHeadroomBps {
return
}
probeRateBps := (committedChannelCapacity * GratuitousProbePct) / 100
if probeRateBps > GratuitousProbeMaxBps {
probeRateBps = GratuitousProbeMaxBps
}
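// Worked example (assuming AddCluster takes a total target rate and the current
// expected rate): with a committed estimate of 2 Mbps and expected usage of
// 1.8 Mbps, headroom is 200 kbps (< 1 Mbps, so probing proceeds), probeRateBps
// becomes min(10% of 2 Mbps, 300 kbps) = 200 kbps, and the probe targets
// roughly 2.2 Mbps in total for 500-600 ms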
s.prober.AddCluster(int(committedChannelCapacity+probeRateBps), int(expectedRateBps), GratuitousProbeMinDurationMs, GratuitousProbeMaxDurationMs)
s.setState(State_GRATUITOUS_PROBING)
}
type Track struct {
// LK-TODO-START
// Check if we can do without a lock?
//
// Packet stats are updated in a different thread.
// Maybe a specific lock for that?
// There may be more in the future though.
// LK-TODO-END
lock sync.RWMutex
downTrack *DownTrack
highestSN uint32
packetsLost uint32
lastHighestSN uint32
lastPacketsLost uint32
bandwidthRequested uint64
optimalBandwidthNeeded uint64
}
func NewTrack(downTrack *DownTrack) *Track {
return &Track{
downTrack: downTrack,
}
}
// LK-TODO this should probably be maintained in downTrack and this module can query what it needs
func (t *Track) UpdatePacketStats(rr *rtcp.ReceiverReport) {
t.lock.Lock()
defer t.lock.Unlock()
t.lastHighestSN = t.highestSN
t.lastPacketsLost = t.packetsLost
for _, report := range rr.Reports {
if report.LastSequenceNumber > t.highestSN {
t.highestSN = report.LastSequenceNumber
}
if report.TotalLost > t.packetsLost {
t.packetsLost = report.TotalLost
}
}
}
func (t *Track) GetPacketStats() (webrtc.RTPCodecType, uint32, uint32) {
t.lock.RLock()
defer t.lock.RUnlock()
return t.downTrack.Kind(), t.highestSN - t.lastHighestSN, t.packetsLost - t.lastPacketsLost
}
func (t *Track) WritePaddingRTP(bytesToSend int) int {
return t.downTrack.WritePaddingRTP(bytesToSend)
}
func (t *Track) AdjustAllocation(availableChannelCapacity uint64) (uint64, uint64) {
t.bandwidthRequested, t.optimalBandwidthNeeded = t.downTrack.AdjustAllocation(availableChannelCapacity)
return t.bandwidthRequested, t.optimalBandwidthNeeded
}
func (t *Track) IncreaseAllocation() (bool, uint64) {
increased, bandwidthRequested, optimalBandwidthNeeded := t.downTrack.IncreaseAllocation()
additionalBandwidth := 0
if increased {
additionalBandwidth = int(bandwidthRequested) - int(t.bandwidthRequested)
if additionalBandwidth < 0 {
additionalBandwidth = 0
}
t.bandwidthRequested = bandwidthRequested
t.optimalBandwidthNeeded = optimalBandwidthNeeded
}
return increased, uint64(additionalBandwidth)
}
func (t *Track) BandwidthRequested() uint64 {
return t.bandwidthRequested
}
func (t *Track) BandwidthOptimal() uint64 {
return t.optimalBandwidthNeeded
}