From d828e0fbd8a1f41fe6b3e847200227541f1d4458 Mon Sep 17 00:00:00 2001 From: Raja Subramanian Date: Sun, 15 May 2022 11:29:40 +0530 Subject: [PATCH] Inject silence opus frames on mute (#682) * Inject silence opus frames on mute. If not, under certain circumstances, residual noise seems to trigger comfort noise generation at the decoder which is not all that comfortable. * inject silence only when muting * Add comment * augment comment * Delete blank line * Adjust TS offset on blank frames * Remove debug * Do not modify `lastTS` as it affects timestamp on next switch. * use duration to calculate number of blank frames * log errors in blank frames write path --- pkg/sfu/downtrack.go | 87 ++++++++++++++++++++++++++++++++------- pkg/sfu/forwarder.go | 7 ++-- pkg/sfu/forwarder_test.go | 21 +++++----- pkg/sfu/rtpmunger_test.go | 8 ++-- 4 files changed, 90 insertions(+), 33 deletions(-) diff --git a/pkg/sfu/downtrack.go b/pkg/sfu/downtrack.go index 6f6774028..865d5563f 100644 --- a/pkg/sfu/downtrack.go +++ b/pkg/sfu/downtrack.go @@ -39,7 +39,7 @@ type TrackSender interface { const ( RTPPaddingMaxPayloadSize = 255 RTPPaddingEstimatedHeaderSize = 20 - RTPBlankFramesMax = 6 + RTPBlankFramesSeconds = float32(0.2) FlagStopRTXOnPLI = true @@ -61,13 +61,39 @@ var ( ) var ( - VP8KeyFrame8x8 = []byte{0x10, 0x02, 0x00, 0x9d, 0x01, 0x2a, 0x08, 0x00, 0x08, 0x00, 0x00, 0x47, 0x08, 0x85, 0x85, 0x88, 0x85, 0x84, 0x88, 0x02, 0x02, 0x00, 0x0c, 0x0d, 0x60, 0x00, 0xfe, 0xff, 0xab, 0x50, 0x80} - - H264KeyFrame2x2SPS = []byte{0x67, 0x42, 0xc0, 0x1f, 0x0f, 0xd9, 0x1f, 0x88, 0x88, 0x84, 0x00, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0xc8, 0x3c, 0x60, 0xc9, 0x20} - H264KeyFrame2x2PPS = []byte{0x68, 0x87, 0xcb, 0x83, 0xcb, 0x20} - H264KeyFrame2x2IDR = []byte{0x65, 0x88, 0x84, 0x0a, 0xf2, 0x62, 0x80, 0x00, 0xa7, 0xbe} + VP8KeyFrame8x8 = []byte{ + 0x10, 0x02, 0x00, 0x9d, 0x01, 0x2a, 0x08, 0x00, + 0x08, 0x00, 0x00, 0x47, 0x08, 0x85, 0x85, 0x88, + 0x85, 0x84, 0x88, 0x02, 0x02, 0x00, 0x0c, 0x0d, + 0x60, 0x00, 0xfe, 0xff, 0xab, 0x50, 0x80, + } + H264KeyFrame2x2SPS = []byte{ + 0x67, 0x42, 0xc0, 0x1f, 0x0f, 0xd9, 0x1f, 0x88, + 0x88, 0x84, 0x00, 0x00, 0x03, 0x00, 0x04, 0x00, + 0x00, 0x03, 0x00, 0xc8, 0x3c, 0x60, 0xc9, 0x20, + } + H264KeyFrame2x2PPS = []byte{ + 0x68, 0x87, 0xcb, 0x83, 0xcb, 0x20, + } + H264KeyFrame2x2IDR = []byte{ + 0x65, 0x88, 0x84, 0x0a, 0xf2, 0x62, 0x80, 0x00, + 0xa7, 0xbe, + } H264KeyFrame2x2 = [][]byte{H264KeyFrame2x2SPS, H264KeyFrame2x2PPS, H264KeyFrame2x2IDR} + + OpusSilenceFrame = []byte{ + 0xf8, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + } ) type ReceiverReportListener func(dt *DownTrack, report *rtcp.ReceiverReport) @@ -552,6 +578,13 @@ func (d *DownTrack) Mute(muted bool) { if d.onSubscriptionChanged != nil { d.onSubscriptionChanged(d) } + + // when muting, send a few silence frames to ensure residual noise does not + // put the comfort noise generator on decoder side in a bad state where it + // generates noise that is not so comfortable. + if d.kind == webrtc.RTPCodecTypeAudio && muted { + _ = d.writeBlankFrameRTP(RTPBlankFramesSeconds) + } } func (d *DownTrack) Close() { @@ -576,7 +609,7 @@ func (d *DownTrack) CloseWithFlush(flush bool) { doneFlushing := make(chan struct{}) go func() { defer close(doneFlushing) - _ = d.writeBlankFrameRTP() + _ = d.writeBlankFrameRTP(RTPBlankFramesSeconds) }() // wait a limited time to flush @@ -821,25 +854,32 @@ func (d *DownTrack) CreateSenderReport() *rtcp.SenderReport { return d.rtpStats.GetRtcpSenderReport(d.ssrc) } -func (d *DownTrack) writeBlankFrameRTP() error { +func (d *DownTrack) writeBlankFrameRTP(duration float32) error { // don't send if nothing has been sent if !d.rtpStats.IsActive() { return nil } - // LK-TODO: Support other video codecs - if d.kind == webrtc.RTPCodecTypeAudio || (d.mime != "video/vp8" && d.mime != "video/h264") { + // LK-TODO: Support other audio/video codecs + if d.mime != "audio/opus" && d.mime != "video/vp8" && d.mime != "video/h264" { return nil } - snts, frameEndNeeded, err := d.forwarder.GetSnTsForBlankFrames() - if err != nil { - return err + frameRate := uint32(30) + if d.mime == "audio/opus" { + frameRate = 50 } // send a number of blank frames just in case there is loss. // Intentionally ignoring check for mute or bandwidth constrained mute // as this is used to clear client side buffer. + numFrames := int(float32(frameRate) * duration) + snts, frameEndNeeded, err := d.forwarder.GetSnTsForBlankFrames(frameRate, numFrames) + if err != nil { + d.logger.Warnw("could not get SN/TS to write blank frames", err) + return err + } + for i := 0; i < len(snts); i++ { hdr := rtp.Header{ Version: 2, @@ -854,11 +894,14 @@ func (d *DownTrack) writeBlankFrameRTP() error { err = d.writeRTPHeaderExtensions(&hdr) if err != nil { + d.logger.Warnw("could not write header extensions for blank frame", err) return err } var pktSize int switch d.mime { + case "audio/opus": + pktSize, err = d.writeOpusBlankFrame(&hdr, frameEndNeeded) case "video/vp8": pktSize, err = d.writeVP8BlankFrame(&hdr, frameEndNeeded) case "video/h264": @@ -867,6 +910,7 @@ func (d *DownTrack) writeBlankFrameRTP() error { return nil } if err != nil { + d.logger.Warnw("could not write blank frame", err) return err } @@ -882,10 +926,25 @@ func (d *DownTrack) writeBlankFrameRTP() error { return nil } +func (d *DownTrack) writeOpusBlankFrame(hdr *rtp.Header, frameEndNeeded bool) (int, error) { + // silence frame + // Used shortly after muting to ensure residual noise does not keep + // generating noise at the decoder after the stream is stopped + // i. e. comfort noise generation actually not producing something comfortable. + payload := make([]byte, len(OpusSilenceFrame)) + copy(payload[0:], OpusSilenceFrame) + + _, err := d.writeStream.WriteRTP(hdr, payload) + if err == nil { + d.rtpStats.Update(hdr, len(payload), 0, time.Now().UnixNano()) + } + return hdr.MarshalSize() + len(payload), err +} + func (d *DownTrack) writeVP8BlankFrame(hdr *rtp.Header, frameEndNeeded bool) (int, error) { blankVP8 := d.forwarder.GetPaddingVP8(frameEndNeeded) - // 1x1 key frame + // 8x8 key frame // Used even when closing out a previous frame. Looks like receivers // do not care about content (it will probably end up being an undecodable // frame, but that should be okay as there are key frames following) diff --git a/pkg/sfu/forwarder.go b/pkg/sfu/forwarder.go index b315f67e5..318bf62dd 100644 --- a/pkg/sfu/forwarder.go +++ b/pkg/sfu/forwarder.go @@ -1268,16 +1268,15 @@ func (f *Forwarder) GetSnTsForPadding(num int) ([]SnTs, error) { return f.rtpMunger.UpdateAndGetPaddingSnTs(num, 0, 0, forceMarker) } -func (f *Forwarder) GetSnTsForBlankFrames() ([]SnTs, bool, error) { +func (f *Forwarder) GetSnTsForBlankFrames(frameRate uint32, numPackets int) ([]SnTs, bool, error) { f.lock.Lock() defer f.lock.Unlock() - num := RTPBlankFramesMax frameEndNeeded := !f.rtpMunger.IsOnFrameBoundary() if frameEndNeeded { - num++ + numPackets++ } - snts, err := f.rtpMunger.UpdateAndGetPaddingSnTs(num, f.codec.ClockRate, 30, frameEndNeeded) + snts, err := f.rtpMunger.UpdateAndGetPaddingSnTs(numPackets, f.codec.ClockRate, frameRate, frameEndNeeded) return snts, frameEndNeeded, err } diff --git a/pkg/sfu/forwarder_test.go b/pkg/sfu/forwarder_test.go index a4ac105ca..0d5db471a 100644 --- a/pkg/sfu/forwarder_test.go +++ b/pkg/sfu/forwarder_test.go @@ -1382,37 +1382,36 @@ func TestForwardGetSnTsForBlankFrames(t *testing.T) { _, _ = f.GetTranslationParams(extPkt, 0) // should get back frame end needed as the last packet did not have RTP marker set - snts, frameEndNeeded, err := f.GetSnTsForBlankFrames() + snts, frameEndNeeded, err := f.GetSnTsForBlankFrames(30, 6) require.NoError(t, err) require.True(t, frameEndNeeded) // there should be one more than RTPBlankFramesMax as one would have been allocated to end previous frame - numPadding := RTPBlankFramesMax + 1 + numPadding := 7 clockRate := testutils.TestVP8Codec.ClockRate frameRate := uint32(30) var sntsExpected = make([]SnTs, numPadding) for i := 0; i < numPadding; i++ { sntsExpected[i] = SnTs{ - sequenceNumber: 23333 + uint16(i) + 1, - timestamp: 0xabcdef + (uint32(i)*clockRate)/frameRate, + sequenceNumber: params.SequenceNumber + uint16(i) + 1, + timestamp: params.Timestamp + (uint32(i)*clockRate)/frameRate, } } require.Equal(t, sntsExpected, snts) // now that there is a marker, timestamp should jump on first padding when asked again // also number of padding should be RTPBlankFramesMax - snts, frameEndNeeded, err = f.GetSnTsForBlankFrames() - require.NoError(t, err) - require.False(t, frameEndNeeded) - - numPadding = RTPBlankFramesMax + numPadding = 6 sntsExpected = sntsExpected[:numPadding] for i := 0; i < numPadding; i++ { sntsExpected[i] = SnTs{ - sequenceNumber: 23340 + uint16(i) + 1, - timestamp: 0xabcdef + (uint32(i+1)*clockRate)/frameRate, + sequenceNumber: params.SequenceNumber + uint16(len(snts)) + uint16(i) + 1, + timestamp: params.Timestamp + (uint32(i+1)*clockRate)/frameRate, } } + snts, frameEndNeeded, err = f.GetSnTsForBlankFrames(30, 6) + require.NoError(t, err) + require.False(t, frameEndNeeded) require.Equal(t, sntsExpected, snts) } diff --git a/pkg/sfu/rtpmunger_test.go b/pkg/sfu/rtpmunger_test.go index cff7fa06c..798e03ace 100644 --- a/pkg/sfu/rtpmunger_test.go +++ b/pkg/sfu/rtpmunger_test.go @@ -305,8 +305,8 @@ func TestUpdateAndGetPaddingSnTs(t *testing.T) { var sntsExpected = make([]SnTs, numPadding) for i := 0; i < numPadding; i++ { sntsExpected[i] = SnTs{ - sequenceNumber: 23333 + uint16(i) + 1, - timestamp: 0xabcdef + (uint32(i)*clockRate)/frameRate, + sequenceNumber: params.SequenceNumber + uint16(i) + 1, + timestamp: params.Timestamp + (uint32(i)*clockRate)/frameRate, } } snts, err := r.UpdateAndGetPaddingSnTs(numPadding, clockRate, frameRate, true) @@ -316,8 +316,8 @@ func TestUpdateAndGetPaddingSnTs(t *testing.T) { // now that there is a marker, timestamp should jump on first padding when asked again for i := 0; i < numPadding; i++ { sntsExpected[i] = SnTs{ - sequenceNumber: 23343 + uint16(i) + 1, - timestamp: 0xabcdef + (uint32(i+1)*clockRate)/frameRate, + sequenceNumber: params.SequenceNumber + uint16(len(snts)) + uint16(i) + 1, + timestamp: params.Timestamp + (uint32(i+1)*clockRate)/frameRate, } } snts, err = r.UpdateAndGetPaddingSnTs(numPadding, clockRate, frameRate, false)