Inject silence opus frames on mute (#682)

* Inject silence opus frames on mute.

If not, under certain circumstances, residual noise
seems to trigger comfort noise generation at the decoder
which is not all that comfortable.

* inject silence only when muting

* Add comment

* augment comment

* Delete blank line

* Adjust TS offset on blank frames

* Remove debug

* Do not modify `lastTS` as it affects timestamp on next switch.

* use duration to calculate number of blank frames

* log errors in blank frames write path
This commit is contained in:
Raja Subramanian
2022-05-15 11:29:40 +05:30
committed by GitHub
parent fa53da18e7
commit d828e0fbd8
4 changed files with 90 additions and 33 deletions
+73 -14
View File
@@ -39,7 +39,7 @@ type TrackSender interface {
const (
RTPPaddingMaxPayloadSize = 255
RTPPaddingEstimatedHeaderSize = 20
RTPBlankFramesMax = 6
RTPBlankFramesSeconds = float32(0.2)
FlagStopRTXOnPLI = true
@@ -61,13 +61,39 @@ var (
)
var (
VP8KeyFrame8x8 = []byte{0x10, 0x02, 0x00, 0x9d, 0x01, 0x2a, 0x08, 0x00, 0x08, 0x00, 0x00, 0x47, 0x08, 0x85, 0x85, 0x88, 0x85, 0x84, 0x88, 0x02, 0x02, 0x00, 0x0c, 0x0d, 0x60, 0x00, 0xfe, 0xff, 0xab, 0x50, 0x80}
H264KeyFrame2x2SPS = []byte{0x67, 0x42, 0xc0, 0x1f, 0x0f, 0xd9, 0x1f, 0x88, 0x88, 0x84, 0x00, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0xc8, 0x3c, 0x60, 0xc9, 0x20}
H264KeyFrame2x2PPS = []byte{0x68, 0x87, 0xcb, 0x83, 0xcb, 0x20}
H264KeyFrame2x2IDR = []byte{0x65, 0x88, 0x84, 0x0a, 0xf2, 0x62, 0x80, 0x00, 0xa7, 0xbe}
VP8KeyFrame8x8 = []byte{
0x10, 0x02, 0x00, 0x9d, 0x01, 0x2a, 0x08, 0x00,
0x08, 0x00, 0x00, 0x47, 0x08, 0x85, 0x85, 0x88,
0x85, 0x84, 0x88, 0x02, 0x02, 0x00, 0x0c, 0x0d,
0x60, 0x00, 0xfe, 0xff, 0xab, 0x50, 0x80,
}
H264KeyFrame2x2SPS = []byte{
0x67, 0x42, 0xc0, 0x1f, 0x0f, 0xd9, 0x1f, 0x88,
0x88, 0x84, 0x00, 0x00, 0x03, 0x00, 0x04, 0x00,
0x00, 0x03, 0x00, 0xc8, 0x3c, 0x60, 0xc9, 0x20,
}
H264KeyFrame2x2PPS = []byte{
0x68, 0x87, 0xcb, 0x83, 0xcb, 0x20,
}
H264KeyFrame2x2IDR = []byte{
0x65, 0x88, 0x84, 0x0a, 0xf2, 0x62, 0x80, 0x00,
0xa7, 0xbe,
}
H264KeyFrame2x2 = [][]byte{H264KeyFrame2x2SPS, H264KeyFrame2x2PPS, H264KeyFrame2x2IDR}
OpusSilenceFrame = []byte{
0xf8, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
}
)
type ReceiverReportListener func(dt *DownTrack, report *rtcp.ReceiverReport)
@@ -552,6 +578,13 @@ func (d *DownTrack) Mute(muted bool) {
if d.onSubscriptionChanged != nil {
d.onSubscriptionChanged(d)
}
// when muting, send a few silence frames to ensure residual noise does not
// put the comfort noise generator on decoder side in a bad state where it
// generates noise that is not so comfortable.
if d.kind == webrtc.RTPCodecTypeAudio && muted {
_ = d.writeBlankFrameRTP(RTPBlankFramesSeconds)
}
}
func (d *DownTrack) Close() {
@@ -576,7 +609,7 @@ func (d *DownTrack) CloseWithFlush(flush bool) {
doneFlushing := make(chan struct{})
go func() {
defer close(doneFlushing)
_ = d.writeBlankFrameRTP()
_ = d.writeBlankFrameRTP(RTPBlankFramesSeconds)
}()
// wait a limited time to flush
@@ -821,25 +854,32 @@ func (d *DownTrack) CreateSenderReport() *rtcp.SenderReport {
return d.rtpStats.GetRtcpSenderReport(d.ssrc)
}
func (d *DownTrack) writeBlankFrameRTP() error {
func (d *DownTrack) writeBlankFrameRTP(duration float32) error {
// don't send if nothing has been sent
if !d.rtpStats.IsActive() {
return nil
}
// LK-TODO: Support other video codecs
if d.kind == webrtc.RTPCodecTypeAudio || (d.mime != "video/vp8" && d.mime != "video/h264") {
// LK-TODO: Support other audio/video codecs
if d.mime != "audio/opus" && d.mime != "video/vp8" && d.mime != "video/h264" {
return nil
}
snts, frameEndNeeded, err := d.forwarder.GetSnTsForBlankFrames()
if err != nil {
return err
frameRate := uint32(30)
if d.mime == "audio/opus" {
frameRate = 50
}
// send a number of blank frames just in case there is loss.
// Intentionally ignoring check for mute or bandwidth constrained mute
// as this is used to clear client side buffer.
numFrames := int(float32(frameRate) * duration)
snts, frameEndNeeded, err := d.forwarder.GetSnTsForBlankFrames(frameRate, numFrames)
if err != nil {
d.logger.Warnw("could not get SN/TS to write blank frames", err)
return err
}
for i := 0; i < len(snts); i++ {
hdr := rtp.Header{
Version: 2,
@@ -854,11 +894,14 @@ func (d *DownTrack) writeBlankFrameRTP() error {
err = d.writeRTPHeaderExtensions(&hdr)
if err != nil {
d.logger.Warnw("could not write header extensions for blank frame", err)
return err
}
var pktSize int
switch d.mime {
case "audio/opus":
pktSize, err = d.writeOpusBlankFrame(&hdr, frameEndNeeded)
case "video/vp8":
pktSize, err = d.writeVP8BlankFrame(&hdr, frameEndNeeded)
case "video/h264":
@@ -867,6 +910,7 @@ func (d *DownTrack) writeBlankFrameRTP() error {
return nil
}
if err != nil {
d.logger.Warnw("could not write blank frame", err)
return err
}
@@ -882,10 +926,25 @@ func (d *DownTrack) writeBlankFrameRTP() error {
return nil
}
func (d *DownTrack) writeOpusBlankFrame(hdr *rtp.Header, frameEndNeeded bool) (int, error) {
// silence frame
// Used shortly after muting to ensure residual noise does not keep
// generating noise at the decoder after the stream is stopped
// i. e. comfort noise generation actually not producing something comfortable.
payload := make([]byte, len(OpusSilenceFrame))
copy(payload[0:], OpusSilenceFrame)
_, err := d.writeStream.WriteRTP(hdr, payload)
if err == nil {
d.rtpStats.Update(hdr, len(payload), 0, time.Now().UnixNano())
}
return hdr.MarshalSize() + len(payload), err
}
func (d *DownTrack) writeVP8BlankFrame(hdr *rtp.Header, frameEndNeeded bool) (int, error) {
blankVP8 := d.forwarder.GetPaddingVP8(frameEndNeeded)
// 1x1 key frame
// 8x8 key frame
// Used even when closing out a previous frame. Looks like receivers
// do not care about content (it will probably end up being an undecodable
// frame, but that should be okay as there are key frames following)
+3 -4
View File
@@ -1268,16 +1268,15 @@ func (f *Forwarder) GetSnTsForPadding(num int) ([]SnTs, error) {
return f.rtpMunger.UpdateAndGetPaddingSnTs(num, 0, 0, forceMarker)
}
func (f *Forwarder) GetSnTsForBlankFrames() ([]SnTs, bool, error) {
func (f *Forwarder) GetSnTsForBlankFrames(frameRate uint32, numPackets int) ([]SnTs, bool, error) {
f.lock.Lock()
defer f.lock.Unlock()
num := RTPBlankFramesMax
frameEndNeeded := !f.rtpMunger.IsOnFrameBoundary()
if frameEndNeeded {
num++
numPackets++
}
snts, err := f.rtpMunger.UpdateAndGetPaddingSnTs(num, f.codec.ClockRate, 30, frameEndNeeded)
snts, err := f.rtpMunger.UpdateAndGetPaddingSnTs(numPackets, f.codec.ClockRate, frameRate, frameEndNeeded)
return snts, frameEndNeeded, err
}
+10 -11
View File
@@ -1382,37 +1382,36 @@ func TestForwardGetSnTsForBlankFrames(t *testing.T) {
_, _ = f.GetTranslationParams(extPkt, 0)
// should get back frame end needed as the last packet did not have RTP marker set
snts, frameEndNeeded, err := f.GetSnTsForBlankFrames()
snts, frameEndNeeded, err := f.GetSnTsForBlankFrames(30, 6)
require.NoError(t, err)
require.True(t, frameEndNeeded)
// there should be one more than RTPBlankFramesMax as one would have been allocated to end previous frame
numPadding := RTPBlankFramesMax + 1
numPadding := 7
clockRate := testutils.TestVP8Codec.ClockRate
frameRate := uint32(30)
var sntsExpected = make([]SnTs, numPadding)
for i := 0; i < numPadding; i++ {
sntsExpected[i] = SnTs{
sequenceNumber: 23333 + uint16(i) + 1,
timestamp: 0xabcdef + (uint32(i)*clockRate)/frameRate,
sequenceNumber: params.SequenceNumber + uint16(i) + 1,
timestamp: params.Timestamp + (uint32(i)*clockRate)/frameRate,
}
}
require.Equal(t, sntsExpected, snts)
// now that there is a marker, timestamp should jump on first padding when asked again
// also number of padding should be RTPBlankFramesMax
snts, frameEndNeeded, err = f.GetSnTsForBlankFrames()
require.NoError(t, err)
require.False(t, frameEndNeeded)
numPadding = RTPBlankFramesMax
numPadding = 6
sntsExpected = sntsExpected[:numPadding]
for i := 0; i < numPadding; i++ {
sntsExpected[i] = SnTs{
sequenceNumber: 23340 + uint16(i) + 1,
timestamp: 0xabcdef + (uint32(i+1)*clockRate)/frameRate,
sequenceNumber: params.SequenceNumber + uint16(len(snts)) + uint16(i) + 1,
timestamp: params.Timestamp + (uint32(i+1)*clockRate)/frameRate,
}
}
snts, frameEndNeeded, err = f.GetSnTsForBlankFrames(30, 6)
require.NoError(t, err)
require.False(t, frameEndNeeded)
require.Equal(t, sntsExpected, snts)
}
+4 -4
View File
@@ -305,8 +305,8 @@ func TestUpdateAndGetPaddingSnTs(t *testing.T) {
var sntsExpected = make([]SnTs, numPadding)
for i := 0; i < numPadding; i++ {
sntsExpected[i] = SnTs{
sequenceNumber: 23333 + uint16(i) + 1,
timestamp: 0xabcdef + (uint32(i)*clockRate)/frameRate,
sequenceNumber: params.SequenceNumber + uint16(i) + 1,
timestamp: params.Timestamp + (uint32(i)*clockRate)/frameRate,
}
}
snts, err := r.UpdateAndGetPaddingSnTs(numPadding, clockRate, frameRate, true)
@@ -316,8 +316,8 @@ func TestUpdateAndGetPaddingSnTs(t *testing.T) {
// now that there is a marker, timestamp should jump on first padding when asked again
for i := 0; i < numPadding; i++ {
sntsExpected[i] = SnTs{
sequenceNumber: 23343 + uint16(i) + 1,
timestamp: 0xabcdef + (uint32(i+1)*clockRate)/frameRate,
sequenceNumber: params.SequenceNumber + uint16(len(snts)) + uint16(i) + 1,
timestamp: params.Timestamp + (uint32(i+1)*clockRate)/frameRate,
}
}
snts, err = r.UpdateAndGetPaddingSnTs(numPadding, clockRate, frameRate, false)