mirror of
https://github.com/livekit/livekit.git
synced 2026-05-26 09:45:56 +00:00
Inject silence opus frames on mute (#682)
* Inject silence opus frames on mute. If not, under certain circumstances, residual noise seems to trigger comfort noise generation at the decoder which is not all that comfortable. * inject silence only when muting * Add comment * augment comment * Delete blank line * Adjust TS offset on blank frames * Remove debug * Do not modify `lastTS` as it affects timestamp on next switch. * use duration to calculate number of blank frames * log errors in blank frames write path
This commit is contained in:
+73
-14
@@ -39,7 +39,7 @@ type TrackSender interface {
|
||||
const (
|
||||
RTPPaddingMaxPayloadSize = 255
|
||||
RTPPaddingEstimatedHeaderSize = 20
|
||||
RTPBlankFramesMax = 6
|
||||
RTPBlankFramesSeconds = float32(0.2)
|
||||
|
||||
FlagStopRTXOnPLI = true
|
||||
|
||||
@@ -61,13 +61,39 @@ var (
|
||||
)
|
||||
|
||||
var (
|
||||
VP8KeyFrame8x8 = []byte{0x10, 0x02, 0x00, 0x9d, 0x01, 0x2a, 0x08, 0x00, 0x08, 0x00, 0x00, 0x47, 0x08, 0x85, 0x85, 0x88, 0x85, 0x84, 0x88, 0x02, 0x02, 0x00, 0x0c, 0x0d, 0x60, 0x00, 0xfe, 0xff, 0xab, 0x50, 0x80}
|
||||
|
||||
H264KeyFrame2x2SPS = []byte{0x67, 0x42, 0xc0, 0x1f, 0x0f, 0xd9, 0x1f, 0x88, 0x88, 0x84, 0x00, 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0xc8, 0x3c, 0x60, 0xc9, 0x20}
|
||||
H264KeyFrame2x2PPS = []byte{0x68, 0x87, 0xcb, 0x83, 0xcb, 0x20}
|
||||
H264KeyFrame2x2IDR = []byte{0x65, 0x88, 0x84, 0x0a, 0xf2, 0x62, 0x80, 0x00, 0xa7, 0xbe}
|
||||
VP8KeyFrame8x8 = []byte{
|
||||
0x10, 0x02, 0x00, 0x9d, 0x01, 0x2a, 0x08, 0x00,
|
||||
0x08, 0x00, 0x00, 0x47, 0x08, 0x85, 0x85, 0x88,
|
||||
0x85, 0x84, 0x88, 0x02, 0x02, 0x00, 0x0c, 0x0d,
|
||||
0x60, 0x00, 0xfe, 0xff, 0xab, 0x50, 0x80,
|
||||
}
|
||||
|
||||
H264KeyFrame2x2SPS = []byte{
|
||||
0x67, 0x42, 0xc0, 0x1f, 0x0f, 0xd9, 0x1f, 0x88,
|
||||
0x88, 0x84, 0x00, 0x00, 0x03, 0x00, 0x04, 0x00,
|
||||
0x00, 0x03, 0x00, 0xc8, 0x3c, 0x60, 0xc9, 0x20,
|
||||
}
|
||||
H264KeyFrame2x2PPS = []byte{
|
||||
0x68, 0x87, 0xcb, 0x83, 0xcb, 0x20,
|
||||
}
|
||||
H264KeyFrame2x2IDR = []byte{
|
||||
0x65, 0x88, 0x84, 0x0a, 0xf2, 0x62, 0x80, 0x00,
|
||||
0xa7, 0xbe,
|
||||
}
|
||||
H264KeyFrame2x2 = [][]byte{H264KeyFrame2x2SPS, H264KeyFrame2x2PPS, H264KeyFrame2x2IDR}
|
||||
|
||||
OpusSilenceFrame = []byte{
|
||||
0xf8, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
}
|
||||
)
|
||||
|
||||
type ReceiverReportListener func(dt *DownTrack, report *rtcp.ReceiverReport)
|
||||
@@ -552,6 +578,13 @@ func (d *DownTrack) Mute(muted bool) {
|
||||
if d.onSubscriptionChanged != nil {
|
||||
d.onSubscriptionChanged(d)
|
||||
}
|
||||
|
||||
// when muting, send a few silence frames to ensure residual noise does not
|
||||
// put the comfort noise generator on decoder side in a bad state where it
|
||||
// generates noise that is not so comfortable.
|
||||
if d.kind == webrtc.RTPCodecTypeAudio && muted {
|
||||
_ = d.writeBlankFrameRTP(RTPBlankFramesSeconds)
|
||||
}
|
||||
}
|
||||
|
||||
func (d *DownTrack) Close() {
|
||||
@@ -576,7 +609,7 @@ func (d *DownTrack) CloseWithFlush(flush bool) {
|
||||
doneFlushing := make(chan struct{})
|
||||
go func() {
|
||||
defer close(doneFlushing)
|
||||
_ = d.writeBlankFrameRTP()
|
||||
_ = d.writeBlankFrameRTP(RTPBlankFramesSeconds)
|
||||
}()
|
||||
|
||||
// wait a limited time to flush
|
||||
@@ -821,25 +854,32 @@ func (d *DownTrack) CreateSenderReport() *rtcp.SenderReport {
|
||||
return d.rtpStats.GetRtcpSenderReport(d.ssrc)
|
||||
}
|
||||
|
||||
func (d *DownTrack) writeBlankFrameRTP() error {
|
||||
func (d *DownTrack) writeBlankFrameRTP(duration float32) error {
|
||||
// don't send if nothing has been sent
|
||||
if !d.rtpStats.IsActive() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// LK-TODO: Support other video codecs
|
||||
if d.kind == webrtc.RTPCodecTypeAudio || (d.mime != "video/vp8" && d.mime != "video/h264") {
|
||||
// LK-TODO: Support other audio/video codecs
|
||||
if d.mime != "audio/opus" && d.mime != "video/vp8" && d.mime != "video/h264" {
|
||||
return nil
|
||||
}
|
||||
|
||||
snts, frameEndNeeded, err := d.forwarder.GetSnTsForBlankFrames()
|
||||
if err != nil {
|
||||
return err
|
||||
frameRate := uint32(30)
|
||||
if d.mime == "audio/opus" {
|
||||
frameRate = 50
|
||||
}
|
||||
|
||||
// send a number of blank frames just in case there is loss.
|
||||
// Intentionally ignoring check for mute or bandwidth constrained mute
|
||||
// as this is used to clear client side buffer.
|
||||
numFrames := int(float32(frameRate) * duration)
|
||||
snts, frameEndNeeded, err := d.forwarder.GetSnTsForBlankFrames(frameRate, numFrames)
|
||||
if err != nil {
|
||||
d.logger.Warnw("could not get SN/TS to write blank frames", err)
|
||||
return err
|
||||
}
|
||||
|
||||
for i := 0; i < len(snts); i++ {
|
||||
hdr := rtp.Header{
|
||||
Version: 2,
|
||||
@@ -854,11 +894,14 @@ func (d *DownTrack) writeBlankFrameRTP() error {
|
||||
|
||||
err = d.writeRTPHeaderExtensions(&hdr)
|
||||
if err != nil {
|
||||
d.logger.Warnw("could not write header extensions for blank frame", err)
|
||||
return err
|
||||
}
|
||||
|
||||
var pktSize int
|
||||
switch d.mime {
|
||||
case "audio/opus":
|
||||
pktSize, err = d.writeOpusBlankFrame(&hdr, frameEndNeeded)
|
||||
case "video/vp8":
|
||||
pktSize, err = d.writeVP8BlankFrame(&hdr, frameEndNeeded)
|
||||
case "video/h264":
|
||||
@@ -867,6 +910,7 @@ func (d *DownTrack) writeBlankFrameRTP() error {
|
||||
return nil
|
||||
}
|
||||
if err != nil {
|
||||
d.logger.Warnw("could not write blank frame", err)
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -882,10 +926,25 @@ func (d *DownTrack) writeBlankFrameRTP() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *DownTrack) writeOpusBlankFrame(hdr *rtp.Header, frameEndNeeded bool) (int, error) {
|
||||
// silence frame
|
||||
// Used shortly after muting to ensure residual noise does not keep
|
||||
// generating noise at the decoder after the stream is stopped
|
||||
// i. e. comfort noise generation actually not producing something comfortable.
|
||||
payload := make([]byte, len(OpusSilenceFrame))
|
||||
copy(payload[0:], OpusSilenceFrame)
|
||||
|
||||
_, err := d.writeStream.WriteRTP(hdr, payload)
|
||||
if err == nil {
|
||||
d.rtpStats.Update(hdr, len(payload), 0, time.Now().UnixNano())
|
||||
}
|
||||
return hdr.MarshalSize() + len(payload), err
|
||||
}
|
||||
|
||||
func (d *DownTrack) writeVP8BlankFrame(hdr *rtp.Header, frameEndNeeded bool) (int, error) {
|
||||
blankVP8 := d.forwarder.GetPaddingVP8(frameEndNeeded)
|
||||
|
||||
// 1x1 key frame
|
||||
// 8x8 key frame
|
||||
// Used even when closing out a previous frame. Looks like receivers
|
||||
// do not care about content (it will probably end up being an undecodable
|
||||
// frame, but that should be okay as there are key frames following)
|
||||
|
||||
@@ -1268,16 +1268,15 @@ func (f *Forwarder) GetSnTsForPadding(num int) ([]SnTs, error) {
|
||||
return f.rtpMunger.UpdateAndGetPaddingSnTs(num, 0, 0, forceMarker)
|
||||
}
|
||||
|
||||
func (f *Forwarder) GetSnTsForBlankFrames() ([]SnTs, bool, error) {
|
||||
func (f *Forwarder) GetSnTsForBlankFrames(frameRate uint32, numPackets int) ([]SnTs, bool, error) {
|
||||
f.lock.Lock()
|
||||
defer f.lock.Unlock()
|
||||
|
||||
num := RTPBlankFramesMax
|
||||
frameEndNeeded := !f.rtpMunger.IsOnFrameBoundary()
|
||||
if frameEndNeeded {
|
||||
num++
|
||||
numPackets++
|
||||
}
|
||||
snts, err := f.rtpMunger.UpdateAndGetPaddingSnTs(num, f.codec.ClockRate, 30, frameEndNeeded)
|
||||
snts, err := f.rtpMunger.UpdateAndGetPaddingSnTs(numPackets, f.codec.ClockRate, frameRate, frameEndNeeded)
|
||||
return snts, frameEndNeeded, err
|
||||
}
|
||||
|
||||
|
||||
+10
-11
@@ -1382,37 +1382,36 @@ func TestForwardGetSnTsForBlankFrames(t *testing.T) {
|
||||
_, _ = f.GetTranslationParams(extPkt, 0)
|
||||
|
||||
// should get back frame end needed as the last packet did not have RTP marker set
|
||||
snts, frameEndNeeded, err := f.GetSnTsForBlankFrames()
|
||||
snts, frameEndNeeded, err := f.GetSnTsForBlankFrames(30, 6)
|
||||
require.NoError(t, err)
|
||||
require.True(t, frameEndNeeded)
|
||||
|
||||
// there should be one more than RTPBlankFramesMax as one would have been allocated to end previous frame
|
||||
numPadding := RTPBlankFramesMax + 1
|
||||
numPadding := 7
|
||||
clockRate := testutils.TestVP8Codec.ClockRate
|
||||
frameRate := uint32(30)
|
||||
var sntsExpected = make([]SnTs, numPadding)
|
||||
for i := 0; i < numPadding; i++ {
|
||||
sntsExpected[i] = SnTs{
|
||||
sequenceNumber: 23333 + uint16(i) + 1,
|
||||
timestamp: 0xabcdef + (uint32(i)*clockRate)/frameRate,
|
||||
sequenceNumber: params.SequenceNumber + uint16(i) + 1,
|
||||
timestamp: params.Timestamp + (uint32(i)*clockRate)/frameRate,
|
||||
}
|
||||
}
|
||||
require.Equal(t, sntsExpected, snts)
|
||||
|
||||
// now that there is a marker, timestamp should jump on first padding when asked again
|
||||
// also number of padding should be RTPBlankFramesMax
|
||||
snts, frameEndNeeded, err = f.GetSnTsForBlankFrames()
|
||||
require.NoError(t, err)
|
||||
require.False(t, frameEndNeeded)
|
||||
|
||||
numPadding = RTPBlankFramesMax
|
||||
numPadding = 6
|
||||
sntsExpected = sntsExpected[:numPadding]
|
||||
for i := 0; i < numPadding; i++ {
|
||||
sntsExpected[i] = SnTs{
|
||||
sequenceNumber: 23340 + uint16(i) + 1,
|
||||
timestamp: 0xabcdef + (uint32(i+1)*clockRate)/frameRate,
|
||||
sequenceNumber: params.SequenceNumber + uint16(len(snts)) + uint16(i) + 1,
|
||||
timestamp: params.Timestamp + (uint32(i+1)*clockRate)/frameRate,
|
||||
}
|
||||
}
|
||||
snts, frameEndNeeded, err = f.GetSnTsForBlankFrames(30, 6)
|
||||
require.NoError(t, err)
|
||||
require.False(t, frameEndNeeded)
|
||||
require.Equal(t, sntsExpected, snts)
|
||||
}
|
||||
|
||||
|
||||
@@ -305,8 +305,8 @@ func TestUpdateAndGetPaddingSnTs(t *testing.T) {
|
||||
var sntsExpected = make([]SnTs, numPadding)
|
||||
for i := 0; i < numPadding; i++ {
|
||||
sntsExpected[i] = SnTs{
|
||||
sequenceNumber: 23333 + uint16(i) + 1,
|
||||
timestamp: 0xabcdef + (uint32(i)*clockRate)/frameRate,
|
||||
sequenceNumber: params.SequenceNumber + uint16(i) + 1,
|
||||
timestamp: params.Timestamp + (uint32(i)*clockRate)/frameRate,
|
||||
}
|
||||
}
|
||||
snts, err := r.UpdateAndGetPaddingSnTs(numPadding, clockRate, frameRate, true)
|
||||
@@ -316,8 +316,8 @@ func TestUpdateAndGetPaddingSnTs(t *testing.T) {
|
||||
// now that there is a marker, timestamp should jump on first padding when asked again
|
||||
for i := 0; i < numPadding; i++ {
|
||||
sntsExpected[i] = SnTs{
|
||||
sequenceNumber: 23343 + uint16(i) + 1,
|
||||
timestamp: 0xabcdef + (uint32(i+1)*clockRate)/frameRate,
|
||||
sequenceNumber: params.SequenceNumber + uint16(len(snts)) + uint16(i) + 1,
|
||||
timestamp: params.Timestamp + (uint32(i+1)*clockRate)/frameRate,
|
||||
}
|
||||
}
|
||||
snts, err = r.UpdateAndGetPaddingSnTs(numPadding, clockRate, frameRate, false)
|
||||
|
||||
Reference in New Issue
Block a user