mirror of
https://github.com/livekit/livekit.git
synced 2026-05-25 14:15:15 +00:00
tune speaker detector to be more sensitive (#427)
* tune speaker detector to be more sensitive
This commit is contained in:
@@ -192,9 +192,9 @@ func NewConfig(confString string, c *cli.Context) (*Config, error) {
|
||||
},
|
||||
},
|
||||
Audio: AudioConfig{
|
||||
ActiveLevel: 30, // -30dBov = 0.03
|
||||
ActiveLevel: 35, // -35dBov
|
||||
MinPercentile: 40,
|
||||
UpdateInterval: 500,
|
||||
UpdateInterval: 400,
|
||||
SmoothIntervals: 2,
|
||||
},
|
||||
Redis: RedisConfig{},
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
|
||||
const (
|
||||
// duration of audio frames for observe window
|
||||
observeDuration = 500 // ms
|
||||
SilentAudioLevel = 127
|
||||
)
|
||||
|
||||
@@ -20,17 +19,19 @@ type AudioLevel struct {
|
||||
|
||||
// for Observe goroutine use
|
||||
// keeps track of current activity
|
||||
observeLevel uint8
|
||||
activeDuration uint32 // ms
|
||||
observedDuration uint32 // ms
|
||||
observeLevel uint8
|
||||
activeDuration uint32 // ms
|
||||
observedDuration uint32 // ms
|
||||
durationToObserve uint32 // ms
|
||||
}
|
||||
|
||||
func NewAudioLevel(activeLevel uint8, minPercentile uint8) *AudioLevel {
|
||||
func NewAudioLevel(activeLevel uint8, minPercentile uint8, observeDuration uint32) *AudioLevel {
|
||||
l := &AudioLevel{
|
||||
levelThreshold: activeLevel,
|
||||
minActiveDuration: uint32(minPercentile) * observeDuration / 100,
|
||||
currentLevel: SilentAudioLevel,
|
||||
observeLevel: SilentAudioLevel,
|
||||
durationToObserve: observeDuration,
|
||||
}
|
||||
return l
|
||||
}
|
||||
@@ -46,10 +47,10 @@ func (l *AudioLevel) Observe(level uint8, durationMs uint32) {
|
||||
}
|
||||
}
|
||||
|
||||
if l.observedDuration >= observeDuration {
|
||||
if l.observedDuration >= l.durationToObserve {
|
||||
// compute and reset
|
||||
if l.activeDuration >= l.minActiveDuration {
|
||||
level := uint32(l.observeLevel) - uint32(20*math.Log10(float64(l.activeDuration)/float64(observeDuration)))
|
||||
level := uint32(l.observeLevel) - uint32(20*math.Log10(float64(l.activeDuration)/float64(l.durationToObserve)))
|
||||
atomic.StoreUint32(&l.currentLevel, level)
|
||||
} else {
|
||||
atomic.StoreUint32(&l.currentLevel, SilentAudioLevel)
|
||||
|
||||
@@ -12,12 +12,13 @@ const (
|
||||
samplesPerBatch = 25
|
||||
defaultActiveLevel = 30
|
||||
// requires two noisy samples to count
|
||||
defaultPercentile = 10
|
||||
defaultPercentile = 10
|
||||
defaultObserveDuration = 500 // ms
|
||||
)
|
||||
|
||||
func TestAudioLevel(t *testing.T) {
|
||||
t.Run("initially to return not noisy, within a few samples", func(t *testing.T) {
|
||||
a := rtc.NewAudioLevel(defaultActiveLevel, defaultPercentile)
|
||||
a := rtc.NewAudioLevel(defaultActiveLevel, defaultPercentile, defaultObserveDuration)
|
||||
_, noisy := a.GetLevel()
|
||||
require.False(t, noisy)
|
||||
|
||||
@@ -27,7 +28,7 @@ func TestAudioLevel(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("not noisy when all samples are below threshold", func(t *testing.T) {
|
||||
a := rtc.NewAudioLevel(defaultActiveLevel, defaultPercentile)
|
||||
a := rtc.NewAudioLevel(defaultActiveLevel, defaultPercentile, defaultObserveDuration)
|
||||
|
||||
observeSamples(a, 35, 100)
|
||||
_, noisy := a.GetLevel()
|
||||
@@ -35,7 +36,7 @@ func TestAudioLevel(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("not noisy when less than percentile samples are above threshold", func(t *testing.T) {
|
||||
a := rtc.NewAudioLevel(defaultActiveLevel, defaultPercentile)
|
||||
a := rtc.NewAudioLevel(defaultActiveLevel, defaultPercentile, defaultObserveDuration)
|
||||
|
||||
observeSamples(a, 35, samplesPerBatch-2)
|
||||
observeSamples(a, 25, 1)
|
||||
@@ -46,7 +47,7 @@ func TestAudioLevel(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("noisy when higher than percentile samples are above threshold", func(t *testing.T) {
|
||||
a := rtc.NewAudioLevel(defaultActiveLevel, defaultPercentile)
|
||||
a := rtc.NewAudioLevel(defaultActiveLevel, defaultPercentile, defaultObserveDuration)
|
||||
|
||||
observeSamples(a, 35, samplesPerBatch-16)
|
||||
observeSamples(a, 25, 8)
|
||||
|
||||
@@ -125,7 +125,7 @@ func (t *MediaTrack) AddReceiver(receiver *webrtc.RTPReceiver, track *webrtc.Tra
|
||||
|
||||
if t.Kind() == livekit.TrackType_AUDIO {
|
||||
t.audioLevelMu.Lock()
|
||||
t.audioLevel = NewAudioLevel(t.params.AudioConfig.ActiveLevel, t.params.AudioConfig.MinPercentile)
|
||||
t.audioLevel = NewAudioLevel(t.params.AudioConfig.ActiveLevel, t.params.AudioConfig.MinPercentile, t.params.AudioConfig.UpdateInterval)
|
||||
buff.OnAudioLevel(func(level uint8, duration uint32) {
|
||||
t.audioLevelMu.RLock()
|
||||
defer t.audioLevelMu.RUnlock()
|
||||
|
||||
Reference in New Issue
Block a user