add exponential backoff to room service check retries (#2462)

This commit is contained in:
Paul Wells
2024-02-07 19:35:58 -08:00
committed by GitHub
parent 89e58b150e
commit fd35c9edfc
4 changed files with 23 additions and 22 deletions

1
go.mod
View File

@@ -3,6 +3,7 @@ module github.com/livekit/livekit-server
go 1.20
require (
github.com/avast/retry-go/v4 v4.5.1
github.com/bep/debounce v1.2.1
github.com/d5/tengo/v2 v2.16.1
github.com/dustin/go-humanize v1.0.1

2
go.sum
View File

@@ -1,3 +1,5 @@
github.com/avast/retry-go/v4 v4.5.1 h1:AxIx0HGi4VZ3I02jr78j5lZ3M6x1E0Ivxa6b0pUUh7o=
github.com/avast/retry-go/v4 v4.5.1/go.mod h1:/sipNsvNB3RRuT5iNcb6h73nw3IBmXJ/H3XrCQYSOpc=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bep/debounce v1.2.1 h1:v67fRdBA9UQu2NhLFXrSg0Brw7CexQekrBwDMM8bzeY=

View File

@@ -297,19 +297,22 @@ type IngressConfig struct {
type SIPConfig struct {
}
// not exposed to YAML
type APIConfig struct {
// amount of time to wait for API to execute, default 2s
ExecutionTimeout time.Duration
ExecutionTimeout time.Duration `yaml:"execution_timeout,omitempty"`
// amount of time to wait before checking for operation complete
CheckInterval time.Duration
// min amount of time to wait before checking for operation complete
CheckInterval time.Duration `yaml:"check_interval,omitempty"`
// max amount of time to wait before checking for operation complete
MaxCheckInterval time.Duration `yaml:"max_check_interval,omitempty"`
}
func DefaultAPIConfig() APIConfig {
return APIConfig{
ExecutionTimeout: 2 * time.Second,
CheckInterval: 100 * time.Millisecond,
MaxCheckInterval: 300 * time.Second,
}
}

View File

@@ -17,8 +17,8 @@ package service
import (
"context"
"strconv"
"time"
"github.com/avast/retry-go/v4"
"github.com/pkg/errors"
"github.com/twitchtv/twirp"
@@ -101,7 +101,7 @@ func (s *RoomService) CreateRoom(ctx context.Context, req *livekit.CreateRoomReq
defer res.ResponseSource.Close()
// ensure it's created correctly
err = s.confirmExecution(func() error {
err = s.confirmExecution(ctx, func() error {
_, _, err := s.roomStore.LoadRoom(ctx, livekit.RoomName(req.Name), false)
if err != nil {
return ErrOperationFailed
@@ -299,7 +299,7 @@ func (s *RoomService) UpdateRoomMetadata(ctx context.Context, req *livekit.Updat
return nil, err
}
err = s.confirmExecution(func() error {
err = s.confirmExecution(ctx, func() error {
room, _, err = s.roomStore.LoadRoom(ctx, livekit.RoomName(req.Room), false)
if err != nil {
return err
@@ -326,19 +326,14 @@ func (s *RoomService) UpdateRoomMetadata(ctx context.Context, req *livekit.Updat
return room, nil
}
func (s *RoomService) confirmExecution(f func() error) error {
expired := time.After(s.apiConf.ExecutionTimeout)
var err error
for {
select {
case <-expired:
return err
default:
err = f()
if err == nil {
return nil
}
time.Sleep(s.apiConf.CheckInterval)
}
}
func (s *RoomService) confirmExecution(ctx context.Context, f func() error) error {
ctx, cancel := context.WithTimeout(ctx, s.apiConf.ExecutionTimeout)
defer cancel()
return retry.Do(
f,
retry.Context(ctx),
retry.Delay(s.apiConf.CheckInterval),
retry.MaxDelay(s.apiConf.MaxCheckInterval),
retry.DelayType(retry.BackOffDelay),
)
}