Files
livekit/pkg/sfu/buffer/helpers.go
Raja Subramanian 129b1df8e6 Use VP9 Key frame detection from Galene. (#1973)
* Use VP9 Key frame detection from Galene.

With ffmpeg generated VP9 file with single layer
and publishing via Go SDK, the key picture determination
outlined at https://datatracker.ietf.org/doc/html/draft-ietf-payload-vp9-16#page-13
under the F bit explanation does not work. It declares kay frame for
pretty much all frames. Unclear if ffmpeg generated bitstream has issues
or if that procedure in the above document does not work for single
layer case.

Using the bit stream explained here
https://storage.googleapis.com/downloads.webmproject.org/docs/vp9/vp9-bitstream-specification-v0.6-20160331-draft.pdf
(pages 28, 62, 63) implemented in Galene.
That is more expensive as it has to parse more, but works in all cases.

* Add AV1-TODo

* add some TODOs
2023-08-17 22:33:11 +05:30

416 lines
8.8 KiB
Go

// Copyright 2023 LiveKit, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package buffer
import (
"encoding/binary"
"errors"
"github.com/livekit/protocol/logger"
"github.com/pion/rtp/codecs"
)
var (
errShortPacket = errors.New("packet is not large enough")
errNilPacket = errors.New("invalid nil packet")
errInvalidPacket = errors.New("invalid packet")
)
// VP8 is a helper to get temporal data from VP8 packet header
/*
VP8 Payload Descriptor
0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
|X|R|N|S|R| PID | (REQUIRED) |X|R|N|S|R| PID | (REQUIRED)
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
X: |I|L|T|K| RSV | (OPTIONAL) X: |I|L|T|K| RSV | (OPTIONAL)
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
I: |M| PictureID | (OPTIONAL) I: |M| PictureID | (OPTIONAL)
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
L: | TL0PICIDX | (OPTIONAL) | PictureID |
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
T/K:|TID|Y| KEYIDX | (OPTIONAL) L: | TL0PICIDX | (OPTIONAL)
+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
T/K:|TID|Y| KEYIDX | (OPTIONAL)
+-+-+-+-+-+-+-+-+
*/
type VP8 struct {
FirstByte byte
S bool
I bool
M bool
PictureID uint16 /* 8 or 16 bits, picture ID */
L bool
TL0PICIDX uint8 /* 8 bits temporal level zero index */
// Optional Header If either of the T or K bits are set to 1,
// the TID/Y/KEYIDX extension field MUST be present.
T bool
TID uint8 /* 2 bits temporal layer idx */
Y bool
K bool
KEYIDX uint8 /* 5 bits of key frame idx */
HeaderSize int
// IsKeyFrame is a helper to detect if current packet is a keyframe
IsKeyFrame bool
}
// Unmarshal parses the passed byte slice and stores the result in the VP8 this method is called upon
func (v *VP8) Unmarshal(payload []byte) error {
if payload == nil {
return errNilPacket
}
payloadLen := len(payload)
if payloadLen < 1 {
return errShortPacket
}
idx := 0
v.FirstByte = payload[idx]
v.S = payload[idx]&0x10 > 0
// Check for extended bit control
if payload[idx]&0x80 > 0 {
idx++
if payloadLen < idx+1 {
return errShortPacket
}
v.I = payload[idx]&0x80 > 0
v.L = payload[idx]&0x40 > 0
v.T = payload[idx]&0x20 > 0
v.K = payload[idx]&0x10 > 0
if v.L && !v.T {
return errInvalidPacket
}
if v.I {
idx++
if payloadLen < idx+1 {
return errShortPacket
}
pid := payload[idx] & 0x7f
// if m is 1, then Picture ID is 15 bits
v.M = payload[idx]&0x80 > 0
if v.M {
idx++
if payloadLen < idx+1 {
return errShortPacket
}
v.PictureID = binary.BigEndian.Uint16([]byte{pid, payload[idx]})
} else {
v.PictureID = uint16(pid)
}
}
if v.L {
idx++
if payloadLen < idx+1 {
return errShortPacket
}
v.TL0PICIDX = payload[idx]
}
if v.T || v.K {
idx++
if payloadLen < idx+1 {
return errShortPacket
}
if v.T {
v.TID = (payload[idx] & 0xc0) >> 6
v.Y = (payload[idx] & 0x20) > 0
}
if v.K {
v.KEYIDX = payload[idx] & 0x1f
}
}
idx++
if payloadLen < idx+1 {
return errShortPacket
}
// Check is packet is a keyframe by looking at P bit in vp8 payload
v.IsKeyFrame = payload[idx]&0x01 == 0 && v.S
} else {
idx++
if payloadLen < idx+1 {
return errShortPacket
}
// Check is packet is a keyframe by looking at P bit in vp8 payload
v.IsKeyFrame = payload[idx]&0x01 == 0 && v.S
}
v.HeaderSize = idx
return nil
}
func (v *VP8) Marshal() ([]byte, error) {
buf := make([]byte, v.HeaderSize)
err := v.MarshalTo(buf)
return buf, err
}
func (v *VP8) MarshalTo(buf []byte) error {
if len(buf) < v.HeaderSize {
return errShortPacket
}
idx := 0
buf[idx] = v.FirstByte
if v.I || v.L || v.T || v.K {
buf[idx] |= 0x80 // X bit
idx++
xpos := idx
xval := byte(0)
idx++
if v.I {
xval |= (1 << 7)
if v.M {
buf[idx] = 0x80 | byte((v.PictureID>>8)&0x7f)
buf[idx+1] = byte(v.PictureID & 0xff)
idx += 2
} else {
buf[idx] = byte(v.PictureID)
idx++
}
}
if v.L {
xval |= (1 << 6)
buf[idx] = v.TL0PICIDX
idx++
}
if v.T || v.K {
buf[idx] = 0
if v.T {
xval |= (1 << 5)
buf[idx] = v.TID << 6
if v.Y {
buf[idx] |= (1 << 5)
}
}
if v.K {
xval |= (1 << 4)
buf[idx] |= v.KEYIDX & 0x1f
}
idx++
}
buf[xpos] = xval
} else {
buf[idx] &^= 0x80 // X bit
idx++
}
return nil
}
// -------------------------------------
func VPxPictureIdSizeDiff(mBit1 bool, mBit2 bool) int {
if mBit1 == mBit2 {
return 0
}
if mBit1 {
return 1
}
return -1
}
// -------------------------------------
// IsH264KeyFrame detects if h264 payload is a keyframe
// this code was taken from https://github.com/jech/galene/blob/codecs/rtpconn/rtpreader.go#L45
// all credits belongs to Juliusz Chroboczek @jech and the awesome Galene SFU
func IsH264KeyFrame(payload []byte) bool {
if len(payload) < 1 {
return false
}
nalu := payload[0] & 0x1F
if nalu == 0 {
// reserved
return false
} else if nalu <= 23 {
// simple NALU
return nalu == 7
} else if nalu == 24 || nalu == 25 || nalu == 26 || nalu == 27 {
// STAP-A, STAP-B, MTAP16 or MTAP24
i := 1
if nalu == 25 || nalu == 26 || nalu == 27 {
// skip DON
i += 2
}
for i < len(payload) {
if i+2 > len(payload) {
return false
}
length := uint16(payload[i])<<8 |
uint16(payload[i+1])
i += 2
if i+int(length) > len(payload) {
return false
}
offset := 0
if nalu == 26 {
offset = 3
} else if nalu == 27 {
offset = 4
}
if offset >= int(length) {
return false
}
n := payload[i+offset] & 0x1F
if n == 7 {
return true
} else if n >= 24 {
// is this legal?
logger.Debugw("Non-simple NALU within a STAP")
}
i += int(length)
}
if i == len(payload) {
return false
}
return false
} else if nalu == 28 || nalu == 29 {
// FU-A or FU-B
if len(payload) < 2 {
return false
}
if (payload[1] & 0x80) == 0 {
// not a starting fragment
return false
}
return payload[1]&0x1F == 7
}
return false
}
// -------------------------------------
// IsVP9KeyFrame detects if vp9 payload is a keyframe
// taken from https://github.com/jech/galene/blob/master/codecs/codecs.go
// all credits belongs to Juliusz Chroboczek @jech and the awesome Galene SFU
func IsVP9KeyFrame(payload []byte) bool {
var vp9 codecs.VP9Packet
_, err := vp9.Unmarshal(payload)
if err != nil || len(vp9.Payload) < 1 {
return false
}
if !vp9.B {
return false
}
if (vp9.Payload[0] & 0xc0) != 0x80 {
return false
}
profile := (vp9.Payload[0] >> 4) & 0x3
if profile != 3 {
return (vp9.Payload[0] & 0xC) == 0
}
return (vp9.Payload[0] & 0x6) == 0
}
// -------------------------------------
// IsAV1KeyFrame detects if av1 payload is a keyframe
// taken from https://github.com/jech/galene/blob/master/codecs/codecs.go
// all credits belongs to Juliusz Chroboczek @jech and the awesome Galene SFU
func IsAV1KeyFrame(payload []byte) bool {
if len(payload) < 2 {
return false
}
// Z=0, N=1
if (payload[0] & 0x88) != 0x08 {
return false
}
w := (payload[0] & 0x30) >> 4
getObu := func(data []byte, last bool) ([]byte, int, bool) {
if last {
return data, len(data), false
}
offset := 0
length := 0
for {
if len(data) <= offset {
return nil, offset, offset > 0
}
l := data[offset]
length |= int(l&0x7f) << (offset * 7)
offset++
if (l & 0x80) == 0 {
break
}
}
if len(data) < offset+length {
return data[offset:], len(data), true
}
return data[offset : offset+length],
offset + length, false
}
offset := 1
i := 0
for {
obu, length, truncated :=
getObu(payload[offset:], int(w) == i+1)
if len(obu) < 1 {
return false
}
tpe := (obu[0] & 0x38) >> 3
switch i {
case 0:
// OBU_SEQUENCE_HEADER
if tpe != 1 {
return false
}
default:
// OBU_FRAME_HEADER or OBU_FRAME
if tpe == 3 || tpe == 6 {
if len(obu) < 2 {
return false
}
// show_existing_frame == 0
if (obu[1] & 0x80) != 0 {
return false
}
// frame_type == KEY_FRAME
return (obu[1] & 0x60) == 0
}
}
if truncated || i >= int(w) {
// the first frame header is in a second
// packet, give up.
return false
}
offset += length
i++
}
}
// -------------------------------------