Files
pyxis/tests/interop/test_codec_round_trip.py
T
torlando-tech 6e47cb808b Increase playback buffer for jitter-free LXST RX audio
PCM_RING_FRAMES 16→50 (320ms→1000ms capacity) and
PREBUFFER_FRAMES 3→15 (60ms→300ms prebuffer) to match
LXST-kt's buffering strategy. Interop test suite confirms
zero underruns with ±100ms jitter at these settings.

Also adds tests/interop/ with 48 Python tests verifying
wire format, codec round-trip, and pipeline compatibility
between Pyxis, Python LXST, and LXST-kt implementations.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 10:57:07 -05:00

303 lines
12 KiB
Python

"""
Codec round-trip interop tests.
Verifies that audio encoded by one implementation can be decoded by another.
NOTE: Codec2 is a parametric speech codec — it models speech characteristics
(pitch, formants) rather than the waveform. SNR metrics are meaningless for
Codec2 since the reconstructed waveform intentionally differs from the input.
Instead, we verify:
- Encoded bytes are deterministic and consistent across implementations
- Both decoders can decode each other's output without errors
- Decoded output has non-trivial amplitude (not silence)
- Cross-decode produces the same samples as self-decode, up to a small
  int16/float normalization tolerance
"""
import math
import struct
import numpy as np
import pycodec2
import pytest
from conftest import (
CODEC_CODEC2, MODE_HEADERS, HEADER_MODES,
encode_codec2_subframes, batch_subframes_pyxis_style,
build_pyxis_audio_packet, build_columba_audio_packet,
parse_pyxis_rx, parse_lxst_python_rx,
)
def generate_test_audio(duration_s=1.0, sr=8000, freq_hz=440, amplitude=16000):
    """Generate a deterministic sine tone as int16 PCM.

    With the default arguments this is a 440 Hz tone at 8 kHz with a peak
    of 16000, which is what every existing caller in this file relies on.

    Args:
        duration_s: Length of the tone in seconds.
        sr: Sample rate in Hz.
        freq_hz: Tone frequency in Hz.
        amplitude: Peak amplitude in int16 units; must lie in [0, 32767].

    Returns:
        A 1-D ``np.int16`` array holding ``int(sr * duration_s)`` samples.

    Raises:
        ValueError: If ``amplitude`` does not fit in the int16 range. Without
            this guard the final cast would wrap around silently.
    """
    if not 0 <= amplitude <= 32767:
        raise ValueError(f"amplitude must be within [0, 32767], got {amplitude}")
    t = np.arange(int(sr * duration_s)) / sr
    signal = np.sin(2 * np.pi * freq_hz * t) * amplitude
    # The float -> int16 cast truncates toward zero; no rounding is applied.
    return signal.astype(np.int16)
class TestCodec2ByteEquivalence:
    """
    Test that encoding sub-frames individually (Pyxis style) produces
    the same bytes as encoding them as part of a larger buffer (LXST-kt style).
    """

    @staticmethod
    def _encode_frames(codec, pcm, copy_frames=False):
        """Encode every whole frame of ``pcm`` and return the joined bytes.

        Args:
            codec: Encoder exposing ``samples_per_frame()`` and ``encode()``.
            pcm: 1-D int16 sample buffer.
            copy_frames: When true, each frame is handed to the encoder as a
                standalone copy instead of a slice of ``pcm``.

        Returns:
            The concatenation of all encoded frames, in order. A trailing
            partial frame, if any, is ignored.
        """
        spf = codec.samples_per_frame()
        starts = range(0, len(pcm) - spf + 1, spf)
        frames = (pcm[s:s + spf] for s in starts)
        return b"".join(
            codec.encode(frame.copy() if copy_frames else frame) for frame in frames
        )

    def test_individual_vs_batch_encode_same_instance(self):
        """
        Encoding 10 standalone sub-frame buffers should produce the same
        bytes as encoding the same sub-frames sliced out of one larger buffer.

        Every encode call in this file passes exactly one frame, so "batch"
        is modelled as walking the large buffer frame by frame. Each path
        gets a fresh encoder so both start from the same initial state.
        """
        pcm = generate_test_audio(0.2)[:1600]  # 10 sub-frames at 3200 bps
        individual = self._encode_frames(pycodec2.Codec2(3200), pcm, copy_frames=True)
        batch = self._encode_frames(pycodec2.Codec2(3200), pcm)
        # Guard against a vacuous pass where both sides are empty.
        assert individual, "Encoder produced no output"
        assert individual == batch

    def test_separate_instances_produce_same_bytes(self):
        """
        Two separate codec2 instances encoding the same audio should
        produce identical bytes, even when their encode calls are interleaved.
        """
        pcm = generate_test_audio(0.2)[:1600]
        codec_a = pycodec2.Codec2(3200)
        codec_b = pycodec2.Codec2(3200)
        spf = codec_a.samples_per_frame()
        frames_a = []
        frames_b = []
        for start in range(0, len(pcm) - spf + 1, spf):
            chunk = pcm[start:start + spf]
            frames_a.append(codec_a.encode(chunk))
            frames_b.append(codec_b.encode(chunk))
        # Guard against a vacuous pass where no frame was encoded.
        assert frames_a, "Encoder produced no output"
        assert b"".join(frames_a) == b"".join(frames_b)

    def test_encode_decode_produces_non_silence(self):
        """
        Codec2 encode→decode should produce non-zero output.
        (Codec2 is parametric — we can't check waveform similarity,
        but we can verify it's not outputting silence.)
        """
        pcm = generate_test_audio(1.0)[:8000]
        codec_enc = pycodec2.Codec2(3200)
        codec_dec = pycodec2.Codec2(3200)
        spf = codec_enc.samples_per_frame()
        n_frames = len(pcm) // spf
        decoded = np.zeros(n_frames * spf, dtype=np.int16)
        for i in range(n_frames):
            raw = codec_enc.encode(pcm[i * spf:(i + 1) * spf])
            decoded[i * spf:(i + 1) * spf] = codec_dec.decode(raw)
        max_amp = np.max(np.abs(decoded))
        print(f"Codec2 3200 decode max amplitude: {max_amp}")
        assert max_amp > 100, f"Decoded audio is near-silence: max_amp={max_amp}"

    def test_frame_size_consistency(self):
        """Verify encoded frame sizes match expectations for each mode."""
        # bitrate -> (samples per frame, bytes per frame)
        modes = {3200: (160, 8), 2400: (160, 6), 1600: (320, 8)}
        for bitrate, (expected_spf, expected_bpf) in modes.items():
            c = pycodec2.Codec2(bitrate)
            assert c.samples_per_frame() == expected_spf, \
                f"Mode {bitrate}: SPF={c.samples_per_frame()}, expected {expected_spf}"
            assert c.bytes_per_frame() == expected_bpf, \
                f"Mode {bitrate}: BPF={c.bytes_per_frame()}, expected {expected_bpf}"
class TestCrossImplementationDecode:
    """
    Test that encoded data from one side can be decoded by the other.
    """

    def test_pyxis_encoded_decoded_by_lxst_python(self, lxst_codec2_3200):
        """
        Pyxis encodes 10 sub-frames → batch → wire.
        Python LXST parses and decodes.
        Verifies the decoded shape and non-silence output.
        """
        pcm = generate_test_audio(0.2)[:1600]
        encoder = pycodec2.Codec2(3200)
        subframes = encode_codec2_subframes(encoder, pcm, mode_header=MODE_HEADERS[3200])
        batch = batch_subframes_pyxis_style(subframes, MODE_HEADERS[3200])
        wire = build_pyxis_audio_packet(batch)
        result = parse_lxst_python_rx(wire)
        _codec_type, codec_data = result["frames"][0]
        decoded = lxst_codec2_3200.decode(codec_data)
        assert decoded.shape[0] == 1600
        assert decoded.shape[1] == 1
        max_amp = np.max(np.abs(decoded))
        print(f"Pyxis→LXST decode max amplitude: {max_amp:.4f}")
        assert max_amp > 0.001, f"Decoded audio is near-silence: max_amp={max_amp}"

    def test_lxst_python_encoded_decoded_by_pyxis(self, lxst_codec2_3200):
        """
        Columba encodes 200ms → wire.
        Pyxis parses and decodes with pycodec2.
        Uses pycodec2 directly for encoding (same underlying libcodec2 as LXST)
        to avoid Python LXST's array shape requirements.
        """
        pcm = generate_test_audio(0.2)[:1600]
        # Encode like Columba: [mode_header] + [N * raw_codec2]
        encoder = pycodec2.Codec2(3200)
        enc_spf = encoder.samples_per_frame()
        encoded = bytes([MODE_HEADERS[3200]]) + b"".join(
            encoder.encode(pcm[i * enc_spf:(i + 1) * enc_spf])
            for i in range(len(pcm) // enc_spf)
        )
        wire = build_columba_audio_packet(CODEC_CODEC2, encoded)
        result = parse_pyxis_rx(wire)
        _codec_type, codec_data = result["frames"][0]
        # Decode like Pyxis codec_wrapper.cpp
        header = codec_data[0]
        raw_data = codec_data[1:]
        # NOTE(review): assumes the parser leaves the mode header at byte 0 of
        # the payload, as the header/raw split above implies — confirm against
        # conftest.parse_pyxis_rx.
        assert header == MODE_HEADERS[3200], f"Unexpected mode header: {header!r}"
        decoder = pycodec2.Codec2(3200)
        spf = decoder.samples_per_frame()
        bpf = decoder.bytes_per_frame()
        n_frames = len(raw_data) // bpf
        decoded_pcm = np.zeros(n_frames * spf, dtype=np.int16)
        for i in range(n_frames):
            decoded_pcm[i * spf:(i + 1) * spf] = decoder.decode(
                raw_data[i * bpf:(i + 1) * bpf])
        assert len(decoded_pcm) == 1600
        max_amp = np.max(np.abs(decoded_pcm))
        print(f"LXST→Pyxis decode max amplitude: {max_amp}")
        assert max_amp > 100, f"Decoded audio is near-silence: max_amp={max_amp}"

    def test_cross_decode_consistency(self):
        """
        Both decoders (pycodec2 and Python LXST) should produce matching
        output, within a small tolerance, when given the same encoded bytes.
        This is the critical interop test — if the decoded samples match,
        the audio quality will be identical on both devices.
        """
        from LXST.Codecs.Codec2 import Codec2 as LXSTCodec2

        pcm = generate_test_audio(0.2)[:1600]
        encoder = pycodec2.Codec2(3200)
        spf = encoder.samples_per_frame()
        bpf = encoder.bytes_per_frame()
        n_frames = len(pcm) // spf
        # Encode with pycodec2 (raw bytes, no mode header)
        raw_encoded = b"".join(
            encoder.encode(pcm[i * spf:(i + 1) * spf]) for i in range(n_frames)
        )
        # Full encoded with mode header (as sent over wire)
        full_encoded = bytes([MODE_HEADERS[3200]]) + raw_encoded
        # Decode with pycodec2
        pyxis_decoder = pycodec2.Codec2(3200)
        pyxis_decoded = np.zeros(n_frames * spf, dtype=np.int16)
        for i in range(n_frames):
            pyxis_decoded[i * spf:(i + 1) * spf] = pyxis_decoder.decode(
                raw_encoded[i * bpf:(i + 1) * bpf])
        # Decode with Python LXST
        lxst_decoder = LXSTCodec2(mode=3200)
        lxst_decoded = lxst_decoder.decode(full_encoded)
        lxst_decoded_int16 = (lxst_decoded[:, 0] * 32767).astype(np.int16)
        # Fail with a readable message instead of a broadcasting error if the
        # two decoders disagree on the number of samples.
        assert lxst_decoded_int16.shape == pyxis_decoded.shape, \
            f"Sample count mismatch: {lxst_decoded_int16.shape} vs {pyxis_decoded.shape}"
        # Both decoders should produce very close output.
        # Python LXST decodes to float32 then we convert back to int16:
        #   int16 → float32(/32768) → decode → float32 → int16(*32767)
        # The asymmetric 32768/32767 plus float32 precision causes ±~40 sample diff.
        # This is fine — it's a normalization artifact, not a codec mismatch.
        diff = np.abs(pyxis_decoded.astype(np.int32) - lxst_decoded_int16.astype(np.int32))
        max_diff = np.max(diff)
        mean_diff = np.mean(diff)
        print(f"Cross-decode diff: max={max_diff}, mean={mean_diff:.2f}")
        assert max_diff <= 50, f"Decoded samples differ too much: max_diff={max_diff}"
class TestBatchSizes:
    """Exercise the range of sub-frame batch sizes Pyxis might put on the wire."""

    # Sub-frame counts shared by both parametrized tests below.
    _BATCH_SIZES = [1, 5, 10, 15, 20, 25, 30]

    @pytest.mark.parametrize("n_subframes", _BATCH_SIZES)
    def test_variable_batch_decode(self, n_subframes):
        """A Pyxis batch of 1 to 30 sub-frames must decode with Python LXST."""
        from LXST.Codecs.Codec2 import Codec2 as LXSTCodec2

        sample_count = n_subframes * 160
        tone = generate_test_audio(1.0)[:sample_count]
        tx_codec = pycodec2.Codec2(3200)
        frames = encode_codec2_subframes(tx_codec, tone, mode_header=MODE_HEADERS[3200])
        assert len(frames) == n_subframes

        packet = build_pyxis_audio_packet(
            batch_subframes_pyxis_style(frames, MODE_HEADERS[3200])
        )
        parsed = parse_lxst_python_rx(packet)
        _, payload = parsed["frames"][0]

        rx_codec = LXSTCodec2(mode=3200)
        pcm_out = rx_codec.decode(payload)
        assert pcm_out.shape[0] == sample_count
        assert np.max(np.abs(pcm_out)) > 0.001

    @pytest.mark.parametrize("n_subframes", _BATCH_SIZES)
    def test_variable_batch_pyxis_decode(self, n_subframes):
        """The Pyxis parser must return one frame of the expected length."""
        tone = generate_test_audio(1.0)[:n_subframes * 160]
        tx_codec = pycodec2.Codec2(3200)
        frames = encode_codec2_subframes(tx_codec, tone, mode_header=MODE_HEADERS[3200])
        packet = build_pyxis_audio_packet(
            batch_subframes_pyxis_style(frames, MODE_HEADERS[3200])
        )

        parsed = parse_pyxis_rx(packet)
        assert len(parsed["frames"]) == 1
        _, payload = parsed["frames"][0]
        # One mode-header byte followed by 8 bytes per sub-frame.
        assert len(payload) == 1 + n_subframes * 8
class TestCodec2ModeNegotiation:
    """Check mode-header driven codec switching across implementations."""

    def test_mode_switch_mid_stream(self):
        """
        Feed one Python LXST decoder a 3200 bps packet followed by a
        1600 bps packet and check that both decode to the expected length.
        Pyxis and LXST-kt both signal the codec mode through the mode
        header byte, so a receiver has to cope with it changing mid-stream.
        """
        from LXST.Codecs.Codec2 import Codec2 as LXSTCodec2

        tone = generate_test_audio(1.0)[:8000]

        def pyxis_packet(bitrate, samples):
            # Encode `samples` at `bitrate` and wrap them the way Pyxis does.
            tx_codec = pycodec2.Codec2(bitrate)
            frames = encode_codec2_subframes(
                tx_codec, samples, mode_header=MODE_HEADERS[bitrate]
            )
            batched = batch_subframes_pyxis_style(frames, MODE_HEADERS[bitrate])
            return build_pyxis_audio_packet(batched)

        # First packet: 3200 bps (10 sub-frames of 160 samples).
        packet_3200 = pyxis_packet(3200, tone[:1600])
        # Second packet: 1600 bps (5 sub-frames of 320 samples).
        packet_1600 = pyxis_packet(1600, tone[1600:3200])

        # A single decoder handles both packets (it should auto-switch).
        rx_codec = LXSTCodec2(mode=3200)

        first_rx = parse_lxst_python_rx(packet_3200)
        first_pcm = rx_codec.decode(first_rx["frames"][0][1])
        assert first_pcm.shape[0] == 1600  # 10 * 160

        second_rx = parse_lxst_python_rx(packet_1600)
        second_pcm = rx_codec.decode(second_rx["frames"][0][1])
        # 1600 bps: SPF=320, 1600 samples / 320 = 5 sub-frames
        assert second_pcm.shape[0] == 1600  # 5 * 320