Files
pyxis/lib/lxst_audio/i2s_capture.cpp
torlando-tech 6744eb136d LXST voice call stability: fix hangup crash, signal queue, TX pump, mic tuning
- Fix use-after-free crash on hangup: set _call_state=IDLE before deleting
  _lxst_audio, preventing pump_call_tx() (runs without LVGL lock) from
  accessing freed memory
- Replace single-slot _call_signal_pending with 8-element ring buffer queue
  to prevent signal loss when CONNECTING+ESTABLISHED arrive in rapid succession
- Extract TX pump into pump_call_tx() called right after reticulum->loop()
  for low-latency audio TX without LVGL lock dependency (was buried at step 10)
- Tune ES7210 mic gain to 21dB (was 15dB) to improve Codec2 input level
  without ADC clipping that occurred at 24dB
- I2S capture: use APLL for accurate 8kHz clock, direct 8kHz sampling
  (no more 16→8kHz decimation), DMA 16x64 for encode burst headroom
- Reduce Reticulum log verbosity to LOG_INFO (was LOG_TRACE)
- BLE: add ble_hs_sched_reset() tiered recovery before reboot on desync,
  widen supervision timeout to 4.0s for WiFi coexistence
- Add UDP multicast log broadcasting and OTA flash support

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-25 10:57:14 -05:00

334 lines
12 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#include "i2s_capture.h"
#ifdef ARDUINO
#include <cstring>
#include <driver/i2s.h>
#include <esp_log.h>
#include <esp_heap_caps.h>
#include <Hardware/TDeck/Config.h>
#include "codec_wrapper.h"
#include "audio_filters.h"
#include "encoded_ring_buffer.h"
#include <Arduino.h>
// Bring the T-Deck hardware configuration namespace into scope for this file
// (presumably where the Audio::MIC_* pin constants used by init() live).
using namespace Hardware::TDeck;
// Tag passed to the ESP_LOGx macros throughout this file.
static const char* TAG = "LXST:Capture";
// Defined in main.cpp — sends to both Serial and UDP
extern "C" void pyxis_log(const char* msg);
// Defaulted: no work at construction. The I2S driver is installed by init(),
// buffers are allocated by configureEncoder(), and the task is created by start().
I2SCapture::I2SCapture() = default;
/// Tears the capture pipeline down: stops the capture task, makes sure the
/// I2S_NUM_1 driver is uninstalled, then frees the encoder-side buffers.
I2SCapture::~I2SCapture() {
    stop();

    // stop() returns early when no capture is running, so an instance that was
    // init()'ed but never started still holds the driver at this point.
    const bool driverStillInstalled = i2sInitialized_;
    if (driverStillInstalled) {
        i2s_stop(I2S_NUM_1);
        i2s_driver_uninstall(I2S_NUM_1);
        i2sInitialized_ = false;
    }

    releaseBuffers();
}
bool I2SCapture::init() {
if (i2sInitialized_) return true;
// Defensively uninstall in case a previous session leaked the driver
i2s_driver_uninstall(I2S_NUM_1);
// Configure I2S_NUM_1 for mic capture from ES7210
// Settings match official LilyGO T-Deck Plus Microphone example
i2s_config_t i2s_config = {};
i2s_config.mode = static_cast<i2s_mode_t>(I2S_MODE_MASTER | I2S_MODE_RX);
i2s_config.sample_rate = I2S_SAMPLE_RATE; // 8kHz — matches Codec2 directly
i2s_config.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT;
i2s_config.channel_format = I2S_CHANNEL_FMT_ALL_LEFT;
i2s_config.communication_format = I2S_COMM_FORMAT_STAND_I2S;
i2s_config.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1;
// At 8kHz × 2 TDM channels = 16ksps. Filter+encode burst for 1600 samples
// takes ~20ms; 16 × 64 = 1024 samples = 64ms headroom prevents DMA overflow.
i2s_config.dma_buf_count = 16;
i2s_config.dma_buf_len = 64;
i2s_config.use_apll = true; // APLL gives accurate audio clocks (vs main PLL integer dividers)
i2s_config.tx_desc_auto_clear = true;
i2s_config.fixed_mclk = 4096000; // Force 4.096MHz MCLK (matches ES7210 coeff table for 8kHz)
i2s_config.mclk_multiple = I2S_MCLK_MULTIPLE_256; // Ignored when fixed_mclk is set
i2s_config.bits_per_chan = I2S_BITS_PER_CHAN_16BIT;
// TDM channel mask — required for ES7210 on T-Deck Plus
i2s_config.chan_mask = static_cast<i2s_channel_t>(I2S_TDM_ACTIVE_CH0 | I2S_TDM_ACTIVE_CH1);
esp_err_t err = i2s_driver_install(I2S_NUM_1, &i2s_config, 0, NULL);
if (err != ESP_OK) {
ESP_LOGE(TAG, "I2S_NUM_1 driver install failed: %d", err);
return false;
}
i2s_pin_config_t pin_config = {};
pin_config.mck_io_num = Audio::MIC_MCLK;
pin_config.bck_io_num = Audio::MIC_SCK;
pin_config.ws_io_num = Audio::MIC_LRCK;
pin_config.data_in_num = Audio::MIC_DIN;
pin_config.data_out_num = I2S_PIN_NO_CHANGE;
err = i2s_set_pin(I2S_NUM_1, &pin_config);
if (err != ESP_OK) {
ESP_LOGE(TAG, "I2S_NUM_1 pin config failed: %d", err);
i2s_driver_uninstall(I2S_NUM_1);
return false;
}
i2s_zero_dma_buffer(I2S_NUM_1);
i2sInitialized_ = true;
ESP_LOGI(TAG, "I2S capture initialized: %dHz 16-bit TDM, MCLK=4.096MHz", I2S_SAMPLE_RATE);
return true;
}
/// Binds a codec to the capture pipeline and allocates the batch buffers.
///
/// Any previous configuration is released first. Because that frees the
/// buffers captureLoop() works on, this must not be called while a capture
/// is running.
///
/// @param codec          Codec2 wrapper used for encoding. Not owned; must be
///                       non-null and already created.
/// @param enableFilters  When true, a VoiceFilterChain is created and applied
///                       to every batch before encoding.
/// @return true on success. false when the codec is invalid, the batch size is
///         not positive, or any buffer allocation failed; in every failure
///         case no codec stays bound, so start() will refuse to run.
bool I2SCapture::configureEncoder(Codec2Wrapper* codec, bool enableFilters) {
    releaseBuffers();

    if (!codec || !codec->isCreated()) {
        ESP_LOGE(TAG, "Invalid codec pointer");
        return false;
    }

    // Accumulate FRAMES_PER_BATCH codec frames before filter+encode.
    // Columba uses 200ms (1600 samples for Codec2 3200) so the AGC operates
    // on meaningful block sizes. With only 160 samples (20ms) the AGC blocks
    // are 16 samples and gain-pump, producing buzzy audio.
    const int batchSamples = codec->samplesPerFrame() * FRAMES_PER_BATCH;
    if (batchSamples <= 0) {
        // A zero-sized batch would make the accumulate loop in captureLoop()
        // spin forever without consuming input.
        ESP_LOGE(TAG, "Invalid batch size: %d samples", batchSamples);
        return false;
    }

    codec_ = codec;
    frameSamples_ = batchSamples;
    filtersEnabled_ = enableFilters;

    // Ring buffer that hands encoded packets to the reader side.
    encodedRing_ = new EncodedRingBuffer(ENCODED_RING_SLOTS, ENCODED_RING_MAX_BYTES);

    // Accumulation buffer in PSRAM (one full batch of mono samples).
    accumBuffer_ = static_cast<int16_t*>(
        heap_caps_malloc(sizeof(int16_t) * frameSamples_, MALLOC_CAP_SPIRAM));
    accumCount_ = 0;

    // Zero-filled buffer in PSRAM that is encoded instead of mic data while muted.
    silenceBuf_ = static_cast<int16_t*>(
        heap_caps_calloc(frameSamples_, sizeof(int16_t), MALLOC_CAP_SPIRAM));

    // The capture task dereferences all three unconditionally, so a failed
    // allocation must abort the configuration instead of surfacing later as a
    // null-pointer write inside the task.
    if (!encodedRing_ || !accumBuffer_ || !silenceBuf_) {
        ESP_LOGE(TAG, "Capture buffer allocation failed (%d samples/batch)", frameSamples_);
        releaseBuffers();  // also clears codec_, which blocks start()
        return false;
    }

    // Filter chain: 1 channel (mono), voice band 300-3400Hz, AGC -12dB target, 12dB max gain
    // PGA gain is 21dB; loud speech peaks around -6dBFS, quiet around -20dBFS.
    // AGC boosts quiet sections; 12dB max prevents noise pumping during silence.
    if (enableFilters) {
        filterChain_ = new VoiceFilterChain(1, 300.0f, 3400.0f, -12.0f, 12.0f);
    }

    ESP_LOGI(TAG, "Encoder configured: Codec2 mode %d, %d samples/batch (%d x %d), %d bytes/frame, filters=%d",
             codec_->libraryMode(), frameSamples_, FRAMES_PER_BATCH,
             codec_->samplesPerFrame(), codec_->bytesPerFrame(), enableFilters);
    return true;
}
bool I2SCapture::start() {
if (!i2sInitialized_ || !codec_ || capturing_.load()) return false;
// Set capturing BEFORE starting task to avoid race (same pattern as LXST-kt)
capturing_.store(true, std::memory_order_relaxed);
BaseType_t ret = xTaskCreatePinnedToCore(
captureTask, "lxst_cap", CAPTURE_TASK_STACK, this,
CAPTURE_TASK_PRIORITY, reinterpret_cast<TaskHandle_t*>(&taskHandle_),
CAPTURE_TASK_CORE);
if (ret != pdPASS) {
ESP_LOGE(TAG, "Failed to create capture task");
capturing_.store(false, std::memory_order_relaxed);
return false;
}
ESP_LOGI(TAG, "Capture started");
return true;
}
void I2SCapture::stop() {
if (!capturing_.load()) return;
capturing_.store(false, std::memory_order_relaxed);
// Wait for task to exit
if (taskHandle_) {
vTaskDelay(pdMS_TO_TICKS(50));
taskHandle_ = nullptr;
}
if (i2sInitialized_) {
i2s_stop(I2S_NUM_1);
i2s_driver_uninstall(I2S_NUM_1);
i2sInitialized_ = false;
}
ESP_LOGI(TAG, "Capture stopped");
}
void I2SCapture::releaseBuffers() {
codec_ = nullptr; // Not owned — don't delete
delete filterChain_;
filterChain_ = nullptr;
delete encodedRing_;
encodedRing_ = nullptr;
free(accumBuffer_);
accumBuffer_ = nullptr;
free(silenceBuf_);
silenceBuf_ = nullptr;
accumCount_ = 0;
}
void I2SCapture::captureTask(void* param) {
auto* self = static_cast<I2SCapture*>(param);
self->captureLoop();
vTaskDelete(NULL);
}
void I2SCapture::captureLoop() {
// I2S read buffer: TDM interleaved, 2 channels at 8kHz
static constexpr int READ_SAMPLES = 256;
int16_t readBuf[READ_SAMPLES];
// CH0 mono after TDM deinterleave (÷2)
int16_t ch0Buf[READ_SAMPLES / 2];
size_t bytesRead = 0;
{
char logbuf[96];
snprintf(logbuf, sizeof(logbuf), "[CAP] Capture task on core %d, I2S=%dHz, codec=%dHz, stack=%d",
xPortGetCoreID(), I2S_SAMPLE_RATE, CODEC_SAMPLE_RATE, CAPTURE_TASK_STACK);
pyxis_log(logbuf);
}
uint32_t framesEncoded = 0;
uint32_t totalSamples = 0; // Total mono samples after deinterleave
uint32_t rateCheckMs = millis(); // For sample rate measurement
int16_t runningPeak = 0; // Peak of mono samples per interval
uint32_t ringDrops = 0; // Ring buffer overflow counter
while (capturing_.load(std::memory_order_relaxed)) {
// Read samples from I2S DMA (at 8kHz, TDM 2-ch)
esp_err_t err = i2s_read(I2S_NUM_1, readBuf, sizeof(readBuf), &bytesRead,
pdMS_TO_TICKS(100));
if (err != ESP_OK || bytesRead == 0) continue;
int samplesRead = bytesRead / sizeof(int16_t);
// Dump first raw I2S samples on each capture start
if (framesEncoded == 0 && samplesRead >= 16 && totalSamples == 0) {
char rawdump[192];
int pos = snprintf(rawdump, sizeof(rawdump),
"[CAP] Raw I2S (%d read, %zu bytes): ", samplesRead, bytesRead);
for (int d = 0; d < 16 && pos < 180; d++)
pos += snprintf(rawdump + pos, sizeof(rawdump) - pos, "%d ", readBuf[d]);
pyxis_log(rawdump);
}
// TDM deinterleave — extract CH0 (mic) at 8kHz.
// readBuf is [CH0,CH1,CH0,CH1,...], CH0 at even indices.
int ch0Count = samplesRead / 2;
for (int i = 0; i < ch0Count; i++) {
ch0Buf[i] = readBuf[i * 2];
int16_t v = ch0Buf[i] < 0 ? -ch0Buf[i] : ch0Buf[i];
if (v > runningPeak) runningPeak = v;
}
// Measure actual sample rate
totalSamples += ch0Count;
uint32_t now = millis();
uint32_t elapsed = now - rateCheckMs;
if (elapsed >= 2000) {
uint32_t rate = (totalSamples * 1000) / elapsed;
{
char logbuf[128];
snprintf(logbuf, sizeof(logbuf), "[CAP] rate=%luHz frames=%lu peak=%d ringDrops=%lu",
(unsigned long)rate, (unsigned long)framesEncoded,
runningPeak, (unsigned long)ringDrops);
pyxis_log(logbuf);
}
totalSamples = 0;
rateCheckMs = now;
runningPeak = 0;
}
// Accumulate mono samples into frame-sized buffer
int offset = 0;
while (offset < ch0Count && capturing_.load(std::memory_order_relaxed)) {
int needed = frameSamples_ - accumCount_;
int available = ch0Count - offset;
int toCopy = (available < needed) ? available : needed;
memcpy(accumBuffer_ + accumCount_, ch0Buf + offset, toCopy * sizeof(int16_t));
accumCount_ += toCopy;
offset += toCopy;
if (accumCount_ == frameSamples_) {
// Full frame ready — process it
int16_t* frameData = muted_.load(std::memory_order_relaxed)
? silenceBuf_ : accumBuffer_;
// Apply voice filters
if (filtersEnabled_ && filterChain_ && !muted_.load(std::memory_order_relaxed)) {
filterChain_->process(frameData, frameSamples_, CODEC_SAMPLE_RATE);
}
// Log PCM levels for first few frames and periodically
if (framesEncoded < 5 || (framesEncoded % 500 == 0)) {
int16_t maxVal = 0;
for (int s = 0; s < frameSamples_; s++) {
int16_t v = accumBuffer_[s] < 0 ? -accumBuffer_[s] : accumBuffer_[s];
if (v > maxVal) maxVal = v;
}
char logbuf[96];
snprintf(logbuf, sizeof(logbuf), "[CAP] PCM peak=%d (first=%d,%d,%d,%d)",
maxVal, accumBuffer_[0], accumBuffer_[1],
accumBuffer_[2], accumBuffer_[3]);
pyxis_log(logbuf);
}
// Encode
int encodedLen = codec_->encode(frameData, frameSamples_,
encodeBuf_, sizeof(encodeBuf_));
if (encodedLen > 0) {
framesEncoded++;
if (framesEncoded <= 3 || (framesEncoded % 500 == 0)) {
char logbuf[128];
char hex[64];
int hpos = 0;
for (int h = 0; h < encodedLen && h < 20 && hpos < 60; h++)
hpos += snprintf(hex + hpos, 64 - hpos, "%02X ", encodeBuf_[h]);
snprintf(logbuf, sizeof(logbuf), "[CAP] Encoded #%lu: %d bytes: %s",
(unsigned long)framesEncoded, encodedLen, hex);
pyxis_log(logbuf);
}
}
if (encodedLen > 0 && encodedRing_) {
if (!encodedRing_->write(encodeBuf_, encodedLen)) {
ringDrops++;
}
}
accumCount_ = 0;
}
}
}
ESP_LOGI(TAG, "Capture task exiting");
}
/// Fetches the next encoded packet from the ring buffer.
///
/// @param dest          Destination buffer, forwarded to EncodedRingBuffer::read().
/// @param maxLength     Capacity of dest, forwarded unchanged.
/// @param actualLength  Out-parameter forwarded unchanged.
/// @return false when no encoder has been configured (no ring buffer);
///         otherwise whatever EncodedRingBuffer::read() returns.
bool I2SCapture::readEncodedPacket(uint8_t* dest, int maxLength, int* actualLength) {
    return encodedRing_ ? encodedRing_->read(dest, maxLength, actualLength) : false;
}
/// Reports the ring buffer's slot count as seen by the reader.
///
/// NOTE(review): this forwards EncodedRingBuffer::availableSlots(); confirm
/// that it counts filled (readable) slots rather than free ones.
///
/// @return 0 when no encoder has been configured; otherwise the value of
///         EncodedRingBuffer::availableSlots().
int I2SCapture::availablePackets() const {
    return encodedRing_ ? encodedRing_->availableSlots() : 0;
}
#endif // ARDUINO