LXST audio hardware config: ES7210 mic pins, tone helpers, platformio deps

- Add ES7210 I2C address and I2S mic capture pin definitions
- Add ring/hangup tone helpers to Tone library
- Add lxst_audio library scaffold
- Add Codec2 dependency to platformio.ini

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
torlando-tech
2026-02-19 16:41:47 -05:00
parent 8f265da0bb
commit c1af11d75e
21 changed files with 2109 additions and 1 deletions

View File

@@ -0,0 +1,221 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#include "audio_filters.h"
#include <cmath>
#include <algorithm>
#include <cstring>
#include <cstdlib>
#ifdef ARDUINO
#include <esp_heap_caps.h>
#endif
// AGC tuning constants. Matches AudioFilters.kt constants from LXST-kt.
// Attack/release/hold times are multiplied by the sample rate (Hz) in
// VoiceFilterChain::process(), i.e. they are expressed in seconds.
// Attack time: sets the smoothing coefficient used when the gain must drop.
static constexpr float AGC_ATTACK_TIME = 0.0001f;
// Release time: sets the smoothing coefficient used when the gain may rise.
static constexpr float AGC_RELEASE_TIME = 0.002f;
// Hold time: converted to a sample count that delays release after an attack.
static constexpr float AGC_HOLD_TIME = 0.001f;
// Blocks whose RMS does not exceed this level leave the gain target unchanged.
static constexpr float AGC_TRIGGER_LEVEL = 0.003f;
// Absolute sample ceiling enforced by the peak limiter at the end of applyAGC().
static constexpr float AGC_PEAK_LIMIT = 0.75f;
// Number of blocks each applyAGC() call is divided into for gain estimation.
static constexpr int AGC_BLOCK_TARGET = 10;
// Allocate `bytes` of uninitialized memory for filter state / work buffers.
// With BOARD_HAS_PSRAM defined the request goes to heap_caps_malloc with
// MALLOC_CAP_SPIRAM; otherwise plain malloc is used. Returns whatever the
// underlying allocator returns; callers in this file release it with free().
// NOTE(review): heap_caps_malloc is declared by <esp_heap_caps.h>, which this
// file only includes under ARDUINO — presumably BOARD_HAS_PSRAM implies
// ARDUINO; confirm for non-Arduino builds.
static void* psram_alloc(size_t bytes) {
#ifdef BOARD_HAS_PSRAM
return heap_caps_malloc(bytes, MALLOC_CAP_SPIRAM);
#else
return malloc(bytes);
#endif
}
// Build a filter chain for `channels` interleaved channels.
// Stores the cutoff / AGC parameters, allocates one float of state per channel
// for each stage via psram_alloc(), and seeds the state: filter memories start
// at 0 and the AGC gain starts at unity. The work buffer is allocated lazily
// by process().
VoiceFilterChain::VoiceFilterChain(int channels, float hpCutoff, float lpCutoff,
                                   float agcTargetDb, float agcMaxGain)
    : channels_(channels),
      hpCutoff_(hpCutoff),
      lpCutoff_(lpCutoff),
      agcTargetDb_(agcTargetDb),
      agcMaxGain_(agcMaxGain),
      workBuffer_(nullptr) {
    // One float per channel for every state array.
    auto allocPerChannel = [channels]() {
        return static_cast<float*>(psram_alloc(sizeof(float) * channels));
    };

    hp_.filterStates = allocPerChannel();
    hp_.lastInputs = allocPerChannel();
    lp_.filterStates = allocPerChannel();
    agc_.currentGain = allocPerChannel();

    int ch = 0;
    while (ch < channels) {
        agc_.currentGain[ch] = 1.0f;
        lp_.filterStates[ch] = 0.0f;
        hp_.lastInputs[ch] = 0.0f;
        hp_.filterStates[ch] = 0.0f;
        ++ch;
    }
}
// Release every heap buffer owned by the chain: the per-channel state arrays
// and the lazily allocated work buffer (free() accepts a null work buffer).
VoiceFilterChain::~VoiceFilterChain() {
    float* const owned[] = {
        hp_.filterStates,
        hp_.lastInputs,
        lp_.filterStates,
        agc_.currentGain,
        workBuffer_,
    };
    for (float* buffer : owned) {
        free(buffer);
    }
}
// Run interleaved int16 PCM through HPF -> LPF -> AGC, in place.
//
// @param samples     interleaved PCM, modified in place
// @param numSamples  total sample count (frames * channels)
// @param sampleRate  sample rate in Hz; filter/AGC coefficients are recomputed
//                    whenever it differs from the previously seen rate
//
// The call is a no-op (samples left untouched) when the arguments cannot be
// processed safely: null buffer, non-positive counts/rate, fewer samples than
// one full frame, or failure to allocate the float work buffer.
void VoiceFilterChain::process(int16_t* samples, int numSamples, int sampleRate) {
    // Guard against divide-by-zero (channels_, sampleRate) and null input.
    if (samples == nullptr || numSamples <= 0 || sampleRate <= 0 || channels_ <= 0) return;

    const int numFrames = numSamples / channels_;
    // With zero whole frames the filter stages would index before the buffer.
    if (numFrames <= 0) return;

    // Ensure work buffer is large enough
    if (workBufferSize_ < numSamples) {
        free(workBuffer_);
        workBuffer_ = static_cast<float*>(psram_alloc(sizeof(float) * numSamples));
        if (workBuffer_ == nullptr) {
            // Allocation failed: record that no buffer is held and pass the
            // audio through unfiltered rather than dereferencing null.
            workBufferSize_ = 0;
            return;
        }
        workBufferSize_ = numSamples;
    }

    // Convert int16 -> float [-1.0, 1.0]
    for (int i = 0; i < numSamples; ++i) {
        workBuffer_[i] = samples[i] / 32768.0f;
    }

    // Update coefficients if sample rate changed
    if (hp_.sampleRate != sampleRate) {
        hp_.sampleRate = sampleRate;
        float dt = 1.0f / sampleRate;
        float rc = 1.0f / (2.0f * static_cast<float>(M_PI) * hpCutoff_);
        hp_.alpha = rc / (rc + dt);
    }
    if (lp_.sampleRate != sampleRate) {
        lp_.sampleRate = sampleRate;
        float dt = 1.0f / sampleRate;
        float rc = 1.0f / (2.0f * static_cast<float>(M_PI) * lpCutoff_);
        lp_.alpha = dt / (rc + dt);
    }
    if (agc_.sampleRate != sampleRate) {
        agc_.sampleRate = sampleRate;
        agc_.attackCoeff = 1.0f - expf(-1.0f / (AGC_ATTACK_TIME * sampleRate));
        agc_.releaseCoeff = 1.0f - expf(-1.0f / (AGC_RELEASE_TIME * sampleRate));
        agc_.holdSamples = static_cast<int>(AGC_HOLD_TIME * sampleRate);
    }

    // Apply filter chain: HPF -> LPF -> AGC
    applyHighPass(workBuffer_, numFrames);
    applyLowPass(workBuffer_, numFrames);
    applyAGC(workBuffer_, numFrames);

    // Convert float -> int16 with clipping
    for (int i = 0; i < numSamples; ++i) {
        float clamped = std::max(-1.0f, std::min(1.0f, workBuffer_[i]));
        samples[i] = static_cast<int16_t>(clamped * 32767.0f);
    }
}
// First-order high-pass filter over interleaved float samples, in place:
//
//     y[i] = alpha * (y[i-1] + x[i] - x[i-1])
//
// The buffer is overwritten as it is filtered, so the previous *input* x[i-1]
// must be carried in a local variable: reading it back from the buffer would
// return the previous output and reduce the recurrence to alpha * x[i].
// Per-channel state carried across calls: hp_.filterStates holds the last
// output y, hp_.lastInputs holds the last raw input x.
//
// @param samples    interleaved samples (numFrames * channels_), filtered in place
// @param numFrames  frames per channel; values <= 0 leave buffer and state untouched
void VoiceFilterChain::applyHighPass(float* samples, int numFrames) {
    if (numFrames <= 0) return;

    const float alpha = hp_.alpha;
    for (int ch = 0; ch < channels_; ++ch) {
        float prevInput = hp_.lastInputs[ch];
        float prevOutput = hp_.filterStates[ch];

        for (int i = 0; i < numFrames; ++i) {
            const int idx = i * channels_ + ch;
            const float input = samples[idx];
            const float output = alpha * (prevOutput + (input - prevInput));
            samples[idx] = output;
            prevInput = input;
            prevOutput = output;
        }

        // Persist state so the next buffer continues the same recurrence.
        hp_.lastInputs[ch] = prevInput;
        hp_.filterStates[ch] = prevOutput;
    }
}
// First-order low-pass filter over interleaved float samples, in place:
//
//     y[i] = alpha * x[i] + (1 - alpha) * y[i-1]
//
// lp_.filterStates keeps the last output per channel so consecutive buffers
// are filtered as one continuous stream.
//
// @param samples    interleaved samples (numFrames * channels_), filtered in place
// @param numFrames  frames per channel; values <= 0 leave buffer and state
//                   untouched (previously this indexed before the buffer start)
void VoiceFilterChain::applyLowPass(float* samples, int numFrames) {
    if (numFrames <= 0) return;

    const float alpha = lp_.alpha;
    const float oneMinusAlpha = 1.0f - alpha;

    for (int ch = 0; ch < channels_; ++ch) {
        float prevOutput = lp_.filterStates[ch];
        for (int i = 0; i < numFrames; ++i) {
            const int idx = i * channels_ + ch;
            prevOutput = alpha * samples[idx] + oneMinusAlpha * prevOutput;
            samples[idx] = prevOutput;
        }
        lp_.filterStates[ch] = prevOutput;
    }
}
// Block-based automatic gain control followed by a peak limiter, in place.
//
// The buffer is split into AGC_BLOCK_TARGET blocks. For every block and
// channel the RMS level is measured, a target gain is derived from it, the
// per-channel running gain (agc_.currentGain) is smoothed towards that target
// and then applied to the block. After all blocks are processed each channel
// is scaled down if its peak exceeds AGC_PEAK_LIMIT.
//
// @param samples    interleaved float samples (numFrames * channels_)
// @param numFrames  number of frames per channel
void VoiceFilterChain::applyAGC(float* samples, int numFrames) {
// NOTE(review): both conversions divide the dB value by 10, while the result
// is compared against an RMS amplitude below; an amplitude conversion would
// normally divide by 20 — presumably kept to match the reference
// implementation, confirm before changing.
float targetLinear = powf(10.0f, agcTargetDb_ / 10.0f);
float maxGainLinear = powf(10.0f, agcMaxGain_ / 10.0f);
// At least one frame per block; short buffers leave the later blocks empty.
int blockSize = std::max(1, numFrames / AGC_BLOCK_TARGET);
for (int block = 0; block < AGC_BLOCK_TARGET; ++block) {
int blockStart = block * blockSize;
// The last block absorbs the remainder left by the integer division.
int blockEnd = (block == AGC_BLOCK_TARGET - 1) ? numFrames : (block + 1) * blockSize;
if (blockEnd > numFrames) blockEnd = numFrames;
int blockSamples = blockEnd - blockStart;
// Empty (or past-the-end) blocks are skipped.
if (blockSamples <= 0) continue;
for (int ch = 0; ch < channels_; ++ch) {
// RMS level of this channel within the block.
float sumSquares = 0.0f;
for (int i = blockStart; i < blockEnd; ++i) {
int idx = i * channels_ + ch;
sumSquares += samples[idx] * samples[idx];
}
float rms = sqrtf(sumSquares / blockSamples);
// Above the trigger level aim for the target level, capped at the maximum
// gain; otherwise keep the current gain as the target.
float targetGain;
if (rms > 1e-9f && rms > AGC_TRIGGER_LEVEL) {
targetGain = std::min(targetLinear / rms, maxGainLinear);
} else {
targetGain = agc_.currentGain[ch];
}
if (targetGain < agc_.currentGain[ch]) {
// Attack: gain must drop. Smooth with the attack coefficient and re-arm
// the hold counter.
agc_.currentGain[ch] = agc_.attackCoeff * targetGain +
(1.0f - agc_.attackCoeff) * agc_.currentGain[ch];
agc_.holdCounter = agc_.holdSamples;
} else {
// Hold / release: while the hold counter is positive only count it down;
// afterwards smooth towards the target with the release coefficient.
// Note: holdCounter is a single value shared by all channels.
if (agc_.holdCounter > 0) {
agc_.holdCounter -= blockSamples;
} else {
agc_.currentGain[ch] = agc_.releaseCoeff * targetGain +
(1.0f - agc_.releaseCoeff) * agc_.currentGain[ch];
}
}
// Apply the updated gain to this channel's samples in the block.
for (int i = blockStart; i < blockEnd; ++i) {
int idx = i * channels_ + ch;
samples[idx] *= agc_.currentGain[ch];
}
}
}
// Peak limiting
// Per channel: find the absolute peak over the whole buffer and, if it
// exceeds AGC_PEAK_LIMIT, scale the entire channel so the peak equals it.
for (int ch = 0; ch < channels_; ++ch) {
float peak = 0.0f;
for (int i = 0; i < numFrames; ++i) {
int idx = i * channels_ + ch;
float absVal = fabsf(samples[idx]);
if (absVal > peak) peak = absVal;
}
if (peak > AGC_PEAK_LIMIT) {
float scale = AGC_PEAK_LIMIT / peak;
for (int i = 0; i < numFrames; ++i) {
int idx = i * channels_ + ch;
samples[idx] *= scale;
}
}
}
}

View File

@@ -0,0 +1,81 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#pragma once
#include <cstdint>
/**
* Native voice filter chain for LXST audio capture.
*
* Ported from LXST-kt native layer (native_audio_filters.h/cpp).
* Filter order: HighPass (300Hz) -> LowPass (3400Hz) -> AGC
*
* Processes int16 samples in-place. Internally converts to float
* for filter math and back to int16 on output.
*/
class VoiceFilterChain {
public:
/**
* @param channels Number of audio channels (1=mono for Codec2)
* @param hpCutoff High-pass cutoff frequency (Hz), typically 300
* @param lpCutoff Low-pass cutoff frequency (Hz), typically 3400
* @param agcTargetDb AGC target level in dBFS, typically -12
* @param agcMaxGain AGC maximum gain in dB, typically 12
*/
VoiceFilterChain(int channels, float hpCutoff, float lpCutoff,
float agcTargetDb, float agcMaxGain);
~VoiceFilterChain();
// Non-copyable: the chain owns raw heap buffers that its destructor frees.
VoiceFilterChain(const VoiceFilterChain&) = delete;
VoiceFilterChain& operator=(const VoiceFilterChain&) = delete;
/**
* Process audio samples through the filter chain (in-place).
*
* @param samples int16 PCM samples (modified in-place)
* @param numSamples Total number of samples (frames * channels)
* @param sampleRate Sample rate in Hz
*/
void process(int16_t* samples, int numSamples, int sampleRate);
private:
// High-pass stage state. The pointers are per-channel arrays allocated by
// the constructor; alpha is recomputed when sampleRate changes.
struct HighPassState {
float* filterStates;
float* lastInputs;
float alpha = 0;
int sampleRate = 0;
};
// Low-pass stage state: per-channel last output plus the coefficient for the
// sample rate last seen.
struct LowPassState {
float* filterStates;
float alpha = 0;
int sampleRate = 0;
};
// AGC state: per-channel running gain, a single hold counter shared by all
// channels, and coefficients derived from the sample rate last seen.
struct AGCState {
float* currentGain;
int holdCounter = 0;
int sampleRate = 0;
float attackCoeff = 0;
float releaseCoeff = 0;
int holdSamples = 0;
};
// Stage implementations; each works in place on interleaved float samples.
void applyHighPass(float* samples, int numFrames);
void applyLowPass(float* samples, int numFrames);
void applyAGC(float* samples, int numFrames);
int channels_;
float hpCutoff_;
float lpCutoff_;
float agcTargetDb_;
float agcMaxGain_;
HighPassState hp_;
LowPassState lp_;
AGCState agc_;
// Float scratch buffer used by process(); grown on demand, never shrunk.
float* workBuffer_;
// Capacity of workBuffer_ in samples.
int workBufferSize_ = 0;
};

View File

@@ -0,0 +1,162 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#include "codec_wrapper.h"
#include <codec2.h>
#include <cstring>
// Logging shims: on Arduino/ESP32 builds forward to the ESP-IDF log macros
// under the "LXST:Codec2" tag; elsewhere fall back to printf with a severity
// prefix and a trailing newline.
#ifdef ARDUINO
#include <esp_log.h>
static const char* TAG = "LXST:Codec2";
#define LOGI(fmt, ...) ESP_LOGI(TAG, fmt, ##__VA_ARGS__)
#define LOGW(fmt, ...) ESP_LOGW(TAG, fmt, ##__VA_ARGS__)
#define LOGE(fmt, ...) ESP_LOGE(TAG, fmt, ##__VA_ARGS__)
#else
#include <cstdio>
// `fmt` must be a string literal: it is concatenated with the prefix/suffix.
#define LOGI(fmt, ...) printf("[INFO] " fmt "\n", ##__VA_ARGS__)
#define LOGW(fmt, ...) printf("[WARN] " fmt "\n", ##__VA_ARGS__)
#define LOGE(fmt, ...) printf("[ERR] " fmt "\n", ##__VA_ARGS__)
#endif
// Construction allocates nothing; the codec is created later by create().
Codec2Wrapper::Codec2Wrapper() = default;
// Releases the codec instance (if any) via destroy().
Codec2Wrapper::~Codec2Wrapper() {
destroy();
}
// (Re)create the codec for the given Codec2 library mode.
// Any existing instance is destroyed first. On success the frame geometry and
// the wire header byte for the mode are cached; on failure the wrapper is left
// in the destroyed state and false is returned.
bool Codec2Wrapper::create(int libraryMode) {
    destroy();

    struct CODEC2* const instance = codec2_create(libraryMode);
    codec2_ = instance;
    if (instance == nullptr) {
        LOGE("Codec2 create failed for library mode %d", libraryMode);
        return false;
    }

    libraryMode_ = libraryMode;
    samplesPerFrame_ = codec2_samples_per_frame(instance);
    bytesPerFrame_ = codec2_bytes_per_frame(instance);
    modeHeader_ = libraryModeToHeader(libraryMode);

    LOGI("Codec2 created: libMode=%d header=0x%02x samples/frame=%d bytes/frame=%d",
         libraryMode, modeHeader_, samplesPerFrame_, bytesPerFrame_);
    return true;
}
// Destroy the codec instance, if one exists, and reset all cached mode
// information to zero. Safe to call repeatedly.
void Codec2Wrapper::destroy() {
    if (codec2_ != nullptr) {
        codec2_destroy(codec2_);
    }
    codec2_ = nullptr;

    libraryMode_ = 0;
    modeHeader_ = 0;
    bytesPerFrame_ = 0;
    samplesPerFrame_ = 0;
}
int Codec2Wrapper::decode(const uint8_t* encoded, int encodedBytes,
int16_t* output, int maxOutputSamples) {
if (!codec2_ || encodedBytes < 1) return -1;
// First byte is mode header -- check if mode changed
uint8_t header = encoded[0];
if (header != modeHeader_) {
int newMode = headerToLibraryMode(header);
if (newMode >= 0) {
LOGI("Codec2 mode switch: header 0x%02x -> libMode %d", header, newMode);
codec2_destroy(codec2_);
codec2_ = codec2_create(newMode);
if (!codec2_) {
LOGE("Codec2 mode switch failed");
return -1;
}
libraryMode_ = newMode;
samplesPerFrame_ = codec2_samples_per_frame(codec2_);
bytesPerFrame_ = codec2_bytes_per_frame(codec2_);
modeHeader_ = header;
} else {
LOGW("Unknown Codec2 header: 0x%02x", header);
return -1;
}
}
// Skip header byte, decode remaining sub-frames
const uint8_t* data = encoded + 1;
int dataLen = encodedBytes - 1;
int numFrames = dataLen / bytesPerFrame_;
int totalSamples = numFrames * samplesPerFrame_;
if (totalSamples > maxOutputSamples) {
LOGW("Codec2 decode: output buffer too small (%d > %d)",
totalSamples, maxOutputSamples);
return -1;
}
for (int i = 0; i < numFrames; i++) {
codec2_decode(codec2_,
output + i * samplesPerFrame_,
data + i * bytesPerFrame_);
}
return totalSamples;
}
// Encode PCM into one LXST packet: the mode header byte followed by one
// codec2 sub-frame per whole input frame. Samples beyond the last whole frame
// are not encoded.
//
// @return packet size in bytes, or -1 if no codec exists or the packet would
//         not fit into maxOutputBytes
int Codec2Wrapper::encode(const int16_t* pcm, int pcmSamples,
                          uint8_t* output, int maxOutputBytes) {
    if (codec2_ == nullptr) {
        return -1;
    }

    const int wholeFrames = pcmSamples / samplesPerFrame_;
    const int packetBytes = 1 + wholeFrames * bytesPerFrame_;
    if (packetBytes > maxOutputBytes) {
        LOGW("Codec2 encode: output buffer too small (%d > %d)",
             packetBytes, maxOutputBytes);
        return -1;
    }

    // Header first, then walk both buffers one frame at a time.
    uint8_t* writeCursor = output;
    *writeCursor++ = modeHeader_;

    const int16_t* frameStart = pcm;
    for (int remaining = wholeFrames; remaining > 0; --remaining) {
        // codec2_encode takes a non-const sample pointer.
        codec2_encode(codec2_, writeCursor, const_cast<int16_t*>(frameStart));
        writeCursor += bytesPerFrame_;
        frameStart += samplesPerFrame_;
    }
    return packetBytes;
}
// Wire format mapping (matches Python LXST and LXST-kt Codec2.kt):
// header 0x00 = 700C -> library mode 8
// header 0x01 = 1200 -> library mode 5
// header 0x02 = 1300 -> library mode 4
// header 0x03 = 1400 -> library mode 3
// header 0x04 = 1600 -> library mode 2
// header 0x05 = 2400 -> library mode 1
// header 0x06 = 3200 -> library mode 0
// Translate a wire-format mode header byte into the Codec2 library mode.
// Returns -1 for headers outside the table.
int Codec2Wrapper::headerToLibraryMode(uint8_t header) {
    // Index = header byte, value = library mode.
    //   0x00 700C, 0x01 1200, 0x02 1300, 0x03 1400, 0x04 1600, 0x05 2400, 0x06 3200
    static constexpr int kModeForHeader[] = {8, 5, 4, 3, 2, 1, 0};
    constexpr unsigned kKnownHeaders = sizeof(kModeForHeader) / sizeof(kModeForHeader[0]);

    if (header >= kKnownHeaders) {
        return -1;
    }
    return kModeForHeader[header];
}
// Translate a Codec2 library mode into its wire-format header byte.
// Returns 0xFF for library modes that have no header assigned.
uint8_t Codec2Wrapper::libraryModeToHeader(int libraryMode) {
    // 700C is the only mapped mode outside the contiguous 0..5 range.
    if (libraryMode == 8) {
        return 0x00;
    }
    // Modes 0..5 (3200 down to 1200) map to headers 0x06 down to 0x01.
    if (libraryMode >= 0 && libraryMode <= 5) {
        return static_cast<uint8_t>(0x06 - libraryMode);
    }
    return 0xFF;
}

View File

@@ -0,0 +1,86 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#pragma once
#include <cstdint>
struct CODEC2;
/**
* Codec2 wrapper for LXST voice streaming on ESP32.
*
* Ported from LXST-kt CodecWrapper (codec_wrapper.h/cpp), stripped to
* Codec2-only (no Opus) for the ESP32-S3 resource budget.
*
* Handles the LXST wire format:
* Encoded packet = [1-byte mode header] + [N codec2 sub-frames]
*
* Wire-compatible with LXST-kt (Android/Columba) and Python LXST.
*
* Mode header mapping (matches Codec2.kt and Python LXST):
* 0x00 = 700C (lib mode 8) - 700bps, 40ms frames, 4 bytes/frame
* 0x04 = 1600 (lib mode 2) - 1600bps, 40ms frames, 8 bytes/frame
* 0x06 = 3200 (lib mode 0) - 3200bps, 20ms frames, 8 bytes/frame
*/
class Codec2Wrapper {
public:
Codec2Wrapper();
~Codec2Wrapper();
// Non-copyable: the wrapper owns the underlying codec2 instance.
Codec2Wrapper(const Codec2Wrapper&) = delete;
Codec2Wrapper& operator=(const Codec2Wrapper&) = delete;
/**
* Create a Codec2 encoder+decoder instance.
* @param libraryMode Codec2 library mode (0=3200, 2=1600, 8=700C)
* @return true on success
*/
bool create(int libraryMode);
/** Destroy the codec and release all resources. */
void destroy();
/**
* Decode encoded bytes to PCM int16.
* Strips mode header byte, loops over sub-frames.
* Handles dynamic mode switching if header changes.
*
* @param encoded Encoded data (with mode header byte)
* @param encodedBytes Length of encoded data
* @param output Output PCM int16 buffer
* @param maxOutputSamples Maximum samples that fit in output buffer
* @return Decoded sample count, or -1 on error
*/
int decode(const uint8_t* encoded, int encodedBytes,
int16_t* output, int maxOutputSamples);
/**
* Encode PCM int16 to encoded bytes.
* Prepends mode header byte, loops over sub-frames.
*
* @param pcm Input PCM int16 samples (8kHz mono)
* @param pcmSamples Number of input samples
* @param output Output buffer for encoded data
* @param maxOutputBytes Maximum bytes that fit in output buffer
* @return Encoded byte count, or -1 on error
*/
int encode(const int16_t* pcm, int pcmSamples,
uint8_t* output, int maxOutputBytes);
/** True while a codec instance exists. */
bool isCreated() const { return codec2_ != nullptr; }
/** PCM samples per codec2 frame for the current mode (0 when destroyed). */
int samplesPerFrame() const { return samplesPerFrame_; }
/** Encoded bytes per codec2 frame for the current mode (0 when destroyed). */
int bytesPerFrame() const { return bytesPerFrame_; }
/** Wire-format header byte of the current mode. */
uint8_t modeHeader() const { return modeHeader_; }
/** Codec2 library mode currently in use. */
int libraryMode() const { return libraryMode_; }
private:
// Owned codec2 instance; nullptr while no codec exists.
struct CODEC2* codec2_ = nullptr;
// Cached properties of the current mode; reset to 0 by destroy().
int samplesPerFrame_ = 0;
int bytesPerFrame_ = 0;
uint8_t modeHeader_ = 0;
int libraryMode_ = 0;
// Wire header <-> library mode translation; return -1 / 0xFF when unmapped.
static int headerToLibraryMode(uint8_t header);
static uint8_t libraryModeToHeader(int libraryMode);
};

View File

@@ -0,0 +1,83 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#include "encoded_ring_buffer.h"
#include <cstring>
#ifdef ARDUINO
#include <esp_heap_caps.h>
#endif
// Allocate storage for `maxSlots` slots, each holding an int32 length prefix
// followed by up to `maxBytesPerSlot` payload bytes, and zero-fill it.
// If the allocation fails buffer_ stays null and write()/read() return false.
EncodedRingBuffer::EncodedRingBuffer(int maxSlots, int maxBytesPerSlot)
    : maxSlots_(maxSlots),
      maxBytesPerSlot_(maxBytesPerSlot),
      slotSize_(static_cast<int>(sizeof(int32_t)) + maxBytesPerSlot) {
    const size_t totalBytes = maxSlots * slotSize_;

    void* storage =
#ifdef BOARD_HAS_PSRAM
        heap_caps_malloc(totalBytes, MALLOC_CAP_SPIRAM);
#else
        malloc(totalBytes);
#endif
    buffer_ = static_cast<uint8_t*>(storage);

    if (buffer_ != nullptr) {
        memset(buffer_, 0, totalBytes);
    }
}
// Release the slot storage (a null buffer_ from a failed allocation is fine).
EncodedRingBuffer::~EncodedRingBuffer() {
free(buffer_);
}
// Producer side: copy one packet into the slot at the write index.
// Returns false without writing when the length is not in
// [1, maxBytesPerSlot_], when no storage exists, or when the ring is full
// (one slot is always left empty to tell "full" from "empty").
bool EncodedRingBuffer::write(const uint8_t* data, int length) {
    const bool lengthOk = (length > 0) && (length <= maxBytesPerSlot_);
    if (!lengthOk || buffer_ == nullptr) {
        return false;
    }

    const int head = writeIndex_.load(std::memory_order_relaxed);
    const int tail = readIndex_.load(std::memory_order_acquire);
    const int advanced = (head + 1) % maxSlots_;
    if (advanced == tail) {
        return false;
    }

    // Slot layout: [int32 length][payload bytes].
    uint8_t* const slot = buffer_ + head * slotSize_;
    uint8_t* const payload = slot + sizeof(int32_t);
    memcpy(slot, &length, sizeof(int32_t));
    memcpy(payload, data, length);

    // Publish the slot only after its contents are written.
    writeIndex_.store(advanced, std::memory_order_release);
    return true;
}
// Consumer side: copy the oldest packet into `dest`.
//
// @param dest          destination buffer
// @param maxLength     capacity of `dest` in bytes
// @param actualLength  receives the packet length on success and 0 on every
//                      failure path; may be null if the caller does not need it
// @return true if a packet was copied. Returns false when no storage exists,
//         when the ring is empty, or when the oldest packet is larger than
//         maxLength — in that last case the packet is discarded (the read
//         index still advances).
bool EncodedRingBuffer::read(uint8_t* dest, int maxLength, int* actualLength) {
    // Give the out-parameter a defined value on all failure paths.
    if (actualLength != nullptr) *actualLength = 0;
    if (!buffer_) return false;

    const int r = readIndex_.load(std::memory_order_relaxed);
    const int w = writeIndex_.load(std::memory_order_acquire);
    if (r == w) return false;

    const uint8_t* slot = buffer_ + r * slotSize_;
    int32_t length;
    memcpy(&length, slot, sizeof(int32_t));

    const int nextR = (r + 1) % maxSlots_;
    if (length > maxLength) {
        // Packet does not fit: drop it so the reader does not stall on it.
        readIndex_.store(nextR, std::memory_order_release);
        return false;
    }

    memcpy(dest, slot + sizeof(int32_t), length);
    if (actualLength != nullptr) *actualLength = length;
    // Hand the slot back to the producer only after the copy is complete.
    readIndex_.store(nextR, std::memory_order_release);
    return true;
}
// Number of packets currently stored (written but not yet read), computed as
// the wrapped distance from the read index to the write index.
int EncodedRingBuffer::availableSlots() const {
    const int head = writeIndex_.load(std::memory_order_acquire);
    const int tail = readIndex_.load(std::memory_order_acquire);
    const int pending = head - tail;
    return (pending < 0) ? pending + maxSlots_ : pending;
}
// Discard all stored packets by returning both indices to slot 0.
// NOTE(review): the stores are relaxed and not coordinated with write()/read();
// presumably this must only be called while producer and consumer are idle —
// confirm against callers.
void EncodedRingBuffer::reset() {
writeIndex_.store(0, std::memory_order_relaxed);
readIndex_.store(0, std::memory_order_relaxed);
}

View File

@@ -0,0 +1,39 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#pragma once
#include <atomic>
#include <cstdint>
/**
* Lock-free SPSC ring buffer for variable-length encoded audio packets.
*
* Ported from LXST-kt native layer. Each slot has a fixed max size but
* tracks actual length. Lock-free protocol identical to PacketRingBuffer.
*
* Slot layout: [int32 length][uint8 data[maxBytesPerSlot]] x maxSlots
*/
class EncodedRingBuffer {
public:
// Allocates maxSlots slots of (4 + maxBytesPerSlot) bytes each. One slot is
// always kept empty, so at most maxSlots - 1 packets can be stored.
EncodedRingBuffer(int maxSlots, int maxBytesPerSlot);
~EncodedRingBuffer();
// Non-copyable: owns the raw slot storage.
EncodedRingBuffer(const EncodedRingBuffer&) = delete;
EncodedRingBuffer& operator=(const EncodedRingBuffer&) = delete;
// Producer: store one packet. Returns false if length is outside
// [1, maxBytesPerSlot], if storage allocation failed, or if the ring is full.
bool write(const uint8_t* data, int length);
// Consumer: copy the oldest packet to dest and report its size through
// actualLength. Returns false if the ring is empty; a packet larger than
// maxLength is discarded and also yields false.
bool read(uint8_t* dest, int maxLength, int* actualLength);
// Number of packets currently stored and waiting to be read.
int availableSlots() const;
// Reset both indices to 0, dropping any stored packets.
void reset();
private:
const int maxSlots_;
const int maxBytesPerSlot_;
// Bytes per slot: int32 length prefix + maxBytesPerSlot_.
const int slotSize_;
// Slot storage; nullptr if allocation failed.
uint8_t* buffer_;
// Next slot to be written (advanced by write()).
std::atomic<int> writeIndex_{0};
// Next slot to be read (advanced by read()).
std::atomic<int> readIndex_{0};
};

162
lib/lxst_audio/es7210.cpp Normal file
View File

@@ -0,0 +1,162 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#include "es7210.h"
#ifdef ARDUINO
#include <Wire.h>
#include <esp_log.h>
static const char* TAG = "ES7210";
// Clock coefficient entry for 8kHz with MCLK = 4.096MHz (512 * 8kHz)
// From ESPHome/Espressif ES7210 driver
// Fields: mclk, lrclk, adc_div, dll, doubler, osr, lrck_h, lrck_l
// init() packs adc_div (low bits), doubler (bit 6) and dll (bit 7) into
// REG_MAINCLK, writes osr to REG_OSR and lrck_h/lrck_l to REG_LRCK_DIVH/DIVL.
// mclk and lrclk are descriptive only; no visible code reads them.
struct ClockCoeff {
uint32_t mclk;
uint32_t lrclk;
uint8_t adc_div;
uint8_t dll;
uint8_t doubler;
uint8_t osr;
uint8_t lrck_h;
uint8_t lrck_l;
};
// 8kHz with MCLK = 4.096MHz = 512 * 8kHz
// lrck_h:lrck_l = 0x0200 = 512, the same MCLK-to-sample-rate ratio.
static constexpr ClockCoeff CLOCK_8K = {
.mclk = 4096000, .lrclk = 8000,
.adc_div = 0x01, .dll = 0x01, .doubler = 0x00,
.osr = 0x20, .lrck_h = 0x02, .lrck_l = 0x00
};
// Write one byte `val` to register `reg` of the I2C device at `addr`.
// Returns true when the transmission ends with status 0; otherwise logs the
// register, value and Wire status code and returns false.
static bool writeReg(uint8_t addr, uint8_t reg, uint8_t val) {
    Wire.beginTransmission(addr);
    Wire.write(reg);
    Wire.write(val);

    const uint8_t status = Wire.endTransmission();
    if (status == 0) {
        return true;
    }

    ESP_LOGE(TAG, "I2C write failed: reg=0x%02x val=0x%02x err=%d", reg, val, status);
    return false;
}
// Read one byte from register `reg` of the I2C device at `addr` into *val.
// The register address is sent without releasing the bus (endTransmission(false)),
// then a single byte is requested. Returns false, leaving *val untouched, if
// either step fails.
static bool readReg(uint8_t addr, uint8_t reg, uint8_t* val) {
    Wire.beginTransmission(addr);
    Wire.write(reg);

    const bool addressed = (Wire.endTransmission(false) == 0);
    if (!addressed) {
        return false;
    }

    const bool received = (Wire.requestFrom(addr, static_cast<uint8_t>(1)) == 1);
    if (!received) {
        return false;
    }

    *val = Wire.read();
    return true;
}
// Read-modify-write: replace the bits selected by `mask` in register `reg`
// with the corresponding bits of `data`, leaving all other bits unchanged.
// Returns false if either the read or the write fails.
static bool updateRegBit(uint8_t addr, uint8_t reg, uint8_t mask, uint8_t data) {
    uint8_t current;
    if (!readReg(addr, reg, &current)) {
        return false;
    }

    const uint8_t preserved = static_cast<uint8_t>(current & ~mask);
    const uint8_t replaced = static_cast<uint8_t>(mask & data);
    return writeReg(addr, reg, static_cast<uint8_t>(preserved | replaced));
}
namespace ES7210 {
// Bring the ES7210 up for 8kHz / 16-bit capture as an I2S secondary device.
// Issues a fixed sequence of I2C register writes: reset, timing, HPF, mode,
// analog power and mic bias, serial format, clock dividers, power-up, gain,
// enable. Stops and returns false at the first I2C failure, so the chip may
// be left partially configured in that case.
//
// @param i2cAddr  I2C address of the ES7210
// @param gain     PGA gain applied to all four mic channels via setGain()
// @return true if every register access succeeded
bool init(uint8_t i2cAddr, MicGain gain) {
ESP_LOGI(TAG, "Initializing ES7210 at 0x%02x, gain=%d", i2cAddr, gain);
// Software reset
if (!writeReg(i2cAddr, REG_RESET, 0xFF)) return false;
if (!writeReg(i2cAddr, REG_RESET, 0x32)) return false;
if (!writeReg(i2cAddr, REG_CLOCK_OFF, 0x3F)) return false;
// Set initialization and power-up timing
if (!writeReg(i2cAddr, REG_TIME_CTRL0, 0x30)) return false;
if (!writeReg(i2cAddr, REG_TIME_CTRL1, 0x30)) return false;
// Configure high-pass filters for all ADC channels
if (!writeReg(i2cAddr, REG_ADC12_HPF2, 0x2A)) return false;
if (!writeReg(i2cAddr, REG_ADC12_HPF1, 0x0A)) return false;
if (!writeReg(i2cAddr, REG_ADC34_HPF2, 0x0A)) return false;
if (!writeReg(i2cAddr, REG_ADC34_HPF1, 0x2A)) return false;
// Secondary (slave) I2S mode — ESP32 provides clocks
if (!updateRegBit(i2cAddr, REG_MODE_CONFIG, 0x01, 0x00)) return false;
// Configure analog power
if (!writeReg(i2cAddr, REG_ANALOG, 0xC3)) return false;
// Set mic bias to 2.87V (0x70)
if (!writeReg(i2cAddr, REG_MIC12_BIAS, 0x70)) return false;
if (!writeReg(i2cAddr, REG_MIC34_BIAS, 0x70)) return false;
// Configure I2S format: 16-bit, standard I2S
// Bits per sample: 0x60 = 16-bit
if (!writeReg(i2cAddr, REG_SDP_IFACE1, 0x60)) return false;
// Normal mode (not TDM): mic1&2 on SDOUT1, mic3&4 on SDOUT2
if (!writeReg(i2cAddr, REG_SDP_IFACE2, 0x00)) return false;
// Configure clock for 8kHz with MCLK = 4.096MHz
{
// REG_MAINCLK layout used here: adc_div in the low bits, doubler at bit 6,
// dll at bit 7.
uint8_t regv = CLOCK_8K.adc_div
| (CLOCK_8K.doubler << 6)
| (CLOCK_8K.dll << 7);
if (!writeReg(i2cAddr, REG_MAINCLK, regv)) return false;
if (!writeReg(i2cAddr, REG_OSR, CLOCK_8K.osr)) return false;
if (!writeReg(i2cAddr, REG_LRCK_DIVH, CLOCK_8K.lrck_h)) return false;
if (!writeReg(i2cAddr, REG_LRCK_DIVL, CLOCK_8K.lrck_l)) return false;
}
// Power on mic channels
if (!writeReg(i2cAddr, REG_MIC1_POWER, 0x08)) return false;
if (!writeReg(i2cAddr, REG_MIC2_POWER, 0x08)) return false;
if (!writeReg(i2cAddr, REG_MIC3_POWER, 0x08)) return false;
if (!writeReg(i2cAddr, REG_MIC4_POWER, 0x08)) return false;
// Power down DLL
if (!writeReg(i2cAddr, REG_POWER_DOWN, 0x04)) return false;
// Power on MIC bias & ADC & PGA
if (!writeReg(i2cAddr, REG_MIC12_POWER, 0x0F)) return false;
if (!writeReg(i2cAddr, REG_MIC34_POWER, 0x0F)) return false;
// Set mic gain
if (!setGain(i2cAddr, gain)) return false;
// Enable device
if (!writeReg(i2cAddr, REG_RESET, 0x71)) return false;
if (!writeReg(i2cAddr, REG_RESET, 0x41)) return false;
ESP_LOGI(TAG, "ES7210 initialized successfully");
return true;
}
// Program the same PGA gain into all four microphone channels.
//
// Sequence: clear bit 4 of every MICx_GAIN register, write 0xFF to both
// mic-pair power registers, then for each mic clear the REG_CLOCK_OFF bits,
// write 0x00 to the mic pair's power register, set bit 4 of the gain register
// and write the gain code into its low nibble. Returns false at the first
// failing I2C access.
//
// NOTE(review): the same REG_CLOCK_OFF mask (0x0B) is used for all four mics;
// upstream reference drivers may use a different mask for mics 3/4 — confirm
// against the ES7210 reference driver before relying on mics 3/4.
bool setGain(uint8_t i2cAddr, MicGain gain) {
    constexpr uint8_t kMicCount = 4;
    const uint8_t gainCode = static_cast<uint8_t>(gain);

    // Clear PGA gain for all mics
    for (uint8_t mic = 0; mic < kMicCount; ++mic) {
        if (!updateRegBit(i2cAddr, REG_MIC1_GAIN + mic, 0x10, 0x00)) return false;
    }

    // Disable ADC power temporarily
    const bool pairsOff = writeReg(i2cAddr, REG_MIC12_POWER, 0xFF) &&
                          writeReg(i2cAddr, REG_MIC34_POWER, 0xFF);
    if (!pairsOff) return false;

    // Configure each mic gain
    for (uint8_t mic = 0; mic < kMicCount; ++mic) {
        const uint8_t gainReg = static_cast<uint8_t>(REG_MIC1_GAIN + mic);
        const uint8_t pairPowerReg = (mic < 2) ? REG_MIC12_POWER : REG_MIC34_POWER;

        // && short-circuits, so the I2C accesses happen in this exact order
        // and stop at the first failure.
        const bool micOk = updateRegBit(i2cAddr, REG_CLOCK_OFF, 0x0B, 0x00) &&
                           writeReg(i2cAddr, pairPowerReg, 0x00) &&
                           updateRegBit(i2cAddr, gainReg, 0x10, 0x10) &&
                           updateRegBit(i2cAddr, gainReg, 0x0F, gainCode);
        if (!micOk) return false;
    }

    ESP_LOGI(TAG, "Mic gain set to %d", gain);
    return true;
}
} // namespace ES7210
#endif // ARDUINO

93
lib/lxst_audio/es7210.h Normal file
View File

@@ -0,0 +1,93 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#pragma once
#include <cstdint>
/**
* ES7210 four-channel audio ADC I2C driver for T-Deck Plus.
*
* Adapted from ESPHome es7210 component and Espressif ESP-BSP driver.
* Handles only I2C configuration registers — audio data flows over I2S.
*
* After init(), the ES7210 outputs 8kHz 16-bit audio on its I2S SDOUT1
* pin (mics 1&2). The caller is responsible for I2S port setup.
*/
namespace ES7210 {
// ES7210 register addresses
static constexpr uint8_t REG_RESET = 0x00;
static constexpr uint8_t REG_CLOCK_OFF = 0x01;
static constexpr uint8_t REG_MAINCLK = 0x02;
static constexpr uint8_t REG_MASTER_CLK = 0x03;
static constexpr uint8_t REG_LRCK_DIVH = 0x04;
static constexpr uint8_t REG_LRCK_DIVL = 0x05;
static constexpr uint8_t REG_POWER_DOWN = 0x06;
static constexpr uint8_t REG_OSR = 0x07;
static constexpr uint8_t REG_MODE_CONFIG = 0x08;
static constexpr uint8_t REG_TIME_CTRL0 = 0x09;
static constexpr uint8_t REG_TIME_CTRL1 = 0x0A;
static constexpr uint8_t REG_SDP_IFACE1 = 0x11;
static constexpr uint8_t REG_SDP_IFACE2 = 0x12;
static constexpr uint8_t REG_ADC_AUTOMUTE = 0x13;
static constexpr uint8_t REG_ADC34_HPF2 = 0x20;
static constexpr uint8_t REG_ADC34_HPF1 = 0x21;
static constexpr uint8_t REG_ADC12_HPF1 = 0x22;
static constexpr uint8_t REG_ADC12_HPF2 = 0x23;
static constexpr uint8_t REG_ANALOG = 0x40;
static constexpr uint8_t REG_MIC12_BIAS = 0x41;
static constexpr uint8_t REG_MIC34_BIAS = 0x42;
// MIC1..MIC4 gain registers are consecutive; setGain() addresses them as
// REG_MIC1_GAIN + index.
static constexpr uint8_t REG_MIC1_GAIN = 0x43;
static constexpr uint8_t REG_MIC2_GAIN = 0x44;
static constexpr uint8_t REG_MIC3_GAIN = 0x45;
static constexpr uint8_t REG_MIC4_GAIN = 0x46;
static constexpr uint8_t REG_MIC1_POWER = 0x47;
static constexpr uint8_t REG_MIC2_POWER = 0x48;
static constexpr uint8_t REG_MIC3_POWER = 0x49;
static constexpr uint8_t REG_MIC4_POWER = 0x4A;
static constexpr uint8_t REG_MIC12_POWER = 0x4B;
static constexpr uint8_t REG_MIC34_POWER = 0x4C;
// Mic gain in dB: 0 to 33 in 3dB steps, then 34.5, 36 and 37.5 (1.5dB steps).
// The enum value is the code written to the low nibble of the gain register.
enum MicGain : uint8_t {
GAIN_0DB = 0,
GAIN_3DB = 1,
GAIN_6DB = 2,
GAIN_9DB = 3,
GAIN_12DB = 4,
GAIN_15DB = 5,
GAIN_18DB = 6,
GAIN_21DB = 7,
GAIN_24DB = 8,
GAIN_27DB = 9,
GAIN_30DB = 10,
GAIN_33DB = 11,
GAIN_34_5DB = 12,
GAIN_36DB = 13,
GAIN_37_5DB = 14,
};
/**
* Initialize the ES7210 for 8kHz 16-bit I2S capture.
*
* Uses the shared Wire bus (must already be initialized).
* Configures all four mic channels with the specified gain.
*
* @param i2cAddr I2C address (default 0x40)
* @param gain Microphone gain setting (default 24dB)
* @return true if all I2C accesses succeeded
*/
bool init(uint8_t i2cAddr = 0x40, MicGain gain = GAIN_24DB);
/**
* Set microphone gain for all channels.
* Can be called after init() to adjust gain dynamically.
*
* @param i2cAddr I2C address
* @param gain New gain setting
* @return true on success
*/
bool setGain(uint8_t i2cAddr, MicGain gain);
} // namespace ES7210

View File

@@ -0,0 +1,236 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#include "i2s_capture.h"
#ifdef ARDUINO
#include <cstring>
#include <driver/i2s.h>
#include <esp_log.h>
#include <esp_heap_caps.h>
#include <Hardware/TDeck/Config.h>
#include "codec_wrapper.h"
#include "audio_filters.h"
#include "encoded_ring_buffer.h"
using namespace Hardware::TDeck;
static const char* TAG = "LXST:Capture";
// Construction acquires nothing; init() and configureEncoder() do the setup.
I2SCapture::I2SCapture() = default;
// Stop the capture task (if running) before freeing the encoder resources it uses.
I2SCapture::~I2SCapture() {
stop();
destroyEncoder();
}
// Install and configure the I2S_NUM_1 driver for microphone capture.
// Idempotent: returns true immediately if already initialized. On a pin
// configuration failure the driver is uninstalled again.
// @return true if the driver is installed and the pins are routed
bool I2SCapture::init() {
if (i2sInitialized_) return true;
// Configure I2S_NUM_1 for mic capture from ES7210
i2s_config_t i2s_config = {};
// The ESP32 is the I2S master and receives only.
i2s_config.mode = static_cast<i2s_mode_t>(I2S_MODE_MASTER | I2S_MODE_RX);
i2s_config.sample_rate = SAMPLE_RATE;
i2s_config.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT;
// ES7210 in normal mode outputs stereo (mic1=L, mic2=R) on SDOUT1
// Only the left slot (mic1) is captured, so the stream read back is mono
i2s_config.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT;
i2s_config.communication_format = I2S_COMM_FORMAT_STAND_I2S;
i2s_config.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1;
i2s_config.dma_buf_count = 4;
i2s_config.dma_buf_len = 128;
i2s_config.use_apll = false;
i2s_config.tx_desc_auto_clear = false;
i2s_config.fixed_mclk = 4096000; // 4.096MHz MCLK for ES7210 at 8kHz
// No event queue is requested (queue size 0, null handle).
esp_err_t err = i2s_driver_install(I2S_NUM_1, &i2s_config, 0, NULL);
if (err != ESP_OK) {
ESP_LOGE(TAG, "I2S_NUM_1 driver install failed: %d", err);
return false;
}
// Route MCLK/BCK/WS/data-in to the mic pins; there is no data-out pin.
i2s_pin_config_t pin_config = {};
pin_config.mck_io_num = Audio::MIC_MCLK;
pin_config.bck_io_num = Audio::MIC_SCK;
pin_config.ws_io_num = Audio::MIC_LRCK;
pin_config.data_in_num = Audio::MIC_DIN;
pin_config.data_out_num = I2S_PIN_NO_CHANGE;
err = i2s_set_pin(I2S_NUM_1, &pin_config);
if (err != ESP_OK) {
ESP_LOGE(TAG, "I2S_NUM_1 pin config failed: %d", err);
i2s_driver_uninstall(I2S_NUM_1);
return false;
}
i2sInitialized_ = true;
ESP_LOGI(TAG, "I2S capture initialized: %dHz 16-bit mono, MCLK=4.096MHz", SAMPLE_RATE);
return true;
}
bool I2SCapture::configureEncoder(int codec2Mode, bool enableFilters) {
destroyEncoder();
encoder_ = new Codec2Wrapper();
if (!encoder_->create(codec2Mode)) {
ESP_LOGE(TAG, "Failed to create Codec2 encoder mode %d", codec2Mode);
delete encoder_;
encoder_ = nullptr;
return false;
}
frameSamples_ = encoder_->samplesPerFrame();
filtersEnabled_ = enableFilters;
// Allocate ring buffer in PSRAM
encodedRing_ = new EncodedRingBuffer(ENCODED_RING_SLOTS, ENCODED_RING_MAX_BYTES);
// Allocate accumulation buffer in PSRAM
accumBuffer_ = static_cast<int16_t*>(
heap_caps_malloc(sizeof(int16_t) * frameSamples_, MALLOC_CAP_SPIRAM));
accumCount_ = 0;
// Silence buffer for mute
silenceBuf_ = static_cast<int16_t*>(
heap_caps_calloc(frameSamples_, sizeof(int16_t), MALLOC_CAP_SPIRAM));
// Filter chain: 1 channel (mono), voice band 300-3400Hz, AGC -12dB target, 12dB max
if (enableFilters) {
filterChain_ = new VoiceFilterChain(1, 300.0f, 3400.0f, -12.0f, 12.0f);
}
ESP_LOGI(TAG, "Encoder configured: Codec2 mode %d, %d samples/frame, %d bytes/frame, filters=%d",
codec2Mode, frameSamples_, encoder_->bytesPerFrame(), enableFilters);
return true;
}
bool I2SCapture::start() {
if (!i2sInitialized_ || !encoder_ || capturing_.load()) return false;
// Set capturing BEFORE starting task to avoid race (same pattern as LXST-kt)
capturing_.store(true, std::memory_order_relaxed);
BaseType_t ret = xTaskCreatePinnedToCore(
captureTask, "lxst_cap", CAPTURE_TASK_STACK, this,
CAPTURE_TASK_PRIORITY, reinterpret_cast<TaskHandle_t*>(&taskHandle_),
CAPTURE_TASK_CORE);
if (ret != pdPASS) {
ESP_LOGE(TAG, "Failed to create capture task");
capturing_.store(false, std::memory_order_relaxed);
return false;
}
ESP_LOGI(TAG, "Capture started");
return true;
}
void I2SCapture::stop() {
if (!capturing_.load()) return;
capturing_.store(false, std::memory_order_relaxed);
// Wait for task to exit
if (taskHandle_) {
vTaskDelay(pdMS_TO_TICKS(50));
taskHandle_ = nullptr;
}
if (i2sInitialized_) {
i2s_stop(I2S_NUM_1);
i2s_driver_uninstall(I2S_NUM_1);
i2sInitialized_ = false;
}
ESP_LOGI(TAG, "Capture stopped");
}
void I2SCapture::destroyEncoder() {
delete encoder_;
encoder_ = nullptr;
delete filterChain_;
filterChain_ = nullptr;
delete encodedRing_;
encodedRing_ = nullptr;
free(accumBuffer_);
accumBuffer_ = nullptr;
free(silenceBuf_);
silenceBuf_ = nullptr;
accumCount_ = 0;
}
void I2SCapture::captureTask(void* param) {
auto* self = static_cast<I2SCapture*>(param);
self->captureLoop();
vTaskDelete(NULL);
}
void I2SCapture::captureLoop() {
// I2S read buffer: read in chunks smaller than a codec frame
static constexpr int READ_SAMPLES = 128;
int16_t readBuf[READ_SAMPLES];
size_t bytesRead = 0;
ESP_LOGI(TAG, "Capture task running on core %d", xPortGetCoreID());
while (capturing_.load(std::memory_order_relaxed)) {
// Read samples from I2S DMA
esp_err_t err = i2s_read(I2S_NUM_1, readBuf, sizeof(readBuf), &bytesRead,
pdMS_TO_TICKS(100));
if (err != ESP_OK || bytesRead == 0) continue;
int samplesRead = bytesRead / sizeof(int16_t);
// Accumulate into frame-sized buffer
int offset = 0;
while (offset < samplesRead && capturing_.load(std::memory_order_relaxed)) {
int needed = frameSamples_ - accumCount_;
int available = samplesRead - offset;
int toCopy = (available < needed) ? available : needed;
memcpy(accumBuffer_ + accumCount_, readBuf + offset, toCopy * sizeof(int16_t));
accumCount_ += toCopy;
offset += toCopy;
if (accumCount_ == frameSamples_) {
// Full frame ready — process it
int16_t* frameData = muted_.load(std::memory_order_relaxed)
? silenceBuf_ : accumBuffer_;
// Apply voice filters
if (filtersEnabled_ && filterChain_ && !muted_.load(std::memory_order_relaxed)) {
filterChain_->process(frameData, frameSamples_, SAMPLE_RATE);
}
// Encode
int encodedLen = encoder_->encode(frameData, frameSamples_,
encodeBuf_, sizeof(encodeBuf_));
if (encodedLen > 0 && encodedRing_) {
if (!encodedRing_->write(encodeBuf_, encodedLen)) {
// Ring full — drop oldest, then write
uint8_t discard[256];
int discardLen;
encodedRing_->read(discard, sizeof(discard), &discardLen);
encodedRing_->write(encodeBuf_, encodedLen);
}
}
accumCount_ = 0;
}
}
}
ESP_LOGI(TAG, "Capture task exiting");
}
/**
 * Pop the next encoded packet from the ring buffer.
 * @return false when no ring exists, otherwise the ring's read() result.
 */
bool I2SCapture::readEncodedPacket(uint8_t* dest, int maxLength, int* actualLength) {
    if (encodedRing_ == nullptr) {
        return false;
    }
    const bool gotPacket = encodedRing_->read(dest, maxLength, actualLength);
    return gotPacket;
}
/**
 * Report how many encoded packets the ring buffer holds (0 without a ring).
 * NOTE(review): forwards EncodedRingBuffer::availableSlots(); confirm that
 * this counts filled slots rather than free ones.
 */
int I2SCapture::availablePackets() const {
    return (encodedRing_ != nullptr) ? encodedRing_->availableSlots() : 0;
}
#endif // ARDUINO

View File

@@ -0,0 +1,111 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#pragma once
#include <cstdint>
#include <atomic>
class PacketRingBuffer;
class EncodedRingBuffer;
class VoiceFilterChain;
class Codec2Wrapper;
/**
* ESP32 I2S microphone capture engine for LXST voice streaming.
*
* Uses I2S_NUM_1 to capture audio from the ES7210 mic array.
* Runs a FreeRTOS task that reads I2S DMA, applies voice filters,
* encodes with Codec2, and writes to an EncodedRingBuffer for
* the network layer to consume.
*
* Audio flow:
* I2S DMA -> accumulate to frame -> filter -> encode -> EncodedRingBuffer
*/
class I2SCapture {
public:
I2SCapture();
~I2SCapture();
// Non-copyable: the instance owns a task handle and raw heap buffers.
I2SCapture(const I2SCapture&) = delete;
I2SCapture& operator=(const I2SCapture&) = delete;
/**
* Initialize the I2S capture port.
* Does NOT start capturing — call start() after init.
* stop() uninstalls the driver again, so init() has to be repeated
* before capture can be restarted.
* @return true on success
*/
bool init();
/**
* Configure the encoder. Must be called before start().
* @param codec2Mode Codec2 library mode (0=3200, 2=1600, 8=700C)
* @param enableFilters Whether to apply HPF+LPF+AGC filter chain
* @return true on success
*/
bool configureEncoder(int codec2Mode, bool enableFilters = true);
/**
* Start the capture task. Returns immediately.
* @return false if the I2S port is not initialized, no encoder is
*         configured, capture is already running, or the task could
*         not be created
*/
bool start();
/**
* Stop the capture task and release I2S resources.
* No-op when capture is not running.
*/
void stop();
/** Mute/unmute the microphone (sends silence when muted). */
void setMute(bool muted) { muted_.store(muted, std::memory_order_relaxed); }
/** Current mute flag. */
bool isMuted() const { return muted_.load(std::memory_order_relaxed); }
/** Check if currently capturing. */
bool isCapturing() const { return capturing_.load(std::memory_order_relaxed); }
/**
* Read the next encoded packet from the ring buffer.
* Called by the network layer.
*
* @param dest Output buffer for encoded packet
* @param maxLength Size of output buffer
* @param actualLength [out] Actual packet size
* @return true if a packet was read
*/
bool readEncodedPacket(uint8_t* dest, int maxLength, int* actualLength);
/** Number of encoded packets waiting in the ring buffer (0 if no ring exists). */
int availablePackets() const;
/**
* Destroy the encoder and release codec resources (encoder, filter chain,
* encoded ring, accumulation and silence buffers).
*/
void destroyEncoder();
private:
// FreeRTOS task entry point; param is the I2SCapture instance.
static void captureTask(void* param);
// Task body: read I2S -> accumulate -> filter -> encode -> ring buffer.
void captureLoop();
// True while the I2S_NUM_1 driver is installed by this object.
bool i2sInitialized_ = false;
// Run flag polled by the capture task; cleared by stop().
std::atomic<bool> capturing_{false};
std::atomic<bool> muted_{false};
// FreeRTOS TaskHandle_t stored as void* (cast back in the .cpp).
void* taskHandle_ = nullptr;
// Audio pipeline components (owned)
Codec2Wrapper* encoder_ = nullptr;
VoiceFilterChain* filterChain_ = nullptr;
EncodedRingBuffer* encodedRing_ = nullptr;
// Accumulation buffer: I2S delivers variable bursts, we need fixed-size frames
int16_t* accumBuffer_ = nullptr;
// Number of samples currently held in accumBuffer_.
int accumCount_ = 0;
int frameSamples_ = 0; // Codec2 samples per frame (e.g., 160 for 3200; 320 for 1600 and 700C)
// Pre-allocated encode output buffer
uint8_t encodeBuf_[256];
// Silence buffer for mute
int16_t* silenceBuf_ = nullptr;
bool filtersEnabled_ = true;
// Capture sample rate in Hz; also passed to the filter chain.
static constexpr int SAMPLE_RATE = 8000;
static constexpr int ENCODED_RING_SLOTS = 32;
static constexpr int ENCODED_RING_MAX_BYTES = 256;
// FreeRTOS task parameters used by start().
static constexpr int CAPTURE_TASK_STACK = 4096;
static constexpr int CAPTURE_TASK_PRIORITY = 5;
static constexpr int CAPTURE_TASK_CORE = 0;
};

View File

@@ -0,0 +1,249 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#include "i2s_playback.h"
#ifdef ARDUINO
#include <driver/i2s.h>
#include <esp_log.h>
#include <esp_heap_caps.h>
#include <Hardware/TDeck/Config.h>
#include "codec_wrapper.h"
#include "packet_ring_buffer.h"
using namespace Hardware::TDeck;
static const char* TAG = "LXST:Playback";
// All members carry in-class initializers, so the default constructor suffices.
I2SPlayback::I2SPlayback() = default;

// Halt the playback task first, then free the decoder-side resources.
I2SPlayback::~I2SPlayback() {
    this->stop();
    this->destroyDecoder();
}
/**
 * (Re)create the Codec2 decoder and its buffers for the given mode.
 *
 * Any previous decoder state is destroyed first. On any failure every
 * partially created resource is released and false is returned, so the
 * object is never left half-configured.
 *
 * @param codec2Mode Codec2 library mode
 * @return true when decoder, PCM ring and work buffers are all ready
 */
bool I2SPlayback::configureDecoder(int codec2Mode) {
    destroyDecoder();

    decoder_ = new Codec2Wrapper();
    if (!decoder_->create(codec2Mode)) {
        ESP_LOGE(TAG, "Failed to create Codec2 decoder mode %d", codec2Mode);
        delete decoder_;
        decoder_ = nullptr;
        return false;
    }

    frameSamples_ = decoder_->samplesPerFrame();
    if (frameSamples_ <= 0) {
        // A non-positive frame size would make every allocation below meaningless.
        ESP_LOGE(TAG, "Invalid frame size %d for Codec2 mode %d", frameSamples_, codec2Mode);
        destroyDecoder();
        return false;
    }

    // PCM ring buffer holding decoded frames for the playback task
    pcmRing_ = new PacketRingBuffer(PCM_RING_FRAMES, frameSamples_);

    // Decode buffer in PSRAM
    decodeBufSize_ = frameSamples_ * 2;  // Extra room for mode switches
    decodeBuf_ = static_cast<int16_t*>(
        heap_caps_malloc(sizeof(int16_t) * decodeBufSize_, MALLOC_CAP_SPIRAM));

    // Drop buffer (for ring overflow discard)
    dropBuf_ = static_cast<int16_t*>(
        heap_caps_malloc(sizeof(int16_t) * frameSamples_, MALLOC_CAP_SPIRAM));

    // heap_caps_malloc() returns nullptr when the request cannot be served;
    // previously this was ignored and the function still reported success.
    if (!decodeBuf_ || !dropBuf_) {
        ESP_LOGE(TAG, "Failed to allocate decode buffers (%d samples/frame)", frameSamples_);
        destroyDecoder();
        return false;
    }

    ESP_LOGI(TAG, "Decoder configured: Codec2 mode %d, %d samples/frame",
             codec2Mode, frameSamples_);
    return true;
}
bool I2SPlayback::start() {
if (!decoder_ || playing_.load()) return false;
// Caller (LXSTAudio) is responsible for calling tone_deinit() first.
// Defensively uninstall in case it wasn't done.
i2s_driver_uninstall(I2S_NUM_0);
// Configure I2S_NUM_0 for voice playback
i2s_config_t i2s_config = {};
i2s_config.mode = static_cast<i2s_mode_t>(I2S_MODE_MASTER | I2S_MODE_TX);
i2s_config.sample_rate = SAMPLE_RATE;
i2s_config.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT;
i2s_config.channel_format = I2S_CHANNEL_FMT_ONLY_LEFT;
i2s_config.communication_format = I2S_COMM_FORMAT_STAND_I2S;
i2s_config.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1;
i2s_config.dma_buf_count = 8;
i2s_config.dma_buf_len = 64;
i2s_config.use_apll = false;
i2s_config.tx_desc_auto_clear = true;
i2s_config.fixed_mclk = 0;
esp_err_t err = i2s_driver_install(I2S_NUM_0, &i2s_config, 0, NULL);
if (err != ESP_OK) {
ESP_LOGE(TAG, "I2S_NUM_0 driver install failed: %d", err);
return false;
}
i2s_pin_config_t pin_config = {};
pin_config.mck_io_num = I2S_PIN_NO_CHANGE;
pin_config.bck_io_num = Audio::I2S_BCK;
pin_config.ws_io_num = Audio::I2S_WS;
pin_config.data_out_num = Audio::I2S_DOUT;
pin_config.data_in_num = I2S_PIN_NO_CHANGE;
err = i2s_set_pin(I2S_NUM_0, &pin_config);
if (err != ESP_OK) {
ESP_LOGE(TAG, "I2S_NUM_0 pin config failed: %d", err);
i2s_driver_uninstall(I2S_NUM_0);
return false;
}
i2sInitialized_ = true;
// Reset ring and prebuffer state
if (pcmRing_) pcmRing_->reset();
playing_.store(true, std::memory_order_relaxed);
BaseType_t ret = xTaskCreatePinnedToCore(
playbackTask, "lxst_play", PLAYBACK_TASK_STACK, this,
PLAYBACK_TASK_PRIORITY, reinterpret_cast<TaskHandle_t*>(&taskHandle_),
PLAYBACK_TASK_CORE);
if (ret != pdPASS) {
ESP_LOGE(TAG, "Failed to create playback task");
playing_.store(false, std::memory_order_relaxed);
i2s_driver_uninstall(I2S_NUM_0);
i2sInitialized_ = false;
return false;
}
ESP_LOGI(TAG, "Playback started");
return true;
}
void I2SPlayback::stop() {
if (!playing_.load()) return;
playing_.store(false, std::memory_order_relaxed);
if (taskHandle_) {
vTaskDelay(pdMS_TO_TICKS(50));
taskHandle_ = nullptr;
}
if (i2sInitialized_) {
// Write silence to flush DMA
int16_t silence[128] = {0};
size_t written;
i2s_write(I2S_NUM_0, silence, sizeof(silence), &written, pdMS_TO_TICKS(100));
i2s_stop(I2S_NUM_0);
i2s_driver_uninstall(I2S_NUM_0);
i2sInitialized_ = false;
}
// Re-init Tone.cpp's I2S driver so notification tones work again
// The next call to tone_play() will re-initialize via tone_init()
ESP_LOGI(TAG, "Playback stopped");
}
void I2SPlayback::destroyDecoder() {
delete decoder_;
decoder_ = nullptr;
delete pcmRing_;
pcmRing_ = nullptr;
free(decodeBuf_);
decodeBuf_ = nullptr;
free(dropBuf_);
dropBuf_ = nullptr;
decodeBufSize_ = 0;
frameSamples_ = 0;
}
bool I2SPlayback::writeEncodedPacket(const uint8_t* data, int length) {
if (!decoder_ || !pcmRing_ || !decodeBuf_) return false;
int decodedSamples = decoder_->decode(data, length, decodeBuf_, decodeBufSize_);
if (decodedSamples <= 0) return false;
// Write decoded PCM to ring buffer
if (!pcmRing_->write(decodeBuf_, decodedSamples)) {
// Ring full — drop oldest frame, then write
if (dropBuf_) {
pcmRing_->read(dropBuf_, decodedSamples);
}
pcmRing_->write(decodeBuf_, decodedSamples);
}
return true;
}
int I2SPlayback::bufferedFrames() const {
if (!pcmRing_) return 0;
return pcmRing_->availableFrames();
}
void I2SPlayback::playbackTask(void* param) {
auto* self = static_cast<I2SPlayback*>(param);
self->playbackLoop();
vTaskDelete(NULL);
}
/**
 * Body of the playback task.
 *
 * Waits until PREBUFFER_FRAMES frames are queued, then repeatedly pulls one
 * frame from the PCM ring and writes it to I2S_NUM_0. Silence is written on
 * underrun or while muted (muted frames are still consumed). Runs until
 * playing_ is cleared.
 */
void I2SPlayback::playbackLoop() {
    ESP_LOGI(TAG, "Playback task running on core %d", xPortGetCoreID());

    // Wait for prebuffer
    bool prebuffered = false;

    // Frame buffer for reading from ring
    int16_t* frameBuf = static_cast<int16_t*>(
        heap_caps_malloc(sizeof(int16_t) * frameSamples_, MALLOC_CAP_SPIRAM));
    if (!frameBuf) {
        ESP_LOGE(TAG, "Failed to allocate frame buffer");
        return;
    }

    // Silence frame for underruns (calloc zero-fills it)
    int16_t* silenceFrame = static_cast<int16_t*>(
        heap_caps_calloc(frameSamples_, sizeof(int16_t), MALLOC_CAP_SPIRAM));
    if (!silenceFrame) {
        // Without this check a null pointer was handed to i2s_write() on the
        // first underrun or muted frame.
        ESP_LOGE(TAG, "Failed to allocate silence frame");
        free(frameBuf);
        return;
    }

    while (playing_.load(std::memory_order_relaxed)) {
        // Prebuffer: wait until we have enough frames before starting playback
        if (!prebuffered) {
            if (pcmRing_ && pcmRing_->availableFrames() >= PREBUFFER_FRAMES) {
                prebuffered = true;
                ESP_LOGI(TAG, "Prebuffer complete, starting playback");
            } else {
                vTaskDelay(pdMS_TO_TICKS(5));
                continue;
            }
        }

        // Read a frame from the ring buffer
        const bool hasFrame = pcmRing_ && pcmRing_->read(frameBuf, frameSamples_);

        // Underrun or muted -> silence; a muted frame has still been consumed
        // above so the ring keeps draining.
        const bool playFrame = hasFrame && !muted_.load(std::memory_order_relaxed);
        int16_t* outputData = playFrame ? frameBuf : silenceFrame;

        // Write to I2S DMA; this blocking write also paces the loop.
        size_t bytesWritten;
        esp_err_t err = i2s_write(I2S_NUM_0, outputData,
                                  frameSamples_ * sizeof(int16_t),
                                  &bytesWritten, pdMS_TO_TICKS(100));
        if (err != ESP_OK) {
            ESP_LOGW(TAG, "I2S write error: %d", err);
        }
    }

    free(frameBuf);
    free(silenceFrame);
    ESP_LOGI(TAG, "Playback task exiting");
}
#endif // ARDUINO

View File

@@ -0,0 +1,97 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#pragma once
#include <cstdint>
#include <atomic>
class PacketRingBuffer;
class Codec2Wrapper;
/**
* ESP32 I2S speaker playback engine for LXST voice streaming.
*
* Shares I2S_NUM_0 with the tone generator (Tone.cpp). When voice playback
* starts, it takes ownership of I2S_NUM_0 and reconfigures it for voice.
* When stopped, I2S_NUM_0 is released so tones can reclaim it.
*
* The tone generator must be stopped before starting voice playback.
*
* Audio flow:
* Network -> writeEncodedPacket() -> decode -> PCM ring buffer -> I2S DMA
*/
class I2SPlayback {
public:
I2SPlayback();
// Stops playback and destroys the decoder.
~I2SPlayback();
// Non-copyable: the instance owns a task handle and raw heap buffers.
I2SPlayback(const I2SPlayback&) = delete;
I2SPlayback& operator=(const I2SPlayback&) = delete;
/**
* Configure the decoder for a specific Codec2 mode.
* Destroys any previously configured decoder and buffers first.
* @param codec2Mode Codec2 library mode (0=3200, 2=1600, 8=700C)
* @return true on success
*/
bool configureDecoder(int codec2Mode);
/**
* Start voice playback. Takes over I2S_NUM_0.
* Tone generator must be stopped first.
* @return true on success; false if no decoder is configured, playback is
*         already running, or I2S/task setup fails
*/
bool start();
/** Stop voice playback and release I2S_NUM_0. No-op when not playing. */
void stop();
/**
* Write an encoded packet for playback.
* Called by the network layer. Decodes to PCM and queues.
* When the PCM ring is full the oldest queued frame is discarded.
*
* @param data Encoded packet (with LXST mode header byte)
* @param length Packet length in bytes
* @return true on success
*/
bool writeEncodedPacket(const uint8_t* data, int length);
/** Mute/unmute playback (outputs silence but keeps consuming data). */
void setMute(bool muted) { muted_.store(muted, std::memory_order_relaxed); }
/** Current mute flag. */
bool isMuted() const { return muted_.load(std::memory_order_relaxed); }
/** True between a successful start() and the next stop(). */
bool isPlaying() const { return playing_.load(std::memory_order_relaxed); }
/** Number of decoded PCM frames buffered. */
int bufferedFrames() const;
/** Destroy the decoder and release codec resources. */
void destroyDecoder();
private:
// FreeRTOS task entry point; param is the I2SPlayback instance.
static void playbackTask(void* param);
// Task body: prebuffer, then ring buffer -> I2S until playing_ is cleared.
void playbackLoop();
// True while the I2S_NUM_0 driver is installed by this object.
bool i2sInitialized_ = false;
// Run flag polled by the playback task; cleared by stop().
std::atomic<bool> playing_{false};
std::atomic<bool> muted_{false};
// FreeRTOS TaskHandle_t stored as void* (cast back in the .cpp).
void* taskHandle_ = nullptr;
// Owned codec and PCM queue.
Codec2Wrapper* decoder_ = nullptr;
PacketRingBuffer* pcmRing_ = nullptr;
// Decode buffer for incoming encoded packets
int16_t* decodeBuf_ = nullptr;
// Capacity of decodeBuf_ in samples (twice frameSamples_).
int decodeBufSize_ = 0;
// Samples per codec frame, as reported by the decoder.
int frameSamples_ = 0;
// Drop buffer for ring overflow
int16_t* dropBuf_ = nullptr;
// Playback sample rate in Hz.
static constexpr int SAMPLE_RATE = 8000;
// Slots in the PCM ring buffer.
static constexpr int PCM_RING_FRAMES = 16;
// Frames that must be queued before the task starts writing to I2S.
static constexpr int PREBUFFER_FRAMES = 3;
// FreeRTOS task parameters used by start().
static constexpr int PLAYBACK_TASK_STACK = 4096;
static constexpr int PLAYBACK_TASK_PRIORITY = 5;
static constexpr int PLAYBACK_TASK_CORE = 0;
};

View File

@@ -0,0 +1,12 @@
{
"name": "lxst_audio",
"version": "0.1.0",
"description": "LXST real-time voice streaming for ESP32-S3 T-Deck Plus",
"frameworks": "arduino",
"platforms": "espressif32",
"dependencies": {
"Wire": "*",
"tdeck_ui": "*",
"tone": "*"
}
}

View File

@@ -0,0 +1,195 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#include "lxst_audio.h"
#ifdef ARDUINO
#include <esp_log.h>
#include <Hardware/TDeck/Config.h>
#include "es7210.h"
#include "i2s_capture.h"
#include "i2s_playback.h"
#include "Tone.h"
using namespace Hardware::TDeck;
static const char* TAG = "LXST:Audio";
// Members carry in-class initializers; nothing to do until init().
LXSTAudio::LXSTAudio() = default;

// Tear the whole pipeline down when the controller goes away.
LXSTAudio::~LXSTAudio() {
    this->deinit();
}
bool LXSTAudio::init(int codec2Mode, uint8_t micGain) {
if (initialized_) {
ESP_LOGW(TAG, "Already initialized");
return true;
}
codec2Mode_ = codec2Mode;
// Initialize ES7210 mic array via I2C
ESP_LOGI(TAG, "Initializing ES7210 mic array...");
if (!ES7210::init(I2C::ES7210_ADDR, static_cast<ES7210::MicGain>(micGain))) {
ESP_LOGE(TAG, "ES7210 init failed");
return false;
}
// Create capture engine
capture_ = new I2SCapture();
if (!capture_->init()) {
ESP_LOGE(TAG, "I2S capture init failed");
delete capture_;
capture_ = nullptr;
return false;
}
// Configure encoder on capture side
if (!capture_->configureEncoder(codec2Mode, true)) {
ESP_LOGE(TAG, "Capture encoder config failed");
delete capture_;
capture_ = nullptr;
return false;
}
// Create playback engine (doesn't init I2S yet — deferred to start())
playback_ = new I2SPlayback();
if (!playback_->configureDecoder(codec2Mode)) {
ESP_LOGE(TAG, "Playback decoder config failed");
delete capture_;
capture_ = nullptr;
delete playback_;
playback_ = nullptr;
return false;
}
initialized_ = true;
state_ = State::IDLE;
ESP_LOGI(TAG, "LXST Audio initialized: Codec2 mode %d", codec2Mode);
return true;
}
/**
 * Stop any active direction, destroy both engines and return to the
 * uninitialized IDLE state.
 */
void LXSTAudio::deinit() {
    stopCapture();
    stopPlayback();

    if (capture_ != nullptr) {
        capture_->destroyEncoder();
        delete capture_;
        capture_ = nullptr;
    }

    if (playback_ != nullptr) {
        playback_->destroyDecoder();
        delete playback_;
        playback_ = nullptr;
    }

    state_ = State::IDLE;
    initialized_ = false;
    ESP_LOGI(TAG, "LXST Audio deinitialized");
}
/**
 * Enter TX mode. Half-duplex: an active playback is stopped first.
 * @return true when capturing (including when it already was), false when
 *         uninitialized or the capture engine failed to start
 */
bool LXSTAudio::startCapture() {
    if (!initialized_ || !capture_) return false;

    switch (state_) {
        case State::CAPTURING:
            return true;  // Already capturing
        case State::PLAYING:
            stopPlayback();  // Half-duplex: stop playback first
            break;
        case State::IDLE:
            break;
    }

    if (capture_->start()) {
        state_ = State::CAPTURING;
        ESP_LOGI(TAG, "Capture started (TX mode)");
        return true;
    }

    ESP_LOGE(TAG, "Failed to start capture");
    return false;
}
void LXSTAudio::stopCapture() {
if (!capture_ || state_ != State::CAPTURING) return;
capture_->stop();
// Re-init capture I2S for next use
capture_->init();
capture_->configureEncoder(codec2Mode_, true);
state_ = State::IDLE;
ESP_LOGI(TAG, "Capture stopped");
}
/**
 * Enter RX mode. Half-duplex: an active capture is stopped first, and the
 * tone generator is asked to give up I2S_NUM_0.
 * @return true when playing (including when it already was), false when
 *         uninitialized or the playback engine failed to start
 */
bool LXSTAudio::startPlayback() {
    if (!initialized_ || !playback_) return false;

    switch (state_) {
        case State::PLAYING:
            return true;  // Already playing
        case State::CAPTURING:
            stopCapture();  // Half-duplex: stop capture first
            break;
        case State::IDLE:
            break;
    }

    // Release I2S_NUM_0 from tone generator
    Notification::tone_deinit();

    if (playback_->start()) {
        state_ = State::PLAYING;
        ESP_LOGI(TAG, "Playback started (RX mode)");
        return true;
    }

    ESP_LOGE(TAG, "Failed to start playback");
    return false;
}
void LXSTAudio::stopPlayback() {
if (!playback_ || state_ != State::PLAYING) return;
playback_->stop();
// Re-configure decoder for next use
playback_->configureDecoder(codec2Mode_);
state_ = State::IDLE;
ESP_LOGI(TAG, "Playback stopped");
// Tone.cpp will re-init its I2S on next tone_play() call
// (tone_init() checks _initialized flag)
}
bool LXSTAudio::readEncodedPacket(uint8_t* dest, int maxLength, int* actualLength) {
if (!capture_ || state_ != State::CAPTURING) return false;
return capture_->readEncodedPacket(dest, maxLength, actualLength);
}
/**
 * Hand an encoded packet to the playback engine.
 * @return false unless the pipeline is playing and the engine accepted it
 */
bool LXSTAudio::writeEncodedPacket(const uint8_t* data, int length) {
    const bool playing = (playback_ != nullptr) && (state_ == State::PLAYING);
    if (!playing) {
        return false;
    }
    return playback_->writeEncodedPacket(data, length);
}
// Forward the microphone mute flag; ignored while no capture engine exists.
void LXSTAudio::setCaptureMute(bool muted) {
    if (capture_ == nullptr) {
        return;
    }
    capture_->setMute(muted);
}
// Forward the speaker mute flag; ignored while no playback engine exists.
void LXSTAudio::setPlaybackMute(bool muted) {
    if (playback_ == nullptr) {
        return;
    }
    playback_->setMute(muted);
}
// Encoded packets queued by the capture engine; 0 when there is no engine.
int LXSTAudio::capturePacketsAvailable() const {
    return (capture_ != nullptr) ? capture_->availablePackets() : 0;
}
// Decoded frames queued in the playback engine; 0 when there is no engine.
int LXSTAudio::playbackFramesBuffered() const {
    return (playback_ != nullptr) ? playback_->bufferedFrames() : 0;
}
#endif // ARDUINO

148
lib/lxst_audio/lxst_audio.h Normal file
View File

@@ -0,0 +1,148 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#pragma once
#include <cstdint>
#include <functional>
class I2SCapture;
class I2SPlayback;
/**
* LXST Audio Pipeline Controller for ESP32-S3 T-Deck Plus.
*
* Top-level API that coordinates:
* - ES7210 microphone initialization
* - I2S capture (mic -> encode -> ring buffer)
* - I2S playback (ring buffer -> decode -> speaker)
* - Codec2 codec lifecycle
* - Tone.cpp coexistence (releases I2S_NUM_0 for tones when idle)
*
* Currently half-duplex (push-to-talk): only capture OR playback active
* at once. Full-duplex can be added once memory profile is validated.
*
* Usage:
* LXSTAudio audio;
* audio.init(CODEC2_MODE_1600);
*
* // To transmit (push-to-talk):
* audio.startCapture();
* while (transmitting) {
* if (audio.readEncodedPacket(buf, sizeof(buf), &len)) {
* // Send over Reticulum link
* }
* }
* audio.stopCapture();
*
* // To receive:
* audio.startPlayback();
* // When encoded packet arrives from network:
* audio.writeEncodedPacket(data, len);
* // When call ends:
* audio.stopPlayback();
*/
// Codec2 library mode constants (from codec2.h)
#ifndef CODEC2_MODE_3200
#define CODEC2_MODE_3200 0
#define CODEC2_MODE_2400 1
#define CODEC2_MODE_1600 2
#define CODEC2_MODE_700C 8
#endif
class LXSTAudio {
public:
// Pipeline state; at most one direction is active at a time (half-duplex).
enum class State {
IDLE, // No audio activity
CAPTURING, // Microphone active, encoding
PLAYING, // Speaker active, decoding
};
LXSTAudio();
// Calls deinit().
~LXSTAudio();
// Non-copyable: the controller owns the capture and playback engines.
LXSTAudio(const LXSTAudio&) = delete;
LXSTAudio& operator=(const LXSTAudio&) = delete;
/**
* Initialize the audio pipeline.
* Sets up ES7210 mic array and configures codec.
* Does NOT start capture or playback.
* Returns true without doing anything when already initialized.
*
* @param codec2Mode Codec2 library mode (default 1600)
* @param micGain ES7210 mic gain (0-14, default 8 = 24dB)
* @return true on success
*/
bool init(int codec2Mode = CODEC2_MODE_1600, uint8_t micGain = 8);
/** Tear down everything and release all resources. */
void deinit();
/**
* Start microphone capture (push-to-talk TX).
* Stops playback if active.
* @return true on success (also when already capturing)
*/
bool startCapture();
/** Stop microphone capture. No-op unless currently capturing. */
void stopCapture();
/**
* Start speaker playback (RX mode).
* Stops capture if active.
* Tone.cpp must not be playing.
* @return true on success (also when already playing)
*/
bool startPlayback();
/** Stop speaker playback. Releases I2S_NUM_0 for tone generator. No-op unless currently playing. */
void stopPlayback();
/**
* Read the next encoded packet from the capture pipeline.
* Called by the network layer during TX.
* Returns false whenever the pipeline is not in the CAPTURING state.
*
* @param dest Output buffer
* @param maxLength Buffer size
* @param actualLength [out] Actual packet size
* @return true if a packet was read
*/
bool readEncodedPacket(uint8_t* dest, int maxLength, int* actualLength);
/**
* Write an encoded packet into the playback pipeline.
* Called by the network layer during RX.
* Returns false whenever the pipeline is not in the PLAYING state.
*
* @param data Encoded packet (with LXST mode header)
* @param length Packet length
* @return true on success
*/
bool writeEncodedPacket(const uint8_t* data, int length);
/** Mute/unmute the microphone (sends silence). */
void setCaptureMute(bool muted);
/** Mute/unmute the speaker. */
void setPlaybackMute(bool muted);
/** Current pipeline state (plain member read, no synchronization). */
State state() const { return state_; }
/** Whether init() has been called successfully. */
bool isInitialized() const { return initialized_; }
/** Number of encoded packets available from capture. */
int capturePacketsAvailable() const;
/** Number of decoded frames buffered for playback. */
int playbackFramesBuffered() const;
private:
// Owned engines; null until init() succeeds and again after deinit().
I2SCapture* capture_ = nullptr;
I2SPlayback* playback_ = nullptr;
State state_ = State::IDLE;
bool initialized_ = false;
// Mode remembered from init() so the codecs can be rebuilt after each stop.
int codec2Mode_ = CODEC2_MODE_1600;
};

View File

@@ -0,0 +1,65 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#include "packet_ring_buffer.h"
#ifdef ARDUINO
#include <esp_heap_caps.h>
#endif
/**
 * Allocate a zero-filled ring of maxFrames slots, each frameSamples int16
 * samples long.
 *
 * The constructor cannot report failure; when the dimensions are unusable or
 * the allocation fails, buffer_ stays null and write()/read() return false.
 *
 * @param maxFrames    Number of slots (one always stays empty, so at least 2)
 * @param frameSamples Samples per slot (at least 1)
 */
PacketRingBuffer::PacketRingBuffer(int maxFrames, int frameSamples)
    : maxFrames_(maxFrames), frameSamples_(frameSamples), buffer_(nullptr) {
    // Reject degenerate sizes up front: write()/read() compute
    // "index % maxFrames_", which must never see a zero divisor, and a
    // single-slot ring could never accept a frame anyway.
    if (maxFrames < 2 || frameSamples < 1) {
        return;
    }

    const size_t bytes = sizeof(int16_t) * static_cast<size_t>(maxFrames) *
                         static_cast<size_t>(frameSamples);
#ifdef BOARD_HAS_PSRAM
    buffer_ = static_cast<int16_t*>(heap_caps_malloc(bytes, MALLOC_CAP_SPIRAM));
#else
    buffer_ = static_cast<int16_t*>(malloc(bytes));
#endif
    if (buffer_) {
        memset(buffer_, 0, bytes);
    }
}

// Release the sample storage (free(nullptr) is a no-op).
PacketRingBuffer::~PacketRingBuffer() {
    free(buffer_);
}
/**
 * Producer side: copy one frame into the ring.
 * @param samples Source frame
 * @param count   Must equal the configured frame size
 * @return false when the ring is full, unallocated, or count mismatches
 */
bool PacketRingBuffer::write(const int16_t* samples, int count) {
    if (!buffer_ || count != frameSamples_) {
        return false;
    }

    const int head = writeIndex_.load(std::memory_order_relaxed);
    const int tail = readIndex_.load(std::memory_order_acquire);
    const int nextHead = (head + 1) % maxFrames_;

    // One slot is kept empty so that "full" and "empty" stay distinguishable.
    if (nextHead == tail) {
        return false;
    }

    int16_t* slot = buffer_ + head * frameSamples_;
    memcpy(slot, samples, sizeof(int16_t) * frameSamples_);

    // Publish the frame only after its samples are in place.
    writeIndex_.store(nextHead, std::memory_order_release);
    return true;
}
/**
 * Consumer side: copy the oldest frame out of the ring.
 * @param dest  Destination for one frame
 * @param count Must equal the configured frame size
 * @return false when the ring is empty, unallocated, or count mismatches
 */
bool PacketRingBuffer::read(int16_t* dest, int count) {
    if (!buffer_ || count != frameSamples_) {
        return false;
    }

    const int tail = readIndex_.load(std::memory_order_relaxed);
    const int head = writeIndex_.load(std::memory_order_acquire);
    if (tail == head) {
        return false;  // nothing queued
    }

    const int16_t* slot = buffer_ + tail * frameSamples_;
    memcpy(dest, slot, sizeof(int16_t) * frameSamples_);

    // Hand the slot back to the producer only after the copy is done.
    const int nextTail = (tail + 1) % maxFrames_;
    readIndex_.store(nextTail, std::memory_order_release);
    return true;
}
// Number of frames currently queued (written but not yet read).
int PacketRingBuffer::availableFrames() const {
    const int head = writeIndex_.load(std::memory_order_acquire);
    const int tail = readIndex_.load(std::memory_order_acquire);
    const int distance = head - tail;
    // A negative distance means the write index has wrapped past the end.
    return (distance < 0) ? distance + maxFrames_ : distance;
}
// Discard all queued frames by rewinding both indices to slot 0.
// Uses relaxed stores only, so it provides no ordering against a concurrent
// write() or read().
void PacketRingBuffer::reset() {
    constexpr auto order = std::memory_order_relaxed;
    writeIndex_.store(0, order);
    readIndex_.store(0, order);
}

View File

@@ -0,0 +1,40 @@
// Copyright (c) 2024 LXST contributors
// SPDX-License-Identifier: MPL-2.0
#pragma once
#include <atomic>
#include <cstdint>
#include <cstring>
/**
* Lock-free Single-Producer Single-Consumer (SPSC) ring buffer for int16 audio.
*
* Ported from LXST-kt native layer. Uses acquire/release memory ordering
* on read/write indices for correct cross-thread visibility without mutexes.
*
* On ESP32-S3, the buffer is allocated in PSRAM to conserve internal RAM.
*/
class PacketRingBuffer {
public:
// Allocates maxFrames slots of frameSamples int16 samples each.
PacketRingBuffer(int maxFrames, int frameSamples);
~PacketRingBuffer();
// Non-copyable: owns the raw sample buffer.
PacketRingBuffer(const PacketRingBuffer&) = delete;
PacketRingBuffer& operator=(const PacketRingBuffer&) = delete;
// Producer: copy one frame in. count must equal frameSamples();
// returns false when the ring is full or unallocated.
bool write(const int16_t* samples, int count);
// Consumer: copy the oldest frame out. count must equal frameSamples();
// returns false when the ring is empty or unallocated.
bool read(int16_t* dest, int count);
// Frames currently queued.
int availableFrames() const;
// Slot count passed to the constructor. write() keeps one slot empty to
// tell "full" from "empty", so at most capacity() - 1 frames are queued.
int capacity() const { return maxFrames_; }
int frameSamples() const { return frameSamples_; }
// Rewind both indices to 0, discarding queued frames.
void reset();
private:
const int maxFrames_;
const int frameSamples_;
// Sample storage; null when allocation failed.
int16_t* buffer_;
// Slot the producer fills next / slot the consumer reads next.
std::atomic<int> writeIndex_{0};
std::atomic<int> readIndex_{0};
};

View File

@@ -62,6 +62,7 @@ namespace I2C {
constexpr uint8_t KEYBOARD_ADDR = 0x55;
constexpr uint8_t TOUCH_ADDR_1 = 0x5D; // Primary GT911 address
constexpr uint8_t TOUCH_ADDR_2 = 0x14; // Alternative GT911 address
constexpr uint8_t ES7210_ADDR = 0x40; // ES7210 mic array ADC (AD0=GND, AD1=GND)
// I2C timing
constexpr uint32_t FREQUENCY = 400000; // 400kHz
@@ -152,11 +153,17 @@ namespace Radio {
}
namespace Audio {
// I2S speaker output pins (I2S_NUM_0)
constexpr uint8_t I2S_BCK = 7; // Bit clock
constexpr uint8_t I2S_WS = 5; // Word select (LRCK)
constexpr uint8_t I2S_DOUT = 6; // Data out
// Note: Pin::POWER_EN (10) must be HIGH to enable speaker power
// ES7210 microphone array I2S capture pins (I2S_NUM_1)
constexpr uint8_t MIC_MCLK = 48; // Master clock output to ES7210
constexpr uint8_t MIC_SCK = 47; // Bit clock (SCLK/BCLK)
constexpr uint8_t MIC_LRCK = 21; // Word select (LRCK)
constexpr uint8_t MIC_DIN = 14; // Data in (SDOUT1 from ES7210)
}
namespace SDCard {

View File

@@ -124,6 +124,15 @@ void tone_stop() {
i2s_write(I2S_PORT, silence, sizeof(silence), &bytes_written, pdMS_TO_TICKS(2000));
}
// Give up the tone generator's I2S port so another component can install its
// own driver on it. Does nothing when the tone driver is not initialized.
void tone_deinit() {
    if (_initialized) {
        // Silence any tone still sounding before the driver goes away.
        tone_stop();
        i2s_driver_uninstall(I2S_PORT);
        _initialized = false;
        Serial.println("[TONE] I2S deinitialized");
    }
}
// Report the module's _playing flag (true while a tone is sounding).
bool tone_is_playing() {
return _playing;
}

View File

@@ -33,6 +33,14 @@ void tone_play(uint16_t frequency, uint16_t duration_ms, uint8_t volume = 50);
*/
void tone_stop();
/**
* Deinitialize the I2S driver, releasing I2S_NUM_0.
* Call this before another component (e.g., LXST voice playback)
* needs to take ownership of I2S_NUM_0.
* After calling this, tone_play() will automatically reinitialize.
*/
void tone_deinit();
/**
* Check if a tone is currently playing
* @return true if playing, false if silent

View File

@@ -38,6 +38,8 @@ lib_deps =
tone
auto_interface
ble_interface
lxst_audio
sh123/esp32_codec2@^1.0.7
symlink://${PROJECT_DIR}/deps/microReticulum/lib/libbz2
; Library dependency finder mode (deep search)
@@ -121,6 +123,8 @@ lib_deps =
auto_interface
h2zero/NimBLE-Arduino@^2.1.0
ble_interface
lxst_audio
sh123/esp32_codec2@^1.0.7
symlink://${PROJECT_DIR}/deps/microReticulum/lib/libbz2
; Build configuration