From f04bdac2af6473778e4a3c1154fa3fa2b694ab6d Mon Sep 17 00:00:00 2001 From: Colin Date: Fri, 9 Jan 2026 22:52:33 +0800 Subject: [PATCH] Recorder and asr. --- QWEN.md | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++ main/asr.c | 59 ++++++++++++++++- main/asr.h | 4 +- main/main.c | 2 +- main/record.c | 47 +++----------- main/record.h | 2 +- 6 files changed, 245 insertions(+), 41 deletions(-) create mode 100644 QWEN.md diff --git a/QWEN.md b/QWEN.md new file mode 100644 index 0000000..73c73f4 --- /dev/null +++ b/QWEN.md @@ -0,0 +1,172 @@ +# ESP32 I2S Audio Processing Project + +## Project Overview + +This is an ESP32-based audio processing project that implements I2S (Inter-IC Sound) communication for capturing audio data using an ES7210 codec. The project includes functionality for: + +- Audio recording to SD card in WAV format +- Automatic Speech Recognition (ASR) integration with cloud APIs +- WiFi connectivity for cloud services +- USB Mass Storage (MSC) for file access +- Power management and sleep modes +- Time synchronization via NTP + +The project is built using the ESP-IDF (Espressif IoT Development Framework) and targets audio applications such as voice recorders, speech recognition systems, or audio processing units. 
+ +## Key Features + +### Audio Recording +- I2S TDM (Time Division Multiplexing) interface with ES7210 codec +- Configurable sample rate (16kHz), bit width (16-bit), and stereo channels +- Direct Memory Access (DMA) for efficient audio data transfer +- WAV file format with proper headers +- Timestamp-based file naming for recordings + +### Connectivity +- WiFi station mode for internet connectivity +- SD card mounting via SDMMC interface +- NTP time synchronization with multiple servers +- HTTP client for cloud API communication + +### Cloud Integration +- ASR (Automatic Speech Recognition) using SiliconFlow API +- Audio transcription capabilities +- Secure HTTPS communication with bearer token authentication + +### Power Management +- Dynamic CPU frequency scaling (8MHz to 240MHz) +- Light sleep mode for power conservation +- Automatic power management configuration + +### USB Interface +- USB Mass Storage Class (MSC) implementation +- SD card access via USB when connected to host +- Console commands for file system operations + +## Hardware Configuration + +### I2S Interface +- Master clock (MCK): GPIO 38 +- Bit clock (BCK): GPIO 14 +- Word select (WS): GPIO 13 +- Data in (DI): GPIO 12 + +### ES7210 Codec +- I2C interface: Port 0 +- SDA: GPIO 1 +- SCL: GPIO 2 +- I2C address: 0x41 + +### SD Card Interface +- Command: GPIO 48 +- Clock: GPIO 47 +- Data 0: GPIO 21 +- Mount point: `/sdcard` + +### Additional Pins +- LED indicator: GPIO 48 (conflicts with the SD card Command pin listed above; verify the actual wiring) + +## Building and Running + +### Prerequisites +- ESP-IDF v5.0 or later installed and configured (the code uses the channel-based I2S driver, e.g. `i2s_chan_handle_t`, introduced in v5.0) +- ESP32-S3-class development board (GPIO 47/48 and USB MSC, used above, are not available on the original ESP32) +- ES7210 I2S audio codec +- SD card + +### Build Process +```bash +# Navigate to project directory +cd /path/to/esp32i2s + +# Configure project (optional, if needed) +idf.py menuconfig + +# Build the project +idf.py build + +# Flash to ESP32 +idf.py flash + +# Monitor serial output +idf.py monitor +``` + +### Configuration Options +- Sample rate: 16000 Hz +- Channel count: 2 (Stereo) +- Bit width: 16 bits +- 
Recording duration: 20 seconds per file +- DMA buffer count: 8 +- DMA buffer length: 512 samples + +## File Structure + +``` +esp32i2s/ +├── CMakeLists.txt # Main build configuration +├── partitions.csv # Partition table +├── sdkconfig # SDK configuration +├── sdkconfig.defaults # Default SDK settings +├── main/ # Main application source +│ ├── main.c # Main application entry point +│ ├── record.c/h # Audio recording functionality +│ ├── asr.c/h # Automatic Speech Recognition +│ ├── base.h # Common definitions and includes +│ ├── format_wav.h # WAV file format definitions +│ ├── usb_msc.c # USB Mass Storage implementation +│ └── ... +└── ... +``` + +## Development Conventions + +### Coding Style +- Follow ESP-IDF coding conventions +- Use ESP_LOG macros for logging +- Handle errors with ESP_ERROR_CHECK and ESP_RETURN_ON_FALSE +- Use FreeRTOS tasks for concurrent operations + +### Memory Management +- Use DMA-capable memory allocation for I2S buffers +- Properly free allocated memory in error paths +- Monitor heap usage for memory leaks + +### Power Efficiency +- Implement sleep modes when idle +- Use power management configuration appropriately +- Minimize active periods for battery operation + +## Testing and Debugging + +### Serial Monitor +Monitor the serial output for logging information: +- I2S initialization status +- SD card mounting results +- WiFi connection status +- Recording progress +- ASR API responses + +### Console Commands +When USB MSC is active, the following console commands are available: +- `read` - Read README.MD file +- `write` - Create/update README.MD file +- `size` - Show storage capacity +- `expose` - Expose storage to USB host +- `status` - Show storage exposure status +- `exit` - Exit application + +## Security Considerations + +- WiFi credentials are hardcoded in `asr.c` (WIFI_SSID and WIFI_PASS) +- API token is hardcoded in `asr.c` (BEARER_TOKEN) +- HTTPS communication uses certificate validation +- Consider using NVS for storing 
sensitive information instead of hardcoding + +## Known Issues and Limitations + +- WiFi credentials and API tokens are hardcoded in source +- Recording duration is fixed at 20 seconds +- Only supports PCM WAV format +- Requires internet connectivity for ASR functionality +- USB MSC and file system access cannot be used simultaneously \ No newline at end of file diff --git a/main/asr.c b/main/asr.c index b0f7937..3b66557 100644 --- a/main/asr.c +++ b/main/asr.c @@ -184,12 +184,30 @@ static esp_err_t read_file_to_buffer(const char* filename, uint8_t** buffer, return ESP_OK; } +// Global variable to store response data +static char* response_data = NULL; +static int response_data_len = 0; + // HTTP事件处理器 esp_err_t _http_event_handler(esp_http_client_event_t* evt) { switch (evt->event_id) { case HTTP_EVENT_ON_DATA: if (!esp_http_client_is_chunked_response(evt->client)) { ESP_LOGI(TAG, "HTTP Response: %.*s", evt->data_len, (char*)evt->data); + + // Allocate/reallocate memory to store response data + char* temp = + realloc(response_data, response_data_len + evt->data_len + 1); + if (temp == NULL) { + ESP_LOGE(TAG, "Failed to allocate memory for response data"); + return ESP_FAIL; + } + response_data = temp; + + // Copy the new data to the response buffer + memcpy(response_data + response_data_len, evt->data, evt->data_len); + response_data_len += evt->data_len; + response_data[response_data_len] = '\0'; // Null terminate } break; case HTTP_EVENT_ON_FINISH: @@ -267,7 +285,11 @@ cleanup: return body; } -static void send_audio_transcription(const char* filepath) { +void send_audio_transcription(const char* filepath) { + // Initialize response data buffer + response_data = NULL; + response_data_len = 0; + uint8_t* file_data = NULL; size_t file_size = 0; @@ -327,6 +349,34 @@ static void send_audio_transcription(const char* filepath) { if (status_code == 200) { ESP_LOGI(TAG, "Request successful"); + + // Create output filename by replacing .wav extension with .txt + char 
output_filepath[256]; + strncpy(output_filepath, filepath, sizeof(output_filepath) - 1); + output_filepath[sizeof(output_filepath) - 1] = '\0'; + + // Find the last dot to replace extension + char* ext = strrchr(output_filepath, '.'); + if (ext != NULL) { + strcpy(ext, ".txt"); + } else { + // If no extension found, just append .txt + strcat(output_filepath, ".txt"); + } + + // Save response data to text file + if (response_data != NULL && response_data_len > 0) { + FILE* f = fopen(output_filepath, "w"); + if (f != NULL) { + fwrite(response_data, 1, response_data_len, f); + fclose(f); + ESP_LOGI(TAG, "Response saved to %s", output_filepath); + } else { + ESP_LOGE(TAG, "Failed to open file for writing: %s", output_filepath); + } + } else { + ESP_LOGW(TAG, "No response data to save"); + } } else { ESP_LOGE(TAG, "Request failed with status: %d", status_code); } @@ -338,6 +388,13 @@ static void send_audio_transcription(const char* filepath) { esp_http_client_cleanup(client); free(body); free(file_data); + + // Free response data buffer + if (response_data != NULL) { + free(response_data); + response_data = NULL; + response_data_len = 0; + } } static void debug_network_test(void) { diff --git a/main/asr.h b/main/asr.h index 528d939..ce82c20 100644 --- a/main/asr.h +++ b/main/asr.h @@ -21,4 +21,6 @@ #include "sdmmc_cmd.h" #include "time.h" -void wifi_init_sta(void); \ No newline at end of file +void wifi_init_sta(void); + +void send_audio_transcription(const char* filepath); \ No newline at end of file diff --git a/main/main.c b/main/main.c index e70a074..3733334 100644 --- a/main/main.c +++ b/main/main.c @@ -291,7 +291,7 @@ void app_main(void) { get_time_init(); wait_for_time_sync(10); - get_current_time(); + print_current_time(); config_power_manager(); diff --git a/main/record.c b/main/record.c index 4c9387d..bad2ec4 100644 --- a/main/record.c +++ b/main/record.c @@ -1,6 +1,8 @@ #include "record.h" +#include "asr.h" + time_t now; struct tm timeinfo; static const char* 
TAG = "record"; @@ -25,19 +27,9 @@ void get_time_init() { ESP_LOGI(TAG, "SNTP初始化完成"); } -esp_err_t get_current_time() { +esp_err_t print_current_time() { char strftime_buf[64]; - // 获取时间 - time(&now); - localtime_r(&now, &timeinfo); - - // 检查时间是否有效(1970年之后) - if (timeinfo.tm_year < 70) { - ESP_LOGW(TAG, "时间未同步,正在等待NTP..."); - return ESP_ERR_TIMEOUT; - } - // 格式化输出时间 strftime(strftime_buf, sizeof(strftime_buf), "%c", &timeinfo); ESP_LOGI(TAG, "当前时间: %s", strftime_buf); @@ -58,8 +50,12 @@ esp_err_t wait_for_time_sync(int timeout_seconds) { vTaskDelay(pdMS_TO_TICKS(1000)); retry++; - // 可选:检查是否已经同步 - if (get_current_time() == ESP_OK) { + time(&now); + localtime_r(&now, &timeinfo); + + // 检查时间是否有效(2000年之后) + if (timeinfo.tm_year > 100) { + ESP_LOGW(TAG, "时间未同步,正在等待NTP..."); return ESP_OK; } } @@ -73,30 +69,8 @@ esp_err_t wait_for_time_sync(int timeout_seconds) { return ESP_OK; } -void time_example_task(void* pvParameters) { - get_time_init(); - - // 等待首次同步 - if (wait_for_time_sync(15) != ESP_OK) { - ESP_LOGW(TAG, "首次同步失败,将继续重试"); - } - - while (1) { - // 每隔一段时间获取时间 - if (get_current_time() == ESP_OK) { - // 时间有效,执行你的业务逻辑 - } else { - // 时间无效,可能需要重新同步 - ESP_LOGW(TAG, "时间无效,尝试重新同步..."); - } - - vTaskDelay(pdMS_TO_TICKS(60000)); // 每分钟获取一次 - } -} - char* get_file_name_from_time(void) { wait_for_time_sync(10); - localtime_r(&now, &timeinfo); strftime(record_file_name, 48, "/sdcard/Record_%Y%m%d%H%M%S.wav", &timeinfo); return record_file_name; } @@ -137,10 +111,9 @@ esp_err_t record_wav(i2s_chan_handle_t i2s_rx_chan) { TAG, "error while writing samples to wav file"); wav_written += bytes_read; } - i2s_channel_disable(i2s_rx_chan); ESP_LOGI(TAG, "Recording done! 
Flushing file buffer"); fclose(f); - + send_audio_transcription(record_file_name); return ret; } \ No newline at end of file diff --git a/main/record.h b/main/record.h index 2e1470a..20e256b 100644 --- a/main/record.h +++ b/main/record.h @@ -3,5 +3,5 @@ esp_err_t record_wav(i2s_chan_handle_t i2s_rx_chan); void get_time_init(); -esp_err_t get_current_time(); +esp_err_t print_current_time(); esp_err_t wait_for_time_sync(int timeout_seconds); \ No newline at end of file