Recorder and asr.

2026-01-09 22:52:33 +08:00 · 2026-01-09 22:52:33 +08:00 · f04bdac2af
parent ddb902b598
commit f04bdac2af
6 changed files with 245 additions and 41 deletions
--- a/QWEN.md
+++ b/QWEN.md
@ -0,0 +1,172 @@
+# ESP32 I2S Audio Processing Project
+
+## Project Overview
+
+This is an ESP32-based audio processing project that implements I2S (Inter-IC Sound) communication for capturing audio data using an ES7210 codec. The project includes functionality for:
+
+- Audio recording to SD card in WAV format
+- Automatic Speech Recognition (ASR) integration with cloud APIs
+- WiFi connectivity for cloud services
+- USB Mass Storage (MSC) for file access
+- Power management and sleep modes
+- Time synchronization via NTP
+
+The project is built using the ESP-IDF (Espressif IoT Development Framework) and targets audio applications such as voice recorders, speech recognition systems, or audio processing units.
+
+## Key Features
+
+### Audio Recording
+- I2S TDM (Time Division Multiplexing) interface with ES7210 codec
+- Configurable sample rate (16kHz), bit width (16-bit), and stereo channels
+- Direct Memory Access (DMA) for efficient audio data transfer
+- WAV file format with proper headers
+- Timestamp-based file naming for recordings
+
+### Connectivity
+- WiFi station mode for internet connectivity
+- SD card mounting via SDMMC interface
+- NTP time synchronization with multiple servers
+- HTTP client for cloud API communication
+
+### Cloud Integration
+- ASR (Automatic Speech Recognition) using SiliconFlow API
+- Audio transcription capabilities
+- Secure HTTPS communication with bearer token authentication
+
+### Power Management
+- Dynamic CPU frequency scaling (8MHz to 240MHz)
+- Light sleep mode for power conservation
+- Automatic power management configuration
+
+### USB Interface
+- USB Mass Storage Class (MSC) implementation
+- SD card access via USB when connected to host
+- Console commands for file system operations
+
+## Hardware Configuration
+
+### I2S Interface
+- Master clock (MCK): GPIO 38
+- Bit clock (BCK): GPIO 14
+- Word select (WS): GPIO 13
+- Data in (DI): GPIO 12
+
+### ES7210 Codec
+- I2C interface: Port 0
+- SDA: GPIO 1
+- SCL: GPIO 2
+- I2C address: 0x41
+
+### SD Card Interface
+- Command: GPIO 48
+- Clock: GPIO 47
+- Data 0: GPIO 21
+- Mount point: `/sdcard`
+
+### Additional Pins
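+- LED indicator: GPIO 48 (note: GPIO 48 is also listed above as the SD card command pin — verify the actual assignment against the board schematic)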
+
+## Building and Running
+
+### Prerequisites
+- ESP-IDF v5.0 or later installed and configured (the code uses the channel-based I2S driver, `i2s_chan_handle_t`, which is not available in v4.x)
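+- ESP32-series development board with native USB and GPIO 47/48 available (e.g. ESP32-S3; the original ESP32 has neither — confirm the exact target in `sdkconfig`)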
+- ES7210 I2S audio codec
+- SD card
+
+### Build Process
+```bash
+# Navigate to project directory
+cd /path/to/esp32i2s
+
+# Configure project (optional, if needed)
+idf.py menuconfig
+
+# Build the project
+idf.py build
+
+# Flash to ESP32
+idf.py flash
+
+# Monitor serial output
+idf.py monitor
+```
+
+### Configuration Options
+- Sample rate: 16000 Hz
+- Channel count: 2 (Stereo)
+- Bit width: 16 bits
+- Recording duration: 20 seconds per file
+- DMA buffer count: 8
+- DMA buffer length: 512 samples
+
+## File Structure
+
+```
+esp32i2s/
+├── CMakeLists.txt          # Main build configuration
+├── partitions.csv          # Partition table
+├── sdkconfig              # SDK configuration
+├── sdkconfig.defaults     # Default SDK settings
+├── main/                  # Main application source
+│   ├── main.c             # Main application entry point
+│   ├── record.c/h         # Audio recording functionality
+│   ├── asr.c/h            # Automatic Speech Recognition
+│   ├── base.h             # Common definitions and includes
+│   ├── format_wav.h       # WAV file format definitions
+│   ├── usb_msc.c          # USB Mass Storage implementation
+│   └── ...
+└── ...
+```
+
+## Development Conventions
+
+### Coding Style
+- Follow ESP-IDF coding conventions
+- Use ESP_LOG macros for logging
+- Handle errors with ESP_ERROR_CHECK and ESP_RETURN_ON_FALSE
+- Use FreeRTOS tasks for concurrent operations
+
+### Memory Management
+- Use DMA-capable memory allocation for I2S buffers
+- Properly free allocated memory in error paths
+- Monitor heap usage for memory leaks
+
+### Power Efficiency
+- Implement sleep modes when idle
+- Use power management configuration appropriately
+- Minimize active periods for battery operation
+
+## Testing and Debugging
+
+### Serial Monitor
+Monitor the serial output for logging information:
+- I2S initialization status
+- SD card mounting results
+- WiFi connection status
+- Recording progress
+- ASR API responses
+
+### Console Commands
+When USB MSC is active, the following console commands are available:
+- `read` - Read README.MD file
+- `write` - Create/update README.MD file
+- `size` - Show storage capacity
+- `expose` - Expose storage to USB host
+- `status` - Show storage exposure status
+- `exit` - Exit application
+
+## Security Considerations
+
+- WiFi credentials are hardcoded in `asr.c` (WIFI_SSID and WIFI_PASS)
+- API token is hardcoded in `asr.c` (BEARER_TOKEN)
+- HTTPS communication uses certificate validation
+- Consider using NVS for storing sensitive information instead of hardcoding
+
+## Known Issues and Limitations
+
+- WiFi credentials and API tokens are hardcoded in source
+- Recording duration is fixed at 20 seconds
+- Only supports PCM WAV format
+- Requires internet connectivity for ASR functionality
+- USB MSC and file system access cannot be used simultaneously
--- a/main/asr.c
+++ b/main/asr.c
@ -184,12 +184,30 @@ static esp_err_t read_file_to_buffer(const char* filename, uint8_t** buffer,
  return ESP_OK;
 }

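+// File-scope accumulator for the HTTP response body: _http_event_handler
+// appends each non-chunked HTTP_EVENT_ON_DATA payload here and keeps the
+// buffer NUL-terminated.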
+static char* response_data = NULL;
+static int response_data_len = 0;
+
 // HTTP事件处理器
 esp_err_t _http_event_handler(esp_http_client_event_t* evt) {
  switch (evt->event_id) {
    case HTTP_EVENT_ON_DATA:
      if (!esp_http_client_is_chunked_response(evt->client)) {
        ESP_LOGI(TAG, "HTTP Response: %.*s", evt->data_len, (char*)evt->data);
+
+        // Allocate/reallocate memory to store response data
+        char* temp =
+            realloc(response_data, response_data_len + evt->data_len + 1);
+        if (temp == NULL) {
+          ESP_LOGE(TAG, "Failed to allocate memory for response data");
+          return ESP_FAIL;
+        }
+        response_data = temp;
+
+        // Copy the new data to the response buffer
+        memcpy(response_data + response_data_len, evt->data, evt->data_len);
+        response_data_len += evt->data_len;
+        response_data[response_data_len] = '\0';  // Null terminate
      }
      break;
    case HTTP_EVENT_ON_FINISH:
@ -267,7 +285,11 @@ cleanup:
  return body;
 }

-static void send_audio_transcription(const char* filepath) {
+void send_audio_transcription(const char* filepath) {
+  // Initialize response data buffer
+  response_data = NULL;
+  response_data_len = 0;
+
  uint8_t* file_data = NULL;
  size_t file_size = 0;

@ -327,6 +349,34 @@ static void send_audio_transcription(const char* filepath) {

    if (status_code == 200) {
      ESP_LOGI(TAG, "Request successful");
+
+      // Create output filename by replacing .wav extension with .txt
+      char output_filepath[256];
+      strncpy(output_filepath, filepath, sizeof(output_filepath) - 1);
+      output_filepath[sizeof(output_filepath) - 1] = '\0';
+
+      // Find the last dot to replace extension
+      char* ext = strrchr(output_filepath, '.');
+      if (ext != NULL) {
+        strcpy(ext, ".txt");
+      } else {
+        // If no extension found, just append .txt
+        strcat(output_filepath, ".txt");
+      }
+
+      // Save response data to text file
+      if (response_data != NULL && response_data_len > 0) {
+        FILE* f = fopen(output_filepath, "w");
+        if (f != NULL) {
+          fwrite(response_data, 1, response_data_len, f);
+          fclose(f);
+          ESP_LOGI(TAG, "Response saved to %s", output_filepath);
+        } else {
+          ESP_LOGE(TAG, "Failed to open file for writing: %s", output_filepath);
+        }
+      } else {
+        ESP_LOGW(TAG, "No response data to save");
+      }
    } else {
      ESP_LOGE(TAG, "Request failed with status: %d", status_code);
    }
@ -338,6 +388,13 @@ static void send_audio_transcription(const char* filepath) {
  esp_http_client_cleanup(client);
  free(body);
  free(file_data);
+
+  // Free response data buffer
+  if (response_data != NULL) {
+    free(response_data);
+    response_data = NULL;
+    response_data_len = 0;
+  }
 }

 static void debug_network_test(void) {
--- a/main/asr.h
+++ b/main/asr.h
@ -21,4 +21,6 @@
 #include "sdmmc_cmd.h"
 #include "time.h"

-void wifi_init_sta(void);
+void wifi_init_sta(void);
+
+void send_audio_transcription(const char* filepath);
--- a/main/main.c
+++ b/main/main.c
@ -291,7 +291,7 @@ void app_main(void) {

  get_time_init();
  wait_for_time_sync(10);
-  get_current_time();
+  print_current_time();

  config_power_manager();

--- a/main/record.c
+++ b/main/record.c
@ -1,6 +1,8 @@

 #include "record.h"

+#include "asr.h"
+
 time_t now;
 struct tm timeinfo;
 static const char* TAG = "record";
@ -25,19 +27,9 @@ void get_time_init() {
  ESP_LOGI(TAG, "SNTP初始化完成");
 }

-esp_err_t get_current_time() {
+esp_err_t print_current_time() {
  char strftime_buf[64];

-  // 获取时间
-  time(&now);
-  localtime_r(&now, &timeinfo);
-
-  // 检查时间是否有效（1970年之后）
-  if (timeinfo.tm_year < 70) {
-    ESP_LOGW(TAG, "时间未同步，正在等待NTP...");
-    return ESP_ERR_TIMEOUT;
-  }
-
  // 格式化输出时间
  strftime(strftime_buf, sizeof(strftime_buf), "%c", &timeinfo);
  ESP_LOGI(TAG, "当前时间: %s", strftime_buf);
@ -58,8 +50,12 @@ esp_err_t wait_for_time_sync(int timeout_seconds) {
    vTaskDelay(pdMS_TO_TICKS(1000));
    retry++;

-    // 可选：检查是否已经同步
-    if (get_current_time() == ESP_OK) {
+    time(&now);
+    localtime_r(&now, &timeinfo);
+
+    // 检查时间是否有效（2000年之后）
+    if (timeinfo.tm_year > 100) {
+      ESP_LOGW(TAG, "时间未同步，正在等待NTP...");
      return ESP_OK;
    }
  }
@ -73,30 +69,8 @@ esp_err_t wait_for_time_sync(int timeout_seconds) {
  return ESP_OK;
 }

-void time_example_task(void* pvParameters) {
-  get_time_init();
-
-  // 等待首次同步
-  if (wait_for_time_sync(15) != ESP_OK) {
-    ESP_LOGW(TAG, "首次同步失败，将继续重试");
-  }
-
-  while (1) {
-    // 每隔一段时间获取时间
-    if (get_current_time() == ESP_OK) {
-      // 时间有效，执行你的业务逻辑
-    } else {
-      // 时间无效，可能需要重新同步
-      ESP_LOGW(TAG, "时间无效，尝试重新同步...");
-    }
-
-    vTaskDelay(pdMS_TO_TICKS(60000));  // 每分钟获取一次
-  }
-}
-
 char* get_file_name_from_time(void) {
  wait_for_time_sync(10);
-  localtime_r(&now, &timeinfo);
  strftime(record_file_name, 48, "/sdcard/Record_%Y%m%d%H%M%S.wav", &timeinfo);
  return record_file_name;
 }
@ -137,10 +111,9 @@ esp_err_t record_wav(i2s_chan_handle_t i2s_rx_chan) {
                        TAG, "error while writing samples to wav file");
    wav_written += bytes_read;
  }
-
  i2s_channel_disable(i2s_rx_chan);
  ESP_LOGI(TAG, "Recording done! Flushing file buffer");
  fclose(f);
-
+  send_audio_transcription(record_file_name);
  return ret;
 }
--- a/main/record.h
+++ b/main/record.h
@ -3,5 +3,5 @@
 esp_err_t record_wav(i2s_chan_handle_t i2s_rx_chan);

 void get_time_init();
-esp_err_t get_current_time();
+esp_err_t print_current_time();
 esp_err_t wait_for_time_sync(int timeout_seconds);