From f04bdac2af6473778e4a3c1154fa3fa2b694ab6d Mon Sep 17 00:00:00 2001
From: Colin <colin@gmail.com>
Date: Fri, 9 Jan 2026 22:52:33 +0800
Subject: [PATCH] Recorder and asr.

---
 QWEN.md       | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++
 main/asr.c    |  59 ++++++++++++++++-
 main/asr.h    |   4 +-
 main/main.c   |   2 +-
 main/record.c |  47 +++-----------
 main/record.h |   2 +-
 6 files changed, 245 insertions(+), 41 deletions(-)
 create mode 100644 QWEN.md

diff --git a/QWEN.md b/QWEN.md
new file mode 100644
index 0000000..73c73f4
--- /dev/null
+++ b/QWEN.md
@@ -0,0 +1,172 @@
+# ESP32 I2S Audio Processing Project
+
+## Project Overview
+
+This is an ESP32-based audio processing project that implements I2S (Inter-IC Sound) communication for capturing audio data using an ES7210 codec. The project includes functionality for:
+
+- Audio recording to SD card in WAV format
+- Automatic Speech Recognition (ASR) integration with cloud APIs
+- WiFi connectivity for cloud services
+- USB Mass Storage (MSC) for file access
+- Power management and sleep modes
+- Time synchronization via NTP
+
+The project is built using the ESP-IDF (Espressif IoT Development Framework) and targets audio applications such as voice recorders, speech recognition systems, or audio processing units.
+
+## Key Features
+
+### Audio Recording
+- I2S TDM (Time Division Multiplexing) interface with ES7210 codec
+- Configurable sample rate (16kHz), bit width (16-bit), and stereo channels
+- Direct Memory Access (DMA) for efficient audio data transfer
+- WAV file format with proper headers
+- Timestamp-based file naming for recordings
+
+### Connectivity
+- WiFi station mode for internet connectivity
+- SD card mounting via SDMMC interface
+- NTP time synchronization with multiple servers
+- HTTP client for cloud API communication
+
+### Cloud Integration
+- ASR (Automatic Speech Recognition) using SiliconFlow API
+- Audio transcription capabilities
+- Secure HTTPS communication with bearer token authentication
+
+### Power Management
+- Dynamic CPU frequency scaling (8MHz to 240MHz)
+- Light sleep mode for power conservation
+- Automatic power management configuration
+
+### USB Interface
+- USB Mass Storage Class (MSC) implementation
+- SD card access via USB when connected to host
+- Console commands for file system operations
+
+## Hardware Configuration
+
+### I2S Interface
+- Master clock (MCK): GPIO 38
+- Bit clock (BCK): GPIO 14
+- Word select (WS): GPIO 13
+- Data in (DI): GPIO 12
+
+### ES7210 Codec
+- I2C interface: Port 0
+- SDA: GPIO 1
+- SCL: GPIO 2
+- I2C address: 0x41
+
+### SD Card Interface
+- Command: GPIO 48
+- Clock: GPIO 47
+- Data 0: GPIO 21
+- Mount point: `/sdcard`
+
+### Additional Pins
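+- LED indicator: GPIO 48 (note: this is the same pin listed above as the SD card Command line — verify against the board schematic)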
+
+## Building and Running
+
+### Prerequisites
+- ESP-IDF v5.0 or later installed and configured (the recorder uses the channel-based I2S driver API, e.g. `i2s_chan_handle_t`)
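+- ESP32-S3 development board (or another ESP32 variant that provides GPIO 47/48 and native USB, which the pin map and USB MSC feature above require)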
+- ES7210 I2S audio codec
+- SD card
+
+### Build Process
+```bash
+# Navigate to project directory
+cd /path/to/esp32i2s
+
+# Configure project (optional, if needed)
+idf.py menuconfig
+
+# Build the project
+idf.py build
+
+# Flash to ESP32
+idf.py flash
+
+# Monitor serial output
+idf.py monitor
+```
+
+### Configuration Options
+- Sample rate: 16000 Hz
+- Channel count: 2 (Stereo)
+- Bit width: 16 bits
+- Recording duration: 20 seconds per file
+- DMA buffer count: 8
+- DMA buffer length: 512 samples
+
+## File Structure
+
+```
+esp32i2s/
+├── CMakeLists.txt          # Main build configuration
+├── partitions.csv          # Partition table
+├── sdkconfig              # SDK configuration
+├── sdkconfig.defaults     # Default SDK settings
+├── main/                  # Main application source
+│   ├── main.c             # Main application entry point
+│   ├── record.c/h         # Audio recording functionality
+│   ├── asr.c/h            # Automatic Speech Recognition
+│   ├── base.h             # Common definitions and includes
+│   ├── format_wav.h       # WAV file format definitions
+│   ├── usb_msc.c          # USB Mass Storage implementation
+│   └── ...
+└── ...
+```
+
+## Development Conventions
+
+### Coding Style
+- Follow ESP-IDF coding conventions
+- Use ESP_LOG macros for logging
+- Handle errors with ESP_ERROR_CHECK and ESP_RETURN_ON_FALSE
+- Use FreeRTOS tasks for concurrent operations
+
+### Memory Management
+- Use DMA-capable memory allocation for I2S buffers
+- Properly free allocated memory in error paths
+- Monitor heap usage for memory leaks
+
+### Power Efficiency
+- Implement sleep modes when idle
+- Use power management configuration appropriately
+- Minimize active periods for battery operation
+
+## Testing and Debugging
+
+### Serial Monitor
+Monitor the serial output for logging information:
+- I2S initialization status
+- SD card mounting results
+- WiFi connection status
+- Recording progress
+- ASR API responses
+
+### Console Commands
+When USB MSC is active, the following console commands are available:
+- `read` - Read README.MD file
+- `write` - Create/update README.MD file
+- `size` - Show storage capacity
+- `expose` - Expose storage to USB host
+- `status` - Show storage exposure status
+- `exit` - Exit application
+
+## Security Considerations
+
+- WiFi credentials are hardcoded in `asr.c` (WIFI_SSID and WIFI_PASS)
+- API token is hardcoded in `asr.c` (BEARER_TOKEN)
+- HTTPS communication uses certificate validation
+- Consider using NVS for storing sensitive information instead of hardcoding
+
+## Known Issues and Limitations
+
+- WiFi credentials and API tokens are hardcoded in source
+- Recording duration is fixed at 20 seconds
+- Only supports PCM WAV format
+- Requires internet connectivity for ASR functionality
+- USB MSC and file system access cannot be used simultaneously
\ No newline at end of file
diff --git a/main/asr.c b/main/asr.c
index b0f7937..3b66557 100644
--- a/main/asr.c
+++ b/main/asr.c
@@ -184,12 +184,30 @@ static esp_err_t read_file_to_buffer(const char* filename, uint8_t** buffer,
   return ESP_OK;
 }
 
+// File-scope buffer that accumulates the HTTP response body (kept NUL-terminated)
+static char* response_data = NULL;
+static int response_data_len = 0;
+
 // HTTP事件处理器
 esp_err_t _http_event_handler(esp_http_client_event_t* evt) {
   switch (evt->event_id) {
     case HTTP_EVENT_ON_DATA:
       if (!esp_http_client_is_chunked_response(evt->client)) {
         ESP_LOGI(TAG, "HTTP Response: %.*s", evt->data_len, (char*)evt->data);
+
+        // Allocate/reallocate memory to store response data
+        char* temp =
+            realloc(response_data, response_data_len + evt->data_len + 1);
+        if (temp == NULL) {
+          ESP_LOGE(TAG, "Failed to allocate memory for response data");
+          return ESP_FAIL;
+        }
+        response_data = temp;
+
+        // Copy the new data to the response buffer
+        memcpy(response_data + response_data_len, evt->data, evt->data_len);
+        response_data_len += evt->data_len;
+        response_data[response_data_len] = '\0';  // Null terminate
       }
       break;
     case HTTP_EVENT_ON_FINISH:
@@ -267,7 +285,11 @@ cleanup:
   return body;
 }
 
-static void send_audio_transcription(const char* filepath) {
+void send_audio_transcription(const char* filepath) {
+  // Initialize response data buffer
+  response_data = NULL;
+  response_data_len = 0;
+
   uint8_t* file_data = NULL;
   size_t file_size = 0;
 
@@ -327,6 +349,34 @@ static void send_audio_transcription(const char* filepath) {
 
     if (status_code == 200) {
       ESP_LOGI(TAG, "Request successful");
+
+      // Create output filename by replacing .wav extension with .txt
+      char output_filepath[256];
+      strncpy(output_filepath, filepath, sizeof(output_filepath) - 1);
+      output_filepath[sizeof(output_filepath) - 1] = '\0';
+
+      // Find the last dot to replace extension
+      char* ext = strrchr(output_filepath, '.');
+      if (ext != NULL) {
+        strcpy(ext, ".txt");
+      } else {
+        // If no extension found, just append .txt
+        strcat(output_filepath, ".txt");
+      }
+
+      // Save response data to text file
+      if (response_data != NULL && response_data_len > 0) {
+        FILE* f = fopen(output_filepath, "w");
+        if (f != NULL) {
+          fwrite(response_data, 1, response_data_len, f);
+          fclose(f);
+          ESP_LOGI(TAG, "Response saved to %s", output_filepath);
+        } else {
+          ESP_LOGE(TAG, "Failed to open file for writing: %s", output_filepath);
+        }
+      } else {
+        ESP_LOGW(TAG, "No response data to save");
+      }
     } else {
       ESP_LOGE(TAG, "Request failed with status: %d", status_code);
     }
@@ -338,6 +388,13 @@ static void send_audio_transcription(const char* filepath) {
   esp_http_client_cleanup(client);
   free(body);
   free(file_data);
+
+  // Free response data buffer
+  if (response_data != NULL) {
+    free(response_data);
+    response_data = NULL;
+    response_data_len = 0;
+  }
 }
 
 static void debug_network_test(void) {
diff --git a/main/asr.h b/main/asr.h
index 528d939..ce82c20 100644
--- a/main/asr.h
+++ b/main/asr.h
@@ -21,4 +21,6 @@
 #include "sdmmc_cmd.h"
 #include "time.h"
 
-void wifi_init_sta(void);
\ No newline at end of file
+void wifi_init_sta(void);
+
+void send_audio_transcription(const char* filepath);
\ No newline at end of file
diff --git a/main/main.c b/main/main.c
index e70a074..3733334 100644
--- a/main/main.c
+++ b/main/main.c
@@ -291,7 +291,7 @@ void app_main(void) {
 
   get_time_init();
   wait_for_time_sync(10);
-  get_current_time();
+  print_current_time();
 
   config_power_manager();
 
diff --git a/main/record.c b/main/record.c
index 4c9387d..bad2ec4 100644
--- a/main/record.c
+++ b/main/record.c
@@ -1,6 +1,8 @@
 
 #include "record.h"
 
+#include "asr.h"
+
 time_t now;
 struct tm timeinfo;
 static const char* TAG = "record";
@@ -25,19 +27,9 @@ void get_time_init() {
   ESP_LOGI(TAG, "SNTP初始化完成");
 }
 
-esp_err_t get_current_time() {
+esp_err_t print_current_time() {
   char strftime_buf[64];
 
-  // 获取时间
-  time(&now);
-  localtime_r(&now, &timeinfo);
-
-  // 检查时间是否有效（1970年之后）
-  if (timeinfo.tm_year < 70) {
-    ESP_LOGW(TAG, "时间未同步，正在等待NTP...");
-    return ESP_ERR_TIMEOUT;
-  }
-
   // 格式化输出时间
   strftime(strftime_buf, sizeof(strftime_buf), "%c", &timeinfo);
   ESP_LOGI(TAG, "当前时间: %s", strftime_buf);
@@ -58,8 +50,12 @@ esp_err_t wait_for_time_sync(int timeout_seconds) {
     vTaskDelay(pdMS_TO_TICKS(1000));
     retry++;
 
-    // 可选：检查是否已经同步
-    if (get_current_time() == ESP_OK) {
+    time(&now);
+    localtime_r(&now, &timeinfo);
+
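+    // Time counts as synced once tm_year > 100 (year after 2000). NOTE(review): the warning logged below says "not synced" on this success path.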
+    if (timeinfo.tm_year > 100) {
+      ESP_LOGW(TAG, "时间未同步，正在等待NTP...");
       return ESP_OK;
     }
   }
@@ -73,30 +69,8 @@ esp_err_t wait_for_time_sync(int timeout_seconds) {
   return ESP_OK;
 }
 
-void time_example_task(void* pvParameters) {
-  get_time_init();
-
-  // 等待首次同步
-  if (wait_for_time_sync(15) != ESP_OK) {
-    ESP_LOGW(TAG, "首次同步失败，将继续重试");
-  }
-
-  while (1) {
-    // 每隔一段时间获取时间
-    if (get_current_time() == ESP_OK) {
-      // 时间有效，执行你的业务逻辑
-    } else {
-      // 时间无效，可能需要重新同步
-      ESP_LOGW(TAG, "时间无效，尝试重新同步...");
-    }
-
-    vTaskDelay(pdMS_TO_TICKS(60000));  // 每分钟获取一次
-  }
-}
-
 char* get_file_name_from_time(void) {
   wait_for_time_sync(10);
-  localtime_r(&now, &timeinfo);
   strftime(record_file_name, 48, "/sdcard/Record_%Y%m%d%H%M%S.wav", &timeinfo);
   return record_file_name;
 }
@@ -137,10 +111,9 @@ esp_err_t record_wav(i2s_chan_handle_t i2s_rx_chan) {
                         TAG, "error while writing samples to wav file");
     wav_written += bytes_read;
   }
-
   i2s_channel_disable(i2s_rx_chan);
   ESP_LOGI(TAG, "Recording done! Flushing file buffer");
   fclose(f);
-
+  send_audio_transcription(record_file_name);
   return ret;
 }
\ No newline at end of file
diff --git a/main/record.h b/main/record.h
index 2e1470a..20e256b 100644
--- a/main/record.h
+++ b/main/record.h
@@ -3,5 +3,5 @@
 esp_err_t record_wav(i2s_chan_handle_t i2s_rx_chan);
 
 void get_time_init();
-esp_err_t get_current_time();
+esp_err_t print_current_time();
 esp_err_t wait_for_time_sync(int timeout_seconds);
\ No newline at end of file