doc(lp_vad): lp vad programming guide

This commit is contained in:
Armando 2024-11-08 12:26:05 +08:00
parent e4a9243517
commit 37d8e97a9d
12 changed files with 206 additions and 25 deletions

View File

@ -78,8 +78,9 @@ typedef struct vad_unit_ctx_t *vad_unit_handle_t;
typedef struct {
int init_frame_num; /**< Number of init frames that are used for VAD to denoise, this helps the VAD to decrease the accidental trigger ratio.
Note too big values may lead to voice activity miss */
int min_energy_thresh; ///< Min energy threshold.
bool skip_band_energy_thresh; ///< Skip band energy threshold or not
int min_energy_thresh; ///< Minimum energy threshold, voice activities with energy higher than this value will be detected.
bool skip_band_energy_thresh; /**< Skip band energy threshold or not, the passband energy check determines whether the proportion of passband energy within the total frequency domain meets the required threshold.
Note in different environments, enabling the passband energy check may reduce false trigger rates but could also increase the rate of missed detections. */
int speak_activity_thresh; /**< When in speak-activity-listening-state, if number of the detected speak activity is higher than this value, VAD runs into speak-activity-detected-state */
@ -93,6 +94,9 @@ typedef struct {
int max_speak_activity_thresh; /**< When in speak-activity-detected-state, if the number of the detected speak activity is higher than this value, VAD runs into speak-activity-listening-state */
} lp_vad_config_t;
/**
* @brief LP VAD Init Configurations
*/
typedef struct {
lp_i2s_chan_handle_t lp_i2s_chan; ///< LP I2S channel handle
lp_vad_config_t vad_config; ///< LP VAD config
@ -115,7 +119,6 @@ esp_err_t lp_i2s_vad_new_unit(lp_vad_t vad_id, const lp_vad_init_config_t *init_
* @brief Enable LP VAD
*
* @param[in] unit VAD handle
* @param[in] init_config Initial configurations
*
* @return
* - ESP_OK: On success
@ -128,7 +131,6 @@ esp_err_t lp_i2s_vad_enable(vad_unit_handle_t unit);
* @brief Disable LP VAD
*
* @param[in] unit VAD handle
* @param[in] init_config Initial configurations
*
* @return
* - ESP_OK: On success

View File

@ -82,7 +82,6 @@ esp_err_t lp_core_lp_vad_init(lp_vad_t vad_id, const lp_core_lp_vad_cfg_t *init_
* @brief Enable LP VAD
*
* @param[in] vad_id VAD ID
* @param[in] init_config Initial configurations
*
* @return
* - ESP_OK: On success
@ -95,7 +94,6 @@ esp_err_t lp_core_lp_vad_enable(lp_vad_t vad_id);
* @brief Disable LP VAD
*
* @param[in] vad_id VAD ID
* @param[in] init_config Initial configurations
*
* @return
* - ESP_OK: On success

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

View File

@ -174,6 +174,10 @@ SPI_DOCS = ['api-reference/peripherals/spi_master.rst',
I2S_DOCS = ['api-reference/peripherals/i2s.rst']
VAD_DOCS = ['api-reference/peripherals/vad.rst']
LP_I2S_DOCS = ['api-reference/peripherals/lp_i2s.rst']
ISP_DOCS = ['api-reference/peripherals/isp.rst']
DSLP_STUB_DOCS = ['api-guides/deep-sleep-stub.rst']
@ -286,6 +290,8 @@ conditional_include_dict = {'SOC_BT_SUPPORTED':BT_DOCS,
'SOC_I2C_SUPPORTED':I2C_DOCS,
'SOC_GPSPI_SUPPORTED':SPI_DOCS,
'SOC_I2S_SUPPORTED':I2S_DOCS,
'SOC_LP_I2S_SUPPORTED':LP_I2S_DOCS,
'SOC_LP_VAD_SUPPORTED':VAD_DOCS,
'SOC_ISP_SUPPORTED':ISP_DOCS,
'ESP_ROM_SUPPORT_DEEP_SLEEP_WAKEUP_STUB': DSLP_STUB_DOCS,
'SOC_ADC_SUPPORTED':ADC_DOCS,

View File

@ -125,9 +125,6 @@ INPUT = \
$(PROJECT_PATH)/components/esp_driver_i2s/include/driver/i2s_pdm.h \
$(PROJECT_PATH)/components/esp_driver_i2s/include/driver/i2s_std.h \
$(PROJECT_PATH)/components/esp_driver_i2s/include/driver/i2s_tdm.h \
$(PROJECT_PATH)/components/esp_driver_i2s/include/driver/lp_i2s.h \
$(PROJECT_PATH)/components/esp_driver_i2s/include/driver/lp_i2s_std.h \
$(PROJECT_PATH)/components/esp_driver_i2s/include/driver/lp_i2s_pdm.h \
$(PROJECT_PATH)/components/esp_driver_i2s/include/driver/i2s_types.h \
$(PROJECT_PATH)/components/esp_driver_pcnt/include/driver/pulse_cnt.h \
$(PROJECT_PATH)/components/esp_driver_rmt/include/driver/rmt_common.h \

View File

@ -11,6 +11,7 @@ INPUT += \
$(PROJECT_PATH)/components/ulp/lp_core/lp_core/include/ulp_lp_core_utils.h \
$(PROJECT_PATH)/components/ulp/lp_core/lp_core/include/ulp_lp_core_interrupts.h \
$(PROJECT_PATH)/components/ulp/lp_core/lp_core/include/ulp_lp_core_spi.h \
$(PROJECT_PATH)/components/ulp/lp_core/shared/include/ulp_lp_core_lp_vad_shared.h \
$(PROJECT_PATH)/components/ulp/ulp_common/include/ulp_common.h \
$(PROJECT_PATH)/components/usb/include/usb/usb_helpers.h \
$(PROJECT_PATH)/components/usb/include/usb/usb_host.h \
@ -41,6 +42,10 @@ INPUT += \
$(PROJECT_PATH)/components/esp_driver_isp/include/driver/isp_gamma.h \
$(PROJECT_PATH)/components/esp_driver_isp/include/driver/isp_hist.h \
$(PROJECT_PATH)/components/esp_driver_isp/include/driver/isp_color.h \
$(PROJECT_PATH)/components/esp_driver_i2s/include/driver/lp_i2s.h \
$(PROJECT_PATH)/components/esp_driver_i2s/include/driver/lp_i2s_std.h \
$(PROJECT_PATH)/components/esp_driver_i2s/include/driver/lp_i2s_pdm.h \
$(PROJECT_PATH)/components/esp_driver_i2s/include/driver/lp_i2s_vad.h \
$(PROJECT_PATH)/components/esp_driver_jpeg/include/driver/jpeg_decode.h \
$(PROJECT_PATH)/components/esp_driver_jpeg/include/driver/jpeg_encode.h \
$(PROJECT_PATH)/components/esp_driver_ppa/include/driver/ppa.h \

View File

@ -42,6 +42,7 @@ Peripherals API
:SOC_GPSPI_SUPPORTED: spi_slave
:SOC_SPI_SUPPORT_SLAVE_HD_VER2: spi_slave_hd
:SOC_LP_I2S_SUPPORTED: lp_i2s
:SOC_LP_VAD_SUPPORTED: vad
:SOC_TEMP_SENSOR_SUPPORTED: temp_sensor
:SOC_TOUCH_SENSOR_SUPPORTED and not esp32p4: touch_pad
:esp32p4: cap_touch_sens

View File

@ -24,7 +24,7 @@ A basic I2S data bus has one master and one slave. The roles remain unchanged th
LP I2S on {IDF_TARGET_NAME} only supports working as an I2S Slave.
The LP I2S module on {IDF_TARGET_NAME} provides an independent RX unit, which supports receiving data when the chip is running with the lowest power consumption. Compared to HP I2S, LP I2S does not support DMA access. Instead, it uses a piece of separate internal memory to store data.
The LP I2S module on {IDF_TARGET_NAME} provides an independent RX unit, which supports receiving data when the chip is running under sleep modes. Compared to HP I2S, LP I2S does not support DMA access. Instead, it uses a piece of separate internal memory to store data.
I2S Communication Mode
@ -33,24 +33,12 @@ I2S Communication Mode
Standard Mode
^^^^^^^^^^^^^
In standard mode, there are always two sound channels, i.e., the left and right channels, which are called "slots". These slots support 16-bit-width sample data. The communication format for the slots mainly includes the following:
- **Philips Format**: Data signal has one-bit shift comparing to the WS signal, and the duty of WS signal is 50%.
.. wavedrom:: /../_static/diagrams/i2s/std_philips.json
- **MSB Format**: Basically the same as Philips format, but without data shift.
.. wavedrom:: /../_static/diagrams/i2s/std_msb.json
- **PCM Short Format**: Data has one-bit shift and meanwhile the WS signal becomes a pulse lasting for one BCLK cycle.
.. wavedrom:: /../_static/diagrams/i2s/std_pcm.json
In standard mode, there are always two sound channels, i.e., the left and right channels, which are called "slots". These slots support 16-bit-width sample data. The communication format for the slots can be found in this :ref:`i2s-communication-mode` section.
PDM Mode (RX)
^^^^^^^^^^^^^
PDM (Pulse-density Modulation) mode for RX channel can receive PDM-format data. Only 16-bit-width sample data are supported.
PDM (Pulse-density Modulation) mode for RX channel can receive PDM-format data. Only 16-bit-width sample data are supported. The communication format for the slots can be found in this :ref:`i2s-communication-mode` section.
Functional Overview
@ -144,4 +132,3 @@ API Reference
.. include-build-file:: inc/lp_i2s.inc
.. include-build-file:: inc/lp_i2s_std.inc
.. include-build-file:: inc/lp_i2s_pdm.inc
.. include-build-file:: inc/components/esp_driver_i2s/include/driver/i2s_types.inc

View File

@ -0,0 +1,181 @@
Voice Activity Detection (VAD)
==============================
:link_to_translation:`zh_CN:[中文]`
Introduction
------------
The Voice Activity Detection (VAD) module facilitates the hardware implementation of the first-stage algorithm for voice wake-up and other multimedia functions.
Additionally, it provides hardware support for low-power voice wake-up solutions.
.. only:: SOC_LP_I2S_SUPPORTED
For LP I2S documentation, see :doc:`Low Power Inter-IC Sound <./lp_i2s>`.
Hardware State Machine
----------------------
LP VAD driver provides a structure :cpp:type:`lp_vad_config_t` to configure the LP VAD module:
- :cpp:member:`lp_vad_config_t::init_frame_num`, number of init frames that are used for VAD to denoise, this helps the VAD to decrease the accidental trigger ratio. Note that values that are too large may cause voice activity to be missed.
- :cpp:member:`lp_vad_config_t::min_energy_thresh`, minimum energy threshold, voice activities with energy higher than this value will be detected.
- :cpp:member:`lp_vad_config_t::skip_band_energy_thresh`, skip band energy threshold or not, the passband energy check determines whether the proportion of passband energy within the total frequency domain meets the required threshold. Note in different environments, enabling the passband energy check may reduce false trigger rates but could also increase the rate of missed detections.
- :cpp:member:`lp_vad_config_t::speak_activity_thresh`, when in speak-activity-listening-state, if number of the detected speak activity is higher than this value, VAD runs into speak-activity-detected-state.
- :cpp:member:`lp_vad_config_t::non_speak_activity_thresh`, when in speak-activity-detected-state, if the number of the detected speak activity is higher than :cpp:member:`lp_vad_config_t::min_speak_activity_thresh`, but lower than :cpp:member:`lp_vad_config_t::max_speak_activity_thresh`:
* if the number of the detected non-speak activity is higher than this value, VAD runs into speak-activity-listening-state
* if the number of the detected non-speak activity is lower than this value, VAD keeps in speak-activity-detected-state
- :cpp:member:`lp_vad_config_t::min_speak_activity_thresh`, when in speak-activity-detected-state, if the number of the detected speak activity is higher than this value, but lower than :cpp:member:`lp_vad_config_t::max_speak_activity_thresh`, then the VAD state machine will depend on the value of :cpp:member:`lp_vad_config_t::non_speak_activity_thresh`.
- :cpp:member:`lp_vad_config_t::max_speak_activity_thresh`, when in speak-activity-detected-state, if the number of the detected speak activity is higher than this value, VAD runs into speak-activity-listening-state
Above configurations can change the VAD state machine shown below:
.. code-block:: text
┌──────────────────────────────────┐
│ │
┌─────────────┤ speak-activity-listening-state │ ◄───────────────┐
│ │ │ │
│ └──────────────────────────────────┘ │
│ ▲ │
│ │ │
│ │ │
│ │ │
│ │ │
detected speak activity │ │ detected speak activity │ detected speak activity
>= │ │ >= │ >=
'speak_activity_thresh' │ │ 'min_speak_activity_thresh' │ 'max_speak_activity_thresh'
│ │ │
│ │ && │
│ │ │
│ │ detected non-speak activity │
│ │ >= │
│ │ 'non_speak_activity_thresh' │
│ │ │
│ │ │
│ │ │
│ │ │
│ │ │
│ ┌───────────┴─────────────────────┐ │
│ │ │ │
└───────────► │ speak-activity-detected-state ├─────────────────┘
│ │
└─┬───────────────────────────────┘
│ ▲
│ │
│ │
│ │ detected speak activity
│ │ >=
│ │ 'min_speak_activity_thresh'
│ │
│ │ &&
│ │
│ │ detected non-speak activity
│ │ <
└─────────────────────┘ 'non_speak_activity_thresh'
HP Driver Functional Overview
-----------------------------
The VAD HP driver is used to configure the LP VAD to work under the control of the HP core. The HP core can also be woken up by the VAD when voice activity is detected.
Resource Allocation
^^^^^^^^^^^^^^^^^^^
.. only:: SOC_LP_I2S_SUPPORT_VAD
:cpp:type:`lp_vad_init_config_t` is the configuration structure that is needed to create an LP I2S VAD unit handle. To create an LP I2S VAD unit handle, you first need to create an LP I2S channel handle. See :doc:`Low Power Inter-IC Sound <./lp_i2s>`.
You can call :cpp:func:`lp_i2s_vad_new_unit` to create the handle. If the VAD unit is no longer used, you should recycle the allocated resource by calling :cpp:func:`lp_i2s_vad_del_unit`.
.. code:: c
vad_unit_handle_t vad_handle = NULL;
lp_vad_init_config_t init_config = {
.lp_i2s_chan = rx_handle,
.vad_config = {
.init_frame_num = 100,
.min_energy_thresh = 100,
.speak_activity_thresh = 10,
.non_speak_activity_thresh = 30,
.min_speak_activity_thresh = 3,
.max_speak_activity_thresh = 100,
},
};
ESP_ERROR_CHECK(lp_i2s_vad_new_unit(vad_id, &init_config, &vad_handle));
ESP_ERROR_CHECK(lp_i2s_vad_del_unit(vad_handle));
Enable and Disable the VAD
^^^^^^^^^^^^^^^^^^^^^^^^^^
.. only:: SOC_LP_I2S_SUPPORT_VAD
Before using a VAD unit to detect voice activity, you need to enable the VAD unit by calling :cpp:func:`lp_i2s_vad_enable`. This function switches the driver state from **init** to **enable**, and also enables the VAD hardware. Calling :cpp:func:`lp_i2s_vad_disable` does the opposite, that is, it puts the driver back to the **init** state and stops the hardware as well.
HP Core Wake-up
^^^^^^^^^^^^^^^
.. only:: SOC_LP_I2S_SUPPORT_VAD
:cpp:func:`esp_sleep_enable_vad_wakeup` configures the VAD as an HP core wake-up source. To make the VAD work during sleep, you should let the system maintain the RTC domain and XTAL power. See the code example below:
.. code:: c
ESP_ERROR_CHECK(esp_sleep_enable_vad_wakeup());
LP Driver Functional Overview
-----------------------------
The VAD LP driver is mainly for LP core wake-up. The VAD can be configured under HP core control, and it can then wake up the LP core when voice activities are detected.
Resource Allocation
^^^^^^^^^^^^^^^^^^^
.. only:: SOC_LP_I2S_SUPPORT_VAD
:cpp:type:`lp_core_lp_vad_cfg_t` and :cpp:func:`lp_core_lp_vad_init` are used to initialize the VAD LP driver.
:cpp:func:`lp_core_lp_vad_deinit` is used to recycle the allocated resources.
Enable and Disable the VAD
^^^^^^^^^^^^^^^^^^^^^^^^^^
.. only:: SOC_LP_I2S_SUPPORT_VAD
:cpp:func:`lp_core_lp_vad_enable` and :cpp:func:`lp_core_lp_vad_disable` are used for enabling / disabling the hardware.
LP Core Wake-up
^^^^^^^^^^^^^^^
.. only:: SOC_LP_I2S_SUPPORT_VAD
Set :c:macro:`ULP_LP_CORE_WAKEUP_SOURCE_LP_VAD` in :cpp:type:`ulp_lp_core_cfg_t` to make the VAD work as the LP core wake-up source.
.. code:: c
static void load_and_start_lp_core_firmware(ulp_lp_core_cfg_t* cfg, const uint8_t* firmware_start, const uint8_t* firmware_end)
{
TEST_ASSERT(ulp_lp_core_load_binary(firmware_start,
(firmware_end - firmware_start)) == ESP_OK);
TEST_ASSERT(ulp_lp_core_run(cfg) == ESP_OK);
}
ulp_lp_core_cfg_t cfg = {
.wakeup_source = ULP_LP_CORE_WAKEUP_SOURCE_LP_VAD,
};
load_and_start_lp_core_firmware(&cfg, lp_core_main_vad_bin_start, lp_core_main_vad_bin_end);
API Reference
-------------
.. include-build-file:: inc/lp_i2s_vad.inc
.. include-build-file:: inc/ulp_lp_core_lp_vad_shared.inc

View File

@ -40,6 +40,8 @@
:SOC_GPSPI_SUPPORTED: spi_master
:SOC_GPSPI_SUPPORTED: spi_slave
:SOC_SPI_SUPPORT_SLAVE_HD_VER2: spi_slave_hd
:SOC_LP_I2S_SUPPORTED: lp_i2s
:SOC_LP_VAD_SUPPORTED: vad
:SOC_JPEG_CODEC_SUPPORTED: jpeg
:SOC_TEMP_SENSOR_SUPPORTED: temp_sensor
:SOC_TOUCH_SENSOR_SUPPORTED and not esp32p4: touch_pad

View File

@ -0,0 +1 @@
.. include:: ../../../en/api-reference/peripherals/lp_i2s.rst

View File

@ -0,0 +1 @@
.. include:: ../../../en/api-reference/peripherals/vad.rst