refactor(dma): split rx buffer to cache aligned ones

This commit is contained in:
morris 2025-01-17 16:32:38 +08:00
parent 8354773714
commit 8bb6f764e1
4 changed files with 114 additions and 97 deletions

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2023-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -24,68 +24,101 @@
static const char *TAG = "dma_utils";
#define ALIGN_UP_BY(num, align) (((num) + ((align) - 1)) & ~((align) - 1))
#define ALIGN_DOWN_BY(num, align) ((num) & (~((align) - 1)))
esp_err_t esp_dma_split_buffer_to_aligned(void *input_buffer, size_t input_buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array)
esp_err_t esp_dma_split_rx_buffer_to_cache_aligned(void *rx_buffer, size_t buffer_len, dma_buffer_split_array_t *align_buf_array, uint8_t** ret_stash_buffer)
{
esp_err_t ret = ESP_OK;
ESP_RETURN_ON_FALSE(align_array && input_buffer && input_buffer_len && stash_buffer && split_alignment && !(split_alignment & (split_alignment - 1)
&& (stash_buffer_len >= 2 * split_alignment)), ESP_ERR_INVALID_ARG, TAG, "invalid argument");
ESP_RETURN_ON_FALSE(!((uintptr_t)stash_buffer % split_alignment), ESP_ERR_INVALID_ARG, TAG, "extra buffer is not aligned");
ESP_RETURN_ON_FALSE(rx_buffer && buffer_len && align_buf_array, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
// calculate head_overflow_len
size_t head_overflow_len = (uintptr_t)input_buffer % split_alignment;
head_overflow_len = head_overflow_len ? split_alignment - head_overflow_len : 0;
ESP_LOGD(TAG, "head_addr:%p split_alignment:%zu head_overflow_len:%zu", input_buffer, split_alignment, head_overflow_len);
// calculate tail_overflow_len
size_t tail_overflow_len = ((uintptr_t)input_buffer + input_buffer_len) % split_alignment;
ESP_LOGD(TAG, "tail_addr:%p split_alignment:%zu tail_overflow_len:%zu", input_buffer + input_buffer_len - tail_overflow_len, split_alignment, tail_overflow_len);
// read the cache line size of internal and external memory, we also use this information to check if a given memory is behind the cache
size_t int_mem_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_INT_MEM, CACHE_TYPE_DATA);
size_t ext_mem_cache_line_size = cache_hal_get_cache_line_size(CACHE_LL_LEVEL_EXT_MEM, CACHE_TYPE_DATA);
uint32_t extra_buf_count = 0;
input_buffer = (uint8_t*)input_buffer;
stash_buffer = (uint8_t*)stash_buffer;
align_array->buf.head.recovery_address = input_buffer;
align_array->buf.head.aligned_buffer = stash_buffer + split_alignment * extra_buf_count++;
align_array->buf.head.length = head_overflow_len;
align_array->buf.body.recovery_address = input_buffer + head_overflow_len;
align_array->buf.body.aligned_buffer = input_buffer + head_overflow_len;
align_array->buf.body.length = input_buffer_len - head_overflow_len - tail_overflow_len;
align_array->buf.tail.recovery_address = input_buffer + input_buffer_len - tail_overflow_len;
align_array->buf.tail.aligned_buffer = stash_buffer + split_alignment * extra_buf_count++;
align_array->buf.tail.length = tail_overflow_len;
size_t split_line_size = 0;
if (esp_ptr_external_ram(rx_buffer)) {
split_line_size = ext_mem_cache_line_size;
} else if (esp_ptr_internal(rx_buffer)) {
split_line_size = int_mem_cache_line_size;
}
ESP_LOGV(TAG, "split_line_size:%zu", split_line_size);
// special handling when input_buffer length is no more than buffer alignment
if(head_overflow_len >= input_buffer_len || tail_overflow_len >= input_buffer_len)
{
align_array->buf.head.length = input_buffer_len ;
align_array->buf.body.length = 0 ;
align_array->buf.tail.length = 0 ;
// allocate the stash buffer from internal RAM
// Note, the split_line_size can be 0, in this case, the stash_buffer is also NULL, which is fine
uint8_t* stash_buffer = heap_caps_calloc(2, split_line_size, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
ESP_RETURN_ON_FALSE(!(split_line_size && !stash_buffer), ESP_ERR_NO_MEM, TAG, "no mem for stash buffer");
// clear align_array to avoid garbage data
memset(align_buf_array, 0, sizeof(dma_buffer_split_array_t));
bool need_cache_sync[3] = {false};
// if split_line_size is non-zero, split the buffer into head, body and tail
if (split_line_size > 0) {
// calculate head_overflow_len
size_t head_overflow_len = (uintptr_t)rx_buffer % split_line_size;
head_overflow_len = head_overflow_len ? split_line_size - head_overflow_len : 0;
ESP_LOGV(TAG, "head_addr:%p head_overflow_len:%zu", rx_buffer, head_overflow_len);
// calculate tail_overflow_len
size_t tail_overflow_len = ((uintptr_t)rx_buffer + buffer_len) % split_line_size;
ESP_LOGV(TAG, "tail_addr:%p tail_overflow_len:%zu", rx_buffer + buffer_len - tail_overflow_len, tail_overflow_len);
uint8_t extra_buf_count = 0;
uint8_t* input_buffer = (uint8_t*)rx_buffer;
align_buf_array->buf.head.recovery_address = input_buffer;
align_buf_array->buf.head.aligned_buffer = stash_buffer + split_line_size * extra_buf_count++;
align_buf_array->buf.head.length = head_overflow_len;
need_cache_sync[0] = int_mem_cache_line_size > 0;
align_buf_array->buf.body.recovery_address = input_buffer + head_overflow_len;
align_buf_array->buf.body.aligned_buffer = input_buffer + head_overflow_len;
align_buf_array->buf.body.length = buffer_len - head_overflow_len - tail_overflow_len;
need_cache_sync[1] = true;
align_buf_array->buf.tail.recovery_address = input_buffer + buffer_len - tail_overflow_len;
align_buf_array->buf.tail.aligned_buffer = stash_buffer + split_line_size * extra_buf_count++;
align_buf_array->buf.tail.length = tail_overflow_len;
need_cache_sync[2] = int_mem_cache_line_size > 0;
// special handling when input_buffer length is no more than buffer alignment
if (head_overflow_len >= buffer_len || tail_overflow_len >= buffer_len) {
align_buf_array->buf.head.length = buffer_len ;
align_buf_array->buf.body.length = 0 ;
align_buf_array->buf.tail.length = 0 ;
}
} else {
align_buf_array->buf.body.aligned_buffer = rx_buffer;
align_buf_array->buf.body.recovery_address = rx_buffer;
align_buf_array->buf.body.length = buffer_len;
need_cache_sync[1] = false;
}
for(int i = 0; i < 3; i++) {
if(!align_array->aligned_buffer[i].length) {
align_array->aligned_buffer[i].aligned_buffer = NULL;
align_array->aligned_buffer[i].recovery_address = NULL;
for (int i = 0; i < 3; i++) {
if (align_buf_array->aligned_buffer[i].length == 0) {
align_buf_array->aligned_buffer[i].aligned_buffer = NULL;
align_buf_array->aligned_buffer[i].recovery_address = NULL;
need_cache_sync[i] = false;
}
}
return ret;
// invalidate the aligned buffer if necessary
for (int i = 0; i < 3; i++) {
if (need_cache_sync[i]) {
esp_cache_msync(align_buf_array->aligned_buffer[i].aligned_buffer, align_buf_array->aligned_buffer[i].length, ESP_CACHE_MSYNC_FLAG_DIR_M2C);
}
}
*ret_stash_buffer = stash_buffer;
return ESP_OK;
}
esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array)
esp_err_t esp_dma_merge_aligned_rx_buffers(dma_buffer_split_array_t *align_array)
{
esp_err_t ret = ESP_OK;
ESP_RETURN_ON_FALSE(align_array, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
ESP_RETURN_ON_FALSE_ISR(align_array, ESP_ERR_INVALID_ARG, TAG, "invalid argument");
// only need to copy the head and tail buffer
if(align_array->buf.head.length) {
if (align_array->buf.head.length) {
memcpy(align_array->buf.head.recovery_address, align_array->buf.head.aligned_buffer, align_array->buf.head.length);
}
if(align_array->buf.tail.length) {
if (align_array->buf.tail.length) {
memcpy(align_array->buf.tail.recovery_address, align_array->buf.tail.aligned_buffer, align_array->buf.tail.length);
}
return ret;
return ESP_OK;
}
esp_err_t esp_dma_capable_malloc(size_t size, const esp_dma_mem_info_t *dma_mem_info, void **out_ptr, size_t *actual_size)

View File

@@ -6,14 +6,8 @@
#include <stdlib.h>
#include <string.h>
#include <stdatomic.h>
#include <sys/cdefs.h>
#include <sys/lock.h>
#include "sdkconfig.h"
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "soc/soc_caps.h"
#include "soc/ext_mem_defs.h"
#include "esp_log.h"
#include "esp_check.h"
#include "esp_memory_utils.h"

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2023-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@@ -24,6 +24,8 @@ typedef struct {
/**
* @brief DMA buffer aligned array
* The array contains three parts: head, body and tail.
* Length of each part will be >=0, especially, length=0 means that there is no such part.
*/
typedef struct {
union {
@@ -37,22 +39,21 @@
} dma_buffer_split_array_t;
/**
* @brief Split unaligned DMA buffer to aligned DMA buffer or aligned DMA buffer array
* @brief Split DMA RX buffer to cache aligned buffers
*
* @note Returned align array contains three parts: head, body and tail. Length of each buffer will be >=0, length 0 means that there is no such part
* @note After the original RX buffer is split into an array, caller should mount the buffer array to the DMA controller in scatter-gather mode.
* Don't read/write the aligned buffers before the DMA finished using them.
*
* @param[in] buffer Origin DMA buffer address
* @param[in] buffer_len Origin DMA buffer length
* @param[in] stash_buffer Needed extra buffer to stash aligned buffer, should be allocated with DMA capable memory and aligned to split_alignment
* @param[in] stash_buffer_len stash_buffer length
* @param[in] split_alignment Alignment of each buffer required by the DMA
* @param[out] align_array Aligned DMA buffer array
* @param[in] rx_buffer The origin DMA buffer used for receiving data
* @param[in] buffer_len rx_buffer length
* @param[out] align_buf_array Aligned DMA buffer array
* @param[out] ret_stash_buffer Allocated stash buffer (caller should free it after use)
* @return
* - ESP_OK: Split to aligned buffer successfully
* - ESP_ERR_INVALID_ARG: Split to aligned buffer failed because of invalid argument
*
* brief sketch:
* buffer alignment delimiter buffer alignment delimiter
* cache alignment delimiter cache alignment delimiter
*
* Origin Buffer Origin Buffer
*
@@ -68,17 +69,19 @@ typedef struct {
*
* |xxxxx......| |xxxxx......|
*/
esp_err_t esp_dma_split_buffer_to_aligned(void *buffer, size_t buffer_len, void *stash_buffer, size_t stash_buffer_len, size_t split_alignment, dma_buffer_split_array_t *align_array);
esp_err_t esp_dma_split_rx_buffer_to_cache_aligned(void *rx_buffer, size_t buffer_len, dma_buffer_split_array_t *align_buf_array, uint8_t** ret_stash_buffer);
/**
* @brief Merge aligned buffer array to origin buffer
* @brief Merge aligned RX buffer array to origin buffer
*
* @param[in] align_array Aligned DMA buffer array
* @note This function can be used in the ISR context.
*
* @param[in] align_buf_array Aligned DMA buffer array
* @return
* - ESP_OK: Merge aligned buffer to origin buffer successfully
* - ESP_ERR_INVALID_ARG: Merge aligned buffer to origin buffer failed because of invalid argument
*/
esp_err_t esp_dma_merge_aligned_buffers(dma_buffer_split_array_t *align_array);
esp_err_t esp_dma_merge_aligned_rx_buffers(dma_buffer_split_array_t *align_buf_array);
#ifdef __cplusplus
}

View File

@@ -395,25 +395,18 @@ TEST_CASE("GDMA M2M Mode", "[GDMA][M2M]")
typedef struct {
SemaphoreHandle_t done_sem;
dma_buffer_split_array_t *align_array;
size_t split_alignment;
bool need_invalidate;
} test_gdma_context_t;
static bool test_gdma_m2m_unalgined_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
static bool test_gdma_m2m_unaligned_rx_eof_callback(gdma_channel_handle_t dma_chan, gdma_event_data_t *event_data, void *user_data)
{
BaseType_t task_woken = pdFALSE;
test_gdma_context_t *user_ctx = (test_gdma_context_t*)user_data;
for (int i = 0; i < 3; i++) {
if (user_ctx->align_array->aligned_buffer[i].aligned_buffer && user_ctx->need_invalidate) {
TEST_ESP_OK(esp_cache_msync(user_ctx->align_array->aligned_buffer[i].aligned_buffer, ALIGN_UP(user_ctx->align_array->aligned_buffer[i].length, user_ctx->split_alignment), ESP_CACHE_MSYNC_FLAG_DIR_M2C));
}
}
TEST_ESP_OK(esp_dma_merge_aligned_buffers(user_ctx->align_array));
TEST_ESP_OK(esp_dma_merge_aligned_rx_buffers(user_ctx->align_array));
xSemaphoreGiveFromISR(user_ctx->done_sem, &task_woken);
return task_woken == pdTRUE;
}
static void test_gdma_m2m_unalgined_buffer_test(uint8_t *dst_data, uint8_t *src_data, size_t data_length, size_t offset_len, size_t split_alignment)
static void test_gdma_m2m_unaligned_buffer_test(uint8_t *dst_data, uint8_t *src_data, size_t data_length, size_t offset_len)
{
TEST_ASSERT_NOT_NULL(src_data);
TEST_ASSERT_NOT_NULL(dst_data);
@@ -458,13 +451,10 @@ static void test_gdma_m2m_unalgined_buffer_test(uint8_t *dst_data, uint8_t *src_
};
TEST_ESP_OK(gdma_link_mount_buffers(tx_link_list, 0, tx_buf_mount_config, sizeof(tx_buf_mount_config) / sizeof(gdma_buffer_mount_config_t), NULL));
// allocate stash_buffer, should be freed by the user
void *stash_buffer = heap_caps_aligned_calloc(split_alignment, 2, split_alignment, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
size_t stash_buffer_len = 2 * split_alignment;
dma_buffer_split_array_t align_array = {0};
gdma_buffer_mount_config_t rx_aligned_buf_mount_config[3] = {0};
TEST_ESP_OK(esp_dma_split_buffer_to_aligned(dst_data + offset_len, data_length, stash_buffer, stash_buffer_len, split_alignment, &align_array));
uint8_t* stash_buffer = NULL;
TEST_ESP_OK(esp_dma_split_rx_buffer_to_cache_aligned(dst_data + offset_len, data_length, &align_array, &stash_buffer));
for (int i = 0; i < 3; i++) {
rx_aligned_buf_mount_config[i].buffer = align_array.aligned_buffer[i].aligned_buffer;
rx_aligned_buf_mount_config[i].length = align_array.aligned_buffer[i].length;
@@ -472,15 +462,13 @@ static void test_gdma_m2m_unalgined_buffer_test(uint8_t *dst_data, uint8_t *src_
TEST_ESP_OK(gdma_link_mount_buffers(rx_link_list, 0, rx_aligned_buf_mount_config, 3, NULL));
gdma_rx_event_callbacks_t rx_cbs = {
.on_recv_eof = test_gdma_m2m_unalgined_rx_eof_callback,
.on_recv_eof = test_gdma_m2m_unaligned_rx_eof_callback,
};
SemaphoreHandle_t done_sem = xSemaphoreCreateBinary();
TEST_ASSERT_NOT_NULL(done_sem);
test_gdma_context_t user_ctx = {
.done_sem = done_sem,
.align_array = &align_array,
.split_alignment = split_alignment,
.need_invalidate = sram_alignment ? true : false,
};
TEST_ESP_OK(gdma_register_rx_event_callbacks(rx_chan, &rx_cbs, &user_ctx));
@@ -494,12 +482,12 @@ static void test_gdma_m2m_unalgined_buffer_test(uint8_t *dst_data, uint8_t *src_
TEST_ASSERT_EQUAL(i % 256, dst_data[i + offset_len]);
}
free(stash_buffer);
TEST_ESP_OK(gdma_del_link_list(tx_link_list));
TEST_ESP_OK(gdma_del_link_list(rx_link_list));
TEST_ESP_OK(gdma_del_channel(tx_chan));
TEST_ESP_OK(gdma_del_channel(rx_chan));
vSemaphoreDelete(done_sem);
free(stash_buffer);
}
TEST_CASE("GDMA M2M Unaligned RX Buffer Test", "[GDMA][M2M]")
@@ -507,29 +495,28 @@ TEST_CASE("GDMA M2M Unaligned RX Buffer Test", "[GDMA][M2M]")
uint8_t *sbuf = heap_caps_aligned_calloc(64, 1, 10240, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
uint8_t *dbuf = heap_caps_aligned_calloc(64, 1, 10240, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
size_t split_alignment = 64;
// case buffer len less than buffer alignment
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 60, 0, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 60, 4, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 60, 2, split_alignment);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 0);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 4);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 60, 2);
// case buffer head aligned
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 246, 0, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8182, 0, split_alignment);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 246, 0);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8182, 0);
// case buffer tail aligned
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 246, 10, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8182, 10, split_alignment);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 246, 10);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8182, 10);
// case buffer unaligned
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 100, 10, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 10, 60, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 256, 10, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8192, 10, split_alignment);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 100, 10);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 10, 60);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 256, 10);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8192, 10);
// case buffer full aligned
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 256, 0, split_alignment);
test_gdma_m2m_unalgined_buffer_test(dbuf, sbuf, 8192, 0, split_alignment);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 256, 0);
test_gdma_m2m_unaligned_buffer_test(dbuf, sbuf, 8192, 0);
free(sbuf);
free(dbuf);