Merge branch 'feature/malloc_cap_simd_flag' into 'master'

feat(heap): add a MALLOC_CAP_SIMD flag

Closes IDF-11813

See merge request espressif/esp-idf!35471
This commit is contained in:
Song Ruo Jing 2025-01-17 11:35:22 +08:00
commit bbdef6a11d
12 changed files with 70 additions and 16 deletions

View File

@ -20,7 +20,7 @@
#define HEAP_IRAM_ATTR IRAM_ATTR
#endif
#define CAPS_NEEDING_ALIGNMENT (MALLOC_CAP_DMA|MALLOC_CAP_DMA_DESC_AHB|MALLOC_CAP_DMA_DESC_AXI|MALLOC_CAP_CACHE_ALIGNED)
#define CAPS_NEEDING_ALIGNMENT (MALLOC_CAP_DMA|MALLOC_CAP_DMA_DESC_AHB|MALLOC_CAP_DMA_DESC_AXI|MALLOC_CAP_CACHE_ALIGNED|MALLOC_CAP_SIMD)
HEAP_IRAM_ATTR void esp_heap_adjust_alignment_to_hw(size_t *p_alignment, size_t *p_size, uint32_t *p_caps)
{
@ -72,6 +72,14 @@ HEAP_IRAM_ATTR void esp_heap_adjust_alignment_to_hw(size_t *p_alignment, size_t
if (cache_alignment_bytes > alignment) {
alignment = cache_alignment_bytes;
}
#if SOC_SIMD_INSTRUCTION_SUPPORTED
// SIMD instructions preferred data alignment, SOC_SIMD_PREFERRED_DATA_ALIGNMENT, which is also definitely a power of two
if (caps & MALLOC_CAP_SIMD) {
alignment = (alignment > SOC_SIMD_PREFERRED_DATA_ALIGNMENT) ? alignment : SOC_SIMD_PREFERRED_DATA_ALIGNMENT;
}
#endif
// Align up `size` to resulting alignment as well.
size = (size + alignment - 1) & (~(alignment - 1));

View File

@ -369,7 +369,7 @@ esp_err_t esp_psram_extram_add_to_heap_allocator(void)
{
esp_err_t ret = ESP_FAIL;
uint32_t byte_aligned_caps[] = {MALLOC_CAP_SPIRAM | MALLOC_CAP_DEFAULT, 0, MALLOC_CAP_8BIT | MALLOC_CAP_32BIT};
uint32_t byte_aligned_caps[] = {MALLOC_CAP_SPIRAM | MALLOC_CAP_DEFAULT, 0, MALLOC_CAP_8BIT | MALLOC_CAP_32BIT | MALLOC_CAP_SIMD};
ret = heap_caps_add_region_with_caps(byte_aligned_caps,
s_psram_ctx.regions_to_heap[PSRAM_MEM_8BIT_ALIGNED].vaddr_start,
s_psram_ctx.regions_to_heap[PSRAM_MEM_8BIT_ALIGNED].vaddr_end);

View File

@ -46,6 +46,7 @@ extern "C" {
#define MALLOC_CAP_DMA_DESC_AHB (1<<17) ///< Memory must be capable of containing AHB DMA descriptors
#define MALLOC_CAP_DMA_DESC_AXI (1<<18) ///< Memory must be capable of containing AXI DMA descriptors
#define MALLOC_CAP_CACHE_ALIGNED (1<<19) ///< Memory must be aligned to the cache line size of any intermediate caches
#define MALLOC_CAP_SIMD (1<<20) ///< Memory must be capable of being used for SIMD instructions (i.e. allow for SIMD-specific-bit data accesses)
#define MALLOC_CAP_INVALID (1<<31) ///< Memory can't be used / list end marker

View File

@ -44,6 +44,9 @@ enum {
#define MALLOC_RTCRAM_BASE_CAPS ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_EXEC
#endif
// The memory used for SIMD instructions requires the bus of its memory regions be able to transfer the data in 128-bit
// TCM and RTCRAM memory regions cannot satisfy 128-bit data access
/**
* Defined the attributes and allocation priority of each memory on the chip,
* The heap allocator will traverse all types of memory types in column High Priority Matching and match the specified caps at first,
@ -51,11 +54,11 @@ enum {
* in turn to continue matching.
*/
const soc_memory_type_desc_t soc_memory_types[SOC_MEMORY_TYPE_NUM] = {
/* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */
[SOC_MEMORY_TYPE_L2MEM] = { "RAM", { MALLOC_L2MEM_BASE_CAPS, 0, 0 }},
[SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, ESP32P4_MEM_COMMON_CAPS, 0 }},
[SOC_MEMORY_TYPE_TCM] = { "TCM", { MALLOC_CAP_TCM, ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL, 0 }},
[SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS}},
/* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */
[SOC_MEMORY_TYPE_L2MEM] = { "RAM", { MALLOC_L2MEM_BASE_CAPS | MALLOC_CAP_SIMD, 0, 0 }},
[SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, 0, ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_SIMD }},
[SOC_MEMORY_TYPE_TCM] = { "TCM", { MALLOC_CAP_TCM, ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL, 0 }},
[SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS}},
};
const size_t soc_memory_type_count = sizeof(soc_memory_types) / sizeof(soc_memory_type_desc_t);

View File

@ -58,7 +58,7 @@ const soc_memory_type_desc_t soc_memory_types[] = {
/* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */
[SOC_MEMORY_TYPE_DIRAM] = { "RAM", { MALLOC_DIRAM_BASE_CAPS, 0, 0 }},
//TODO, in fact, part of them support EDMA, to be supported.
[SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, ESP32S2_MEM_COMMON_CAPS, 0 }},
[SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, 0, ESP32S2_MEM_COMMON_CAPS }},
[SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS }},
};

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2019-2023 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2019-2024 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
@ -49,19 +49,21 @@ enum {
#define MALLOC_RTCRAM_BASE_CAPS ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_EXEC
#endif
// The memory used for SIMD instructions requires the bus of its memory regions be able to transfer the data in 128-bit
/**
* Defined the attributes and allocation priority of each memory on the chip,
* The heap allocator will traverse all types of memory types in column High Priority Matching and match the specified caps at first,
* if no memory caps matched or the allocation is failed, it will go to columns Medium Priorty Matching and Low Priority Matching
* if no memory caps matched or the allocation is failed, it will go to columns Medium Priority Matching and Low Priority Matching
* in turn to continue matching.
*/
const soc_memory_type_desc_t soc_memory_types[SOC_MEMORY_TYPE_NUM] = {
/* Mem Type Name | High Priority Matching | Medium Priorty Matching | Low Priority Matching */
[SOC_MEMORY_TYPE_DIRAM] = { "RAM", { MALLOC_DIRAM_BASE_CAPS, 0, 0 }},
[SOC_MEMORY_TYPE_DRAM] = { "DRAM", { 0, ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA, 0 }},
[SOC_MEMORY_TYPE_IRAM] = { "IRAM", { MALLOC_CAP_EXEC, MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL, 0 }},
[SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, ESP32S3_MEM_COMMON_CAPS, 0 }},
[SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS }},
/* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */
[SOC_MEMORY_TYPE_DIRAM] = { "RAM", { MALLOC_DIRAM_BASE_CAPS | MALLOC_CAP_SIMD, 0, 0 }},
[SOC_MEMORY_TYPE_DRAM] = { "DRAM", { 0, ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA | MALLOC_CAP_SIMD, 0 }},
[SOC_MEMORY_TYPE_IRAM] = { "IRAM", { MALLOC_CAP_EXEC, MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL, 0 }},
[SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, 0, ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_SIMD }},
[SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS }},
};
const size_t soc_memory_type_count = sizeof(soc_memory_types) / sizeof(soc_memory_type_desc_t);

View File

@ -339,6 +339,10 @@ config SOC_BITSCRAMBLER_SUPPORTED
bool
default y
config SOC_SIMD_INSTRUCTION_SUPPORTED
bool
default y
config SOC_XTAL_SUPPORT_40M
bool
default y
@ -551,6 +555,10 @@ config SOC_CPU_HAS_LOCKUP_RESET
bool
default y
config SOC_SIMD_PREFERRED_DATA_ALIGNMENT
int
default 16
config SOC_DS_SIGNATURE_MAX_BIT_LEN
int
default 4096

View File

@ -101,6 +101,7 @@
#define SOC_DEEP_SLEEP_SUPPORTED 1
#define SOC_PM_SUPPORTED 1
#define SOC_BITSCRAMBLER_SUPPORTED 1
#define SOC_SIMD_INSTRUCTION_SUPPORTED 1
/*-------------------------- XTAL CAPS ---------------------------------------*/
#define SOC_XTAL_SUPPORT_40M 1
@ -192,6 +193,8 @@
#define SOC_CPU_HAS_LOCKUP_RESET 1
#define SOC_SIMD_PREFERRED_DATA_ALIGNMENT 16 // The preferred data alignment accepted by the SIMD instructions, in bytes
/*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/
/** The maximum length of a Digital Signature in bits. */
#define SOC_DS_SIGNATURE_MAX_BIT_LEN (4096)

View File

@ -243,6 +243,10 @@ config SOC_PM_SUPPORTED
bool
default y
config SOC_SIMD_INSTRUCTION_SUPPORTED
bool
default y
config SOC_XTAL_SUPPORT_40M
bool
default y
@ -395,6 +399,10 @@ config SOC_CPU_WATCHPOINT_MAX_REGION_SIZE
int
default 64
config SOC_SIMD_PREFERRED_DATA_ALIGNMENT
int
default 16
config SOC_DS_SIGNATURE_MAX_BIT_LEN
int
default 4096

View File

@ -82,6 +82,7 @@
#define SOC_DEEP_SLEEP_SUPPORTED 1
#define SOC_LP_PERIPH_SHARE_INTERRUPT 1 // LP peripherals sharing the same interrupt source
#define SOC_PM_SUPPORTED 1
#define SOC_SIMD_INSTRUCTION_SUPPORTED 1
/*-------------------------- XTAL CAPS ---------------------------------------*/
#define SOC_XTAL_SUPPORT_40M 1
@ -148,6 +149,8 @@
#define SOC_CPU_WATCHPOINTS_NUM 2
#define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 64 // bytes
#define SOC_SIMD_PREFERRED_DATA_ALIGNMENT 16 // The preferred data alignment accepted by the SIMD instructions, in bytes
/*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/
/** The maximum length of a Digital Signature in bits. */
#define SOC_DS_SIGNATURE_MAX_BIT_LEN (4096)

View File

@ -1,6 +1,8 @@
Heap Memory Allocation
======================
{IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT: default="16", esp32s3="16", esp32p4="16"}
:link_to_translation:`zh_CN:[中文]`
Stack and Heap
@ -134,6 +136,13 @@ Memory allocated with ``MALLOC_CAP_32BIT`` can **only** be accessed via 32-bit r
On ESP32 only external SPI RAM under 4 MiB in size can be allocated this way. To use the region above the 4 MiB limit, you can use the :doc:`himem API </api-reference/system/himem>`.
.. only:: SOC_SIMD_INSTRUCTION_SUPPORTED
SIMD-Instruction-Capable Memory
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
``MALLOC_CAP_SIMD`` flag can be used to allocate memory which is accessible by SIMD (Single Instruction Multiple Data) instructions. The use of this flag also aligns the memory to a SIMD preferred data alignment size ({IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT}-byte) for a better performance.
Thread Safety
-------------

View File

@ -1,6 +1,8 @@
堆内存分配
======================
{IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT: default="16", esp32s3="16", esp32p4="16"}
:link_to_translation:`en:[English]`
栈 (stack) 和堆 (heap) 的区别
@ -134,6 +136,13 @@ DMA 存储器
在 ESP32 上,只有不超过 4 MiB 的外部 SPI RAM 可以通过上述方式分配。要使用超过 4 MiB 限制的区域,可以使用 :doc:`himem API</api-reference/system/himem>`
.. only:: SOC_SIMD_INSTRUCTION_SUPPORTED
SIMD 指令可访问内存
^^^^^^^^^^^^^^^^^^^
``MALLOC_CAP_SIMD`` 标志用于分配可被 SIMD单指令多数据指令访问的内存。使用该标志时分配的内存会自动对齐到 SIMD 最佳数据对齐大小({IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT}-byte从而提升性能。
线程安全性
-------------