feat(heap): add a MALLOC_CAP_SIMD flag

MALLOC_CAP_SIMD can be used to allocate memory that is suitable for access by SIMD instructions.
This commit is contained in:
Song Ruo Jing 2024-12-06 11:39:57 +08:00
parent 5fa3e381c9
commit 6cb64d7025
12 changed files with 70 additions and 17 deletions

View File

@ -20,7 +20,7 @@
#define HEAP_IRAM_ATTR IRAM_ATTR #define HEAP_IRAM_ATTR IRAM_ATTR
#endif #endif
#define CAPS_NEEDING_ALIGNMENT (MALLOC_CAP_DMA|MALLOC_CAP_DMA_DESC_AHB|MALLOC_CAP_DMA_DESC_AXI|MALLOC_CAP_CACHE_ALIGNED) #define CAPS_NEEDING_ALIGNMENT (MALLOC_CAP_DMA|MALLOC_CAP_DMA_DESC_AHB|MALLOC_CAP_DMA_DESC_AXI|MALLOC_CAP_CACHE_ALIGNED|MALLOC_CAP_SIMD)
HEAP_IRAM_ATTR void esp_heap_adjust_alignment_to_hw(size_t *p_alignment, size_t *p_size, uint32_t *p_caps) HEAP_IRAM_ATTR void esp_heap_adjust_alignment_to_hw(size_t *p_alignment, size_t *p_size, uint32_t *p_caps)
{ {
@ -72,6 +72,14 @@ HEAP_IRAM_ATTR void esp_heap_adjust_alignment_to_hw(size_t *p_alignment, size_t
if (cache_alignment_bytes > alignment) { if (cache_alignment_bytes > alignment) {
alignment = cache_alignment_bytes; alignment = cache_alignment_bytes;
} }
#if SOC_SIMD_INSTRUCTION_SUPPORTED
// Raise the alignment to at least the preferred data alignment of the SIMD instructions (SOC_SIMD_PREFERRED_DATA_ALIGNMENT), which is also always a power of two
if (caps & MALLOC_CAP_SIMD) {
alignment = (alignment > SOC_SIMD_PREFERRED_DATA_ALIGNMENT) ? alignment : SOC_SIMD_PREFERRED_DATA_ALIGNMENT;
}
#endif
// Align up `size` to resulting alignment as well. // Align up `size` to resulting alignment as well.
size = (size + alignment - 1) & (~(alignment - 1)); size = (size + alignment - 1) & (~(alignment - 1));

View File

@ -369,7 +369,7 @@ esp_err_t esp_psram_extram_add_to_heap_allocator(void)
{ {
esp_err_t ret = ESP_FAIL; esp_err_t ret = ESP_FAIL;
uint32_t byte_aligned_caps[] = {MALLOC_CAP_SPIRAM | MALLOC_CAP_DEFAULT, 0, MALLOC_CAP_8BIT | MALLOC_CAP_32BIT}; uint32_t byte_aligned_caps[] = {MALLOC_CAP_SPIRAM | MALLOC_CAP_DEFAULT, 0, MALLOC_CAP_8BIT | MALLOC_CAP_32BIT | MALLOC_CAP_SIMD};
ret = heap_caps_add_region_with_caps(byte_aligned_caps, ret = heap_caps_add_region_with_caps(byte_aligned_caps,
s_psram_ctx.regions_to_heap[PSRAM_MEM_8BIT_ALIGNED].vaddr_start, s_psram_ctx.regions_to_heap[PSRAM_MEM_8BIT_ALIGNED].vaddr_start,
s_psram_ctx.regions_to_heap[PSRAM_MEM_8BIT_ALIGNED].vaddr_end); s_psram_ctx.regions_to_heap[PSRAM_MEM_8BIT_ALIGNED].vaddr_end);

View File

@ -46,6 +46,7 @@ extern "C" {
#define MALLOC_CAP_DMA_DESC_AHB (1<<17) ///< Memory must be capable of containing AHB DMA descriptors #define MALLOC_CAP_DMA_DESC_AHB (1<<17) ///< Memory must be capable of containing AHB DMA descriptors
#define MALLOC_CAP_DMA_DESC_AXI (1<<18) ///< Memory must be capable of containing AXI DMA descriptors #define MALLOC_CAP_DMA_DESC_AXI (1<<18) ///< Memory must be capable of containing AXI DMA descriptors
#define MALLOC_CAP_CACHE_ALIGNED (1<<19) ///< Memory must be aligned to the cache line size of any intermediate caches #define MALLOC_CAP_CACHE_ALIGNED (1<<19) ///< Memory must be aligned to the cache line size of any intermediate caches
#define MALLOC_CAP_SIMD (1<<20) ///< Memory must be capable of being used for SIMD instructions (i.e. allow for SIMD-specific-bit data accesses)
#define MALLOC_CAP_INVALID (1<<31) ///< Memory can't be used / list end marker #define MALLOC_CAP_INVALID (1<<31) ///< Memory can't be used / list end marker

View File

@ -44,6 +44,9 @@ enum {
#define MALLOC_RTCRAM_BASE_CAPS ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_EXEC #define MALLOC_RTCRAM_BASE_CAPS ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_EXEC
#endif #endif
// Memory used by SIMD instructions must reside in a region whose bus is able to transfer data 128 bits at a time.
// The TCM and RTCRAM memory regions cannot satisfy 128-bit data access.
/** /**
* Defined the attributes and allocation priority of each memory on the chip, * Defined the attributes and allocation priority of each memory on the chip,
* The heap allocator will traverse all types of memory types in column High Priority Matching and match the specified caps at first, * The heap allocator will traverse all types of memory types in column High Priority Matching and match the specified caps at first,
@ -51,11 +54,11 @@ enum {
* in turn to continue matching. * in turn to continue matching.
*/ */
const soc_memory_type_desc_t soc_memory_types[SOC_MEMORY_TYPE_NUM] = { const soc_memory_type_desc_t soc_memory_types[SOC_MEMORY_TYPE_NUM] = {
/* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */ /* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */
[SOC_MEMORY_TYPE_L2MEM] = { "RAM", { MALLOC_L2MEM_BASE_CAPS, 0, 0 }}, [SOC_MEMORY_TYPE_L2MEM] = { "RAM", { MALLOC_L2MEM_BASE_CAPS | MALLOC_CAP_SIMD, 0, 0 }},
[SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, ESP32P4_MEM_COMMON_CAPS, 0 }}, [SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, 0, ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_SIMD }},
[SOC_MEMORY_TYPE_TCM] = { "TCM", { MALLOC_CAP_TCM, ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL, 0 }}, [SOC_MEMORY_TYPE_TCM] = { "TCM", { MALLOC_CAP_TCM, ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL, 0 }},
[SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS}}, [SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS}},
}; };
const size_t soc_memory_type_count = sizeof(soc_memory_types) / sizeof(soc_memory_type_desc_t); const size_t soc_memory_type_count = sizeof(soc_memory_types) / sizeof(soc_memory_type_desc_t);

View File

@ -58,7 +58,7 @@ const soc_memory_type_desc_t soc_memory_types[] = {
/* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */ /* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */
[SOC_MEMORY_TYPE_DIRAM] = { "RAM", { MALLOC_DIRAM_BASE_CAPS, 0, 0 }}, [SOC_MEMORY_TYPE_DIRAM] = { "RAM", { MALLOC_DIRAM_BASE_CAPS, 0, 0 }},
//TODO, in fact, part of them support EDMA, to be supported. //TODO, in fact, part of them support EDMA, to be supported.
[SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, ESP32S2_MEM_COMMON_CAPS, 0 }}, [SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, 0, ESP32S2_MEM_COMMON_CAPS }},
[SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS }}, [SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS }},
}; };

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: 2019-2023 Espressif Systems (Shanghai) CO LTD * SPDX-FileCopyrightText: 2019-2024 Espressif Systems (Shanghai) CO LTD
* *
* SPDX-License-Identifier: Apache-2.0 * SPDX-License-Identifier: Apache-2.0
*/ */
@ -49,19 +49,21 @@ enum {
#define MALLOC_RTCRAM_BASE_CAPS ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_EXEC #define MALLOC_RTCRAM_BASE_CAPS ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_EXEC
#endif #endif
// Memory used by SIMD instructions must reside in a region whose bus is able to transfer data 128 bits at a time.
/** /**
* Defined the attributes and allocation priority of each memory on the chip, * Defined the attributes and allocation priority of each memory on the chip,
* The heap allocator will traverse all types of memory types in column High Priority Matching and match the specified caps at first, * The heap allocator will traverse all types of memory types in column High Priority Matching and match the specified caps at first,
* if no memory caps matched or the allocation is failed, it will go to columns Medium Priorty Matching and Low Priority Matching * if no memory caps matched or the allocation is failed, it will go to columns Medium Priority Matching and Low Priority Matching
* in turn to continue matching. * in turn to continue matching.
*/ */
const soc_memory_type_desc_t soc_memory_types[SOC_MEMORY_TYPE_NUM] = { const soc_memory_type_desc_t soc_memory_types[SOC_MEMORY_TYPE_NUM] = {
/* Mem Type Name | High Priority Matching | Medium Priorty Matching | Low Priority Matching */ /* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */
[SOC_MEMORY_TYPE_DIRAM] = { "RAM", { MALLOC_DIRAM_BASE_CAPS, 0, 0 }}, [SOC_MEMORY_TYPE_DIRAM] = { "RAM", { MALLOC_DIRAM_BASE_CAPS | MALLOC_CAP_SIMD, 0, 0 }},
[SOC_MEMORY_TYPE_DRAM] = { "DRAM", { 0, ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA, 0 }}, [SOC_MEMORY_TYPE_DRAM] = { "DRAM", { 0, ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA | MALLOC_CAP_SIMD, 0 }},
[SOC_MEMORY_TYPE_IRAM] = { "IRAM", { MALLOC_CAP_EXEC, MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL, 0 }}, [SOC_MEMORY_TYPE_IRAM] = { "IRAM", { MALLOC_CAP_EXEC, MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL, 0 }},
[SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, ESP32S3_MEM_COMMON_CAPS, 0 }}, [SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, 0, ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_SIMD }},
[SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS }}, [SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS }},
}; };
const size_t soc_memory_type_count = sizeof(soc_memory_types) / sizeof(soc_memory_type_desc_t); const size_t soc_memory_type_count = sizeof(soc_memory_types) / sizeof(soc_memory_type_desc_t);

View File

@ -331,6 +331,10 @@ config SOC_PM_SUPPORTED
bool bool
default y default y
config SOC_SIMD_INSTRUCTION_SUPPORTED
bool
default y
config SOC_XTAL_SUPPORT_40M config SOC_XTAL_SUPPORT_40M
bool bool
default y default y
@ -535,6 +539,10 @@ config SOC_CPU_HAS_LOCKUP_RESET
bool bool
default y default y
config SOC_SIMD_PREFERRED_DATA_ALIGNMENT
int
default 16
config SOC_DS_SIGNATURE_MAX_BIT_LEN config SOC_DS_SIGNATURE_MAX_BIT_LEN
int int
default 4096 default 4096

View File

@ -99,7 +99,7 @@
#define SOC_LIGHT_SLEEP_SUPPORTED 1 #define SOC_LIGHT_SLEEP_SUPPORTED 1
#define SOC_DEEP_SLEEP_SUPPORTED 1 #define SOC_DEEP_SLEEP_SUPPORTED 1
#define SOC_PM_SUPPORTED 1 #define SOC_PM_SUPPORTED 1
#define SOC_SIMD_INSTRUCTION_SUPPORTED 1
/*-------------------------- XTAL CAPS ---------------------------------------*/ /*-------------------------- XTAL CAPS ---------------------------------------*/
#define SOC_XTAL_SUPPORT_40M 1 #define SOC_XTAL_SUPPORT_40M 1
@ -189,6 +189,8 @@
#define SOC_CPU_HAS_LOCKUP_RESET 1 #define SOC_CPU_HAS_LOCKUP_RESET 1
#define SOC_SIMD_PREFERRED_DATA_ALIGNMENT 16 // The preferred data alignment accepted by the SIMD instructions, in bytes
/*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/ /*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/
/** The maximum length of a Digital Signature in bits. */ /** The maximum length of a Digital Signature in bits. */
#define SOC_DS_SIGNATURE_MAX_BIT_LEN (4096) #define SOC_DS_SIGNATURE_MAX_BIT_LEN (4096)

View File

@ -263,6 +263,10 @@ config SOC_PM_SUPPORTED
bool bool
default y default y
config SOC_SIMD_INSTRUCTION_SUPPORTED
bool
default y
config SOC_XTAL_SUPPORT_40M config SOC_XTAL_SUPPORT_40M
bool bool
default y default y
@ -411,6 +415,10 @@ config SOC_CPU_WATCHPOINT_MAX_REGION_SIZE
int int
default 64 default 64
config SOC_SIMD_PREFERRED_DATA_ALIGNMENT
int
default 16
config SOC_DS_SIGNATURE_MAX_BIT_LEN config SOC_DS_SIGNATURE_MAX_BIT_LEN
int int
default 4096 default 4096

View File

@ -82,6 +82,7 @@
#define SOC_DEEP_SLEEP_SUPPORTED 1 #define SOC_DEEP_SLEEP_SUPPORTED 1
#define SOC_LP_PERIPH_SHARE_INTERRUPT 1 // LP peripherals sharing the same interrupt source #define SOC_LP_PERIPH_SHARE_INTERRUPT 1 // LP peripherals sharing the same interrupt source
#define SOC_PM_SUPPORTED 1 #define SOC_PM_SUPPORTED 1
#define SOC_SIMD_INSTRUCTION_SUPPORTED 1
/*-------------------------- XTAL CAPS ---------------------------------------*/ /*-------------------------- XTAL CAPS ---------------------------------------*/
#define SOC_XTAL_SUPPORT_40M 1 #define SOC_XTAL_SUPPORT_40M 1
@ -147,6 +148,8 @@
#define SOC_CPU_WATCHPOINTS_NUM 2 #define SOC_CPU_WATCHPOINTS_NUM 2
#define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 64 // bytes #define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 64 // bytes
#define SOC_SIMD_PREFERRED_DATA_ALIGNMENT 16 // The preferred data alignment accepted by the SIMD instructions, in bytes
/*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/ /*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/
/** The maximum length of a Digital Signature in bits. */ /** The maximum length of a Digital Signature in bits. */
#define SOC_DS_SIGNATURE_MAX_BIT_LEN (4096) #define SOC_DS_SIGNATURE_MAX_BIT_LEN (4096)

View File

@ -1,6 +1,8 @@
Heap Memory Allocation Heap Memory Allocation
====================== ======================
{IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT: default="16", esp32s3="16", esp32p4="16"}
:link_to_translation:`zh_CN:[中文]` :link_to_translation:`zh_CN:[中文]`
Stack and Heap Stack and Heap
@ -134,6 +136,13 @@ Memory allocated with ``MALLOC_CAP_32BIT`` can **only** be accessed via 32-bit r
On ESP32 only external SPI RAM under 4 MiB in size can be allocated this way. To use the region above the 4 MiB limit, you can use the :doc:`himem API </api-reference/system/himem>`. On ESP32 only external SPI RAM under 4 MiB in size can be allocated this way. To use the region above the 4 MiB limit, you can use the :doc:`himem API </api-reference/system/himem>`.
.. only:: SOC_SIMD_INSTRUCTION_SUPPORTED
SIMD-Instruction-Capable Memory
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The ``MALLOC_CAP_SIMD`` flag can be used to allocate memory that is accessible by SIMD (Single Instruction Multiple Data) instructions. Using this flag also aligns the allocated memory to the SIMD preferred data alignment size ({IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT} bytes) for better performance.
Thread Safety Thread Safety
------------- -------------

View File

@ -1,6 +1,8 @@
堆内存分配 堆内存分配
====================== ======================
{IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT: default="16", esp32s3="16", esp32p4="16"}
:link_to_translation:`en:[English]` :link_to_translation:`en:[English]`
栈 (stack) 和堆 (heap) 的区别 栈 (stack) 和堆 (heap) 的区别
@ -134,6 +136,13 @@ DMA 存储器
在 ESP32 上,只有不超过 4 MiB 的外部 SPI RAM 可以通过上述方式分配。要使用超过 4 MiB 限制的区域,可以使用 :doc:`himem API</api-reference/system/himem>` 在 ESP32 上,只有不超过 4 MiB 的外部 SPI RAM 可以通过上述方式分配。要使用超过 4 MiB 限制的区域,可以使用 :doc:`himem API</api-reference/system/himem>`
.. only:: SOC_SIMD_INSTRUCTION_SUPPORTED
SIMD 指令可访问内存
^^^^^^^^^^^^^^^^^^^
``MALLOC_CAP_SIMD`` 标志用于分配可被 SIMD(单指令多数据)指令访问的内存。使用该标志时,分配的内存会自动对齐到 SIMD 最佳数据对齐大小({IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT} 字节),从而提升性能。
线程安全性 线程安全性
------------- -------------