From 6cb64d702540e7318c8539178f52d338c43ceca1 Mon Sep 17 00:00:00 2001 From: Song Ruo Jing Date: Fri, 6 Dec 2024 11:39:57 +0800 Subject: [PATCH] feat(heap): add a MALLOC_CAP_SIMD flag MALLOC_CAP_SIMD can be used to allocate memory to be used for SIMD instructions --- components/esp_mm/heap_align_hw.c | 10 +++++++++- components/esp_psram/esp_psram.c | 2 +- components/heap/include/esp_heap_caps.h | 1 + components/heap/port/esp32p4/memory_layout.c | 13 ++++++++----- components/heap/port/esp32s2/memory_layout.c | 2 +- components/heap/port/esp32s3/memory_layout.c | 18 ++++++++++-------- .../esp32p4/include/soc/Kconfig.soc_caps.in | 8 ++++++++ components/soc/esp32p4/include/soc/soc_caps.h | 4 +++- .../esp32s3/include/soc/Kconfig.soc_caps.in | 8 ++++++++ components/soc/esp32s3/include/soc/soc_caps.h | 3 +++ docs/en/api-reference/system/mem_alloc.rst | 9 +++++++++ docs/zh_CN/api-reference/system/mem_alloc.rst | 9 +++++++++ 12 files changed, 70 insertions(+), 17 deletions(-) diff --git a/components/esp_mm/heap_align_hw.c b/components/esp_mm/heap_align_hw.c index 96787f19d0..d31ea59bd3 100644 --- a/components/esp_mm/heap_align_hw.c +++ b/components/esp_mm/heap_align_hw.c @@ -20,7 +20,7 @@ #define HEAP_IRAM_ATTR IRAM_ATTR #endif -#define CAPS_NEEDING_ALIGNMENT (MALLOC_CAP_DMA|MALLOC_CAP_DMA_DESC_AHB|MALLOC_CAP_DMA_DESC_AXI|MALLOC_CAP_CACHE_ALIGNED) +#define CAPS_NEEDING_ALIGNMENT (MALLOC_CAP_DMA|MALLOC_CAP_DMA_DESC_AHB|MALLOC_CAP_DMA_DESC_AXI|MALLOC_CAP_CACHE_ALIGNED|MALLOC_CAP_SIMD) HEAP_IRAM_ATTR void esp_heap_adjust_alignment_to_hw(size_t *p_alignment, size_t *p_size, uint32_t *p_caps) { @@ -72,6 +72,14 @@ HEAP_IRAM_ATTR void esp_heap_adjust_alignment_to_hw(size_t *p_alignment, size_t if (cache_alignment_bytes > alignment) { alignment = cache_alignment_bytes; } + +#if SOC_SIMD_INSTRUCTION_SUPPORTED + // SIMD instructions preferred data alignment, SOC_SIMD_PREFERRED_DATA_ALIGNMENT, which is also definitely a power of two + if (caps & MALLOC_CAP_SIMD) { + alignment = 
(alignment > SOC_SIMD_PREFERRED_DATA_ALIGNMENT) ? alignment : SOC_SIMD_PREFERRED_DATA_ALIGNMENT; + } +#endif + // Align up `size` to resulting alignment as well. size = (size + alignment - 1) & (~(alignment - 1)); diff --git a/components/esp_psram/esp_psram.c b/components/esp_psram/esp_psram.c index 80a9cdeb5f..0ca1f8f792 100644 --- a/components/esp_psram/esp_psram.c +++ b/components/esp_psram/esp_psram.c @@ -369,7 +369,7 @@ esp_err_t esp_psram_extram_add_to_heap_allocator(void) { esp_err_t ret = ESP_FAIL; - uint32_t byte_aligned_caps[] = {MALLOC_CAP_SPIRAM | MALLOC_CAP_DEFAULT, 0, MALLOC_CAP_8BIT | MALLOC_CAP_32BIT}; + uint32_t byte_aligned_caps[] = {MALLOC_CAP_SPIRAM | MALLOC_CAP_DEFAULT, 0, MALLOC_CAP_8BIT | MALLOC_CAP_32BIT | MALLOC_CAP_SIMD}; ret = heap_caps_add_region_with_caps(byte_aligned_caps, s_psram_ctx.regions_to_heap[PSRAM_MEM_8BIT_ALIGNED].vaddr_start, s_psram_ctx.regions_to_heap[PSRAM_MEM_8BIT_ALIGNED].vaddr_end); diff --git a/components/heap/include/esp_heap_caps.h b/components/heap/include/esp_heap_caps.h index e875e4badd..0229809c18 100644 --- a/components/heap/include/esp_heap_caps.h +++ b/components/heap/include/esp_heap_caps.h @@ -46,6 +46,7 @@ extern "C" { #define MALLOC_CAP_DMA_DESC_AHB (1<<17) ///< Memory must be capable of containing AHB DMA descriptors #define MALLOC_CAP_DMA_DESC_AXI (1<<18) ///< Memory must be capable of containing AXI DMA descriptors #define MALLOC_CAP_CACHE_ALIGNED (1<<19) ///< Memory must be aligned to the cache line size of any intermediate caches +#define MALLOC_CAP_SIMD (1<<20) ///< Memory must be capable of being used for SIMD instructions (i.e. 
allow for SIMD-specific-bit data accesses) #define MALLOC_CAP_INVALID (1<<31) ///< Memory can't be used / list end marker diff --git a/components/heap/port/esp32p4/memory_layout.c b/components/heap/port/esp32p4/memory_layout.c index 79357938c0..dadd34a755 100644 --- a/components/heap/port/esp32p4/memory_layout.c +++ b/components/heap/port/esp32p4/memory_layout.c @@ -44,6 +44,9 @@ enum { #define MALLOC_RTCRAM_BASE_CAPS ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_EXEC #endif +// The memory used for SIMD instructions requires the bus of its memory regions be able to transfer the data in 128-bit +// TCM and RTCRAM memory regions cannot satisfy 128-bit data access + /** * Defined the attributes and allocation priority of each memory on the chip, * The heap allocator will traverse all types of memory types in column High Priority Matching and match the specified caps at first, @@ -51,11 +54,11 @@ enum { * in turn to continue matching. */ const soc_memory_type_desc_t soc_memory_types[SOC_MEMORY_TYPE_NUM] = { - /* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */ - [SOC_MEMORY_TYPE_L2MEM] = { "RAM", { MALLOC_L2MEM_BASE_CAPS, 0, 0 }}, - [SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, ESP32P4_MEM_COMMON_CAPS, 0 }}, - [SOC_MEMORY_TYPE_TCM] = { "TCM", { MALLOC_CAP_TCM, ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL, 0 }}, - [SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS}}, + /* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */ + [SOC_MEMORY_TYPE_L2MEM] = { "RAM", { MALLOC_L2MEM_BASE_CAPS | MALLOC_CAP_SIMD, 0, 0 }}, + [SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, 0, ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_SIMD }}, + [SOC_MEMORY_TYPE_TCM] = { "TCM", { MALLOC_CAP_TCM, ESP32P4_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL, 0 }}, + [SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS}}, }; const 
size_t soc_memory_type_count = sizeof(soc_memory_types) / sizeof(soc_memory_type_desc_t); diff --git a/components/heap/port/esp32s2/memory_layout.c b/components/heap/port/esp32s2/memory_layout.c index e64ebe80aa..6bd57135ce 100644 --- a/components/heap/port/esp32s2/memory_layout.c +++ b/components/heap/port/esp32s2/memory_layout.c @@ -58,7 +58,7 @@ const soc_memory_type_desc_t soc_memory_types[] = { /* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */ [SOC_MEMORY_TYPE_DIRAM] = { "RAM", { MALLOC_DIRAM_BASE_CAPS, 0, 0 }}, //TODO, in fact, part of them support EDMA, to be supported. - [SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, ESP32S2_MEM_COMMON_CAPS, 0 }}, + [SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, 0, ESP32S2_MEM_COMMON_CAPS }}, [SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS }}, }; diff --git a/components/heap/port/esp32s3/memory_layout.c b/components/heap/port/esp32s3/memory_layout.c index b5a0a60a6b..2e07f63f28 100644 --- a/components/heap/port/esp32s3/memory_layout.c +++ b/components/heap/port/esp32s3/memory_layout.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2019-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2019-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -49,19 +49,21 @@ enum { #define MALLOC_RTCRAM_BASE_CAPS ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_EXEC #endif +// The memory used for SIMD instructions requires the bus of its memory regions be able to transfer the data in 128-bit + /** * Defined the attributes and allocation priority of each memory on the chip, * The heap allocator will traverse all types of memory types in column High Priority Matching and match the specified caps at first, - * if no memory caps matched or the allocation is failed, it will go to columns Medium Priorty Matching and Low Priority Matching + * if no memory caps matched or the 
allocation is failed, it will go to columns Medium Priority Matching and Low Priority Matching * in turn to continue matching. */ const soc_memory_type_desc_t soc_memory_types[SOC_MEMORY_TYPE_NUM] = { -/* Mem Type Name | High Priority Matching | Medium Priorty Matching | Low Priority Matching */ - [SOC_MEMORY_TYPE_DIRAM] = { "RAM", { MALLOC_DIRAM_BASE_CAPS, 0, 0 }}, - [SOC_MEMORY_TYPE_DRAM] = { "DRAM", { 0, ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA, 0 }}, - [SOC_MEMORY_TYPE_IRAM] = { "IRAM", { MALLOC_CAP_EXEC, MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL, 0 }}, - [SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, ESP32S3_MEM_COMMON_CAPS, 0 }}, - [SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS }}, +/* Mem Type Name | High Priority Matching | Medium Priority Matching | Low Priority Matching */ + [SOC_MEMORY_TYPE_DIRAM] = { "RAM", { MALLOC_DIRAM_BASE_CAPS | MALLOC_CAP_SIMD, 0, 0 }}, + [SOC_MEMORY_TYPE_DRAM] = { "DRAM", { 0, ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_INTERNAL | MALLOC_CAP_DMA | MALLOC_CAP_SIMD, 0 }}, + [SOC_MEMORY_TYPE_IRAM] = { "IRAM", { MALLOC_CAP_EXEC, MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL, 0 }}, + [SOC_MEMORY_TYPE_SPIRAM] = { "SPIRAM", { MALLOC_CAP_SPIRAM, 0, ESP32S3_MEM_COMMON_CAPS | MALLOC_CAP_SIMD }}, + [SOC_MEMORY_TYPE_RTCRAM] = { "RTCRAM", { MALLOC_CAP_RTCRAM, 0, MALLOC_RTCRAM_BASE_CAPS }}, }; const size_t soc_memory_type_count = sizeof(soc_memory_types) / sizeof(soc_memory_type_desc_t); diff --git a/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in b/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in index 378147443c..2b5bea2ba3 100644 --- a/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in @@ -331,6 +331,10 @@ config SOC_PM_SUPPORTED bool default y +config SOC_SIMD_INSTRUCTION_SUPPORTED + bool + default y + config SOC_XTAL_SUPPORT_40M bool default y @@ -535,6 +539,10 @@ config SOC_CPU_HAS_LOCKUP_RESET bool 
default y +config SOC_SIMD_PREFERRED_DATA_ALIGNMENT + int + default 16 + config SOC_DS_SIGNATURE_MAX_BIT_LEN int default 4096 diff --git a/components/soc/esp32p4/include/soc/soc_caps.h b/components/soc/esp32p4/include/soc/soc_caps.h index 4c7f8eb92f..f6e92e1388 100644 --- a/components/soc/esp32p4/include/soc/soc_caps.h +++ b/components/soc/esp32p4/include/soc/soc_caps.h @@ -99,7 +99,7 @@ #define SOC_LIGHT_SLEEP_SUPPORTED 1 #define SOC_DEEP_SLEEP_SUPPORTED 1 #define SOC_PM_SUPPORTED 1 - +#define SOC_SIMD_INSTRUCTION_SUPPORTED 1 /*-------------------------- XTAL CAPS ---------------------------------------*/ #define SOC_XTAL_SUPPORT_40M 1 @@ -189,6 +189,8 @@ #define SOC_CPU_HAS_LOCKUP_RESET 1 +#define SOC_SIMD_PREFERRED_DATA_ALIGNMENT 16 // The preferred data alignment accepted by the SIMD instructions, in bytes + /*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/ /** The maximum length of a Digital Signature in bits. */ #define SOC_DS_SIGNATURE_MAX_BIT_LEN (4096) diff --git a/components/soc/esp32s3/include/soc/Kconfig.soc_caps.in b/components/soc/esp32s3/include/soc/Kconfig.soc_caps.in index c93e95122b..3e5015a248 100644 --- a/components/soc/esp32s3/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32s3/include/soc/Kconfig.soc_caps.in @@ -263,6 +263,10 @@ config SOC_PM_SUPPORTED bool default y +config SOC_SIMD_INSTRUCTION_SUPPORTED + bool + default y + config SOC_XTAL_SUPPORT_40M bool default y @@ -411,6 +415,10 @@ config SOC_CPU_WATCHPOINT_MAX_REGION_SIZE int default 64 +config SOC_SIMD_PREFERRED_DATA_ALIGNMENT + int + default 16 + config SOC_DS_SIGNATURE_MAX_BIT_LEN int default 4096 diff --git a/components/soc/esp32s3/include/soc/soc_caps.h b/components/soc/esp32s3/include/soc/soc_caps.h index 0a2c08e439..9f1313068d 100644 --- a/components/soc/esp32s3/include/soc/soc_caps.h +++ b/components/soc/esp32s3/include/soc/soc_caps.h @@ -82,6 +82,7 @@ #define SOC_DEEP_SLEEP_SUPPORTED 1 #define SOC_LP_PERIPH_SHARE_INTERRUPT 
1 // LP peripherals sharing the same interrupt source #define SOC_PM_SUPPORTED 1 +#define SOC_SIMD_INSTRUCTION_SUPPORTED 1 /*-------------------------- XTAL CAPS ---------------------------------------*/ #define SOC_XTAL_SUPPORT_40M 1 @@ -147,6 +148,8 @@ #define SOC_CPU_WATCHPOINTS_NUM 2 #define SOC_CPU_WATCHPOINT_MAX_REGION_SIZE 64 // bytes +#define SOC_SIMD_PREFERRED_DATA_ALIGNMENT 16 // The preferred data alignment accepted by the SIMD instructions, in bytes + /*-------------------------- DIGITAL SIGNATURE CAPS ----------------------------------------*/ /** The maximum length of a Digital Signature in bits. */ #define SOC_DS_SIGNATURE_MAX_BIT_LEN (4096) diff --git a/docs/en/api-reference/system/mem_alloc.rst b/docs/en/api-reference/system/mem_alloc.rst index dbe30659a0..363d57fbc8 100644 --- a/docs/en/api-reference/system/mem_alloc.rst +++ b/docs/en/api-reference/system/mem_alloc.rst @@ -1,6 +1,8 @@ Heap Memory Allocation ====================== +{IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT: default="16", esp32s3="16", esp32p4="16"} + :link_to_translation:`zh_CN:[中文]` Stack and Heap @@ -134,6 +136,13 @@ Memory allocated with ``MALLOC_CAP_32BIT`` can **only** be accessed via 32-bit r On ESP32 only external SPI RAM under 4 MiB in size can be allocated this way. To use the region above the 4 MiB limit, you can use the :doc:`himem API <himem>`. +.. only:: SOC_SIMD_INSTRUCTION_SUPPORTED + + SIMD-Instruction-Capable Memory + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The ``MALLOC_CAP_SIMD`` flag can be used to allocate memory that is accessible by SIMD (Single Instruction Multiple Data) instructions. Using this flag also aligns the memory to the SIMD preferred data alignment size ({IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT}-byte) for better performance. 
+ Thread Safety ------------- diff --git a/docs/zh_CN/api-reference/system/mem_alloc.rst b/docs/zh_CN/api-reference/system/mem_alloc.rst index ed203af3ab..ea3709de69 100644 --- a/docs/zh_CN/api-reference/system/mem_alloc.rst +++ b/docs/zh_CN/api-reference/system/mem_alloc.rst @@ -1,6 +1,8 @@ 堆内存分配 ====================== +{IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT: default="16", esp32s3="16", esp32p4="16"} + :link_to_translation:`en:[English]` 栈 (stack) 和堆 (heap) 的区别 @@ -134,6 +136,13 @@ DMA 存储器 在 ESP32 上,只有不超过 4 MiB 的外部 SPI RAM 可以通过上述方式分配。要使用超过 4 MiB 限制的区域,可以使用 :doc:`himem API<himem>`。 +.. only:: SOC_SIMD_INSTRUCTION_SUPPORTED + + SIMD 指令可访问内存 + ^^^^^^^^^^^^^^^^^^^ + + ``MALLOC_CAP_SIMD`` 标志用于分配可被 SIMD(单指令多数据)指令访问的内存。使用该标志时,分配的内存会自动对齐到 SIMD 最佳数据对齐大小({IDF_TARGET_SIMD_PREFERRED_DATA_ALIGNMENT}-byte),从而提升性能。 + 线程安全性 -------------