From cb9786d35e62d4b4973d668f298dde825d9c4f3f Mon Sep 17 00:00:00 2001 From: Jakob Hasse Date: Fri, 2 Dec 2022 17:30:56 +0100 Subject: [PATCH] refactor(esp_system): reboot delay: added docs, protected by watchdog --- components/esp_system/Kconfig | 3 +- components/esp_system/panic.c | 60 ++++++++++--------- components/esp_system/port/panic_handler.c | 4 +- docs/en/api-guides/fatal-errors.rst | 4 +- tools/test_apps/system/panic/pytest_panic.py | 19 ++++++ .../system/panic/sdkconfig.ci.panic_delay | 2 + 6 files changed, 60 insertions(+), 32 deletions(-) create mode 100644 tools/test_apps/system/panic/sdkconfig.ci.panic_delay diff --git a/components/esp_system/Kconfig b/components/esp_system/Kconfig index 7999623d16..074b4c5f37 100644 --- a/components/esp_system/Kconfig +++ b/components/esp_system/Kconfig @@ -51,7 +51,8 @@ menu "ESP System Settings" config ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS int "Panic reboot delay (Seconds)" default 0 - depends on ESP_SYSTEM_PANIC_PRINT_REBOOT || ESP_SYSTEM_PANIC_SILENT_REBOOT + range 0 99 + depends on ESP_SYSTEM_PANIC_PRINT_REBOOT help After the panic handler executes, you can specify a number of seconds to wait before the device reboots. diff --git a/components/esp_system/panic.c b/components/esp_system/panic.c index a5f48a205f..4073564742 100644 --- a/components/esp_system/panic.c +++ b/components/esp_system/panic.c @@ -173,7 +173,7 @@ void panic_print_dec(int d) for example stalling the other core on ESP32 may cause the ESP32_ECO3_CACHE_LOCK_FIX handler to get stuck. */ -void esp_panic_handler_reconfigure_wdts(void) +void esp_panic_handler_reconfigure_wdts(uint32_t timeout_ms) { wdt_hal_context_t wdt0_context = {.inst = WDT_MWDT0, .mwdt_dev = &TIMERG0}; #if SOC_TIMER_GROUPS >= 2 @@ -185,7 +185,7 @@ void esp_panic_handler_reconfigure_wdts(void) //Reconfigure TWDT (Timer Group 0) wdt_hal_init(&wdt0_context, WDT_MWDT0, MWDT0_TICK_PRESCALER, false); //Prescaler: wdt counts in ticks of TG0_WDT_TICK_US wdt_hal_write_protect_disable(&wdt0_context); - wdt_hal_config_stage(&wdt0_context, 0, 1000 * 1000 / MWDT0_TICKS_PER_US, WDT_STAGE_ACTION_RESET_SYSTEM); //1 second before reset + wdt_hal_config_stage(&wdt0_context, 0, timeout_ms * 1000 / MWDT0_TICKS_PER_US, WDT_STAGE_ACTION_RESET_SYSTEM); //1 second before reset wdt_hal_enable(&wdt0_context); wdt_hal_write_protect_enable(&wdt0_context); @@ -233,7 +233,7 @@ void esp_panic_handler(panic_info_t *info) { // The port-level panic handler has already called this, but call it again // to reset the TG0WDT period - esp_panic_handler_reconfigure_wdts(); + esp_panic_handler_reconfigure_wdts(1000); // If the exception was due to an abort, override some of the panic info if (g_panic_abort) { @@ -318,7 +318,7 @@ void esp_panic_handler(panic_info_t *info) } - esp_panic_handler_reconfigure_wdts(); // Restart WDT again + esp_panic_handler_reconfigure_wdts(1000); // Restart WDT again PANIC_INFO_DUMP(info, state); panic_print_str("\r\n"); @@ -349,7 +349,7 @@ void esp_panic_handler(panic_info_t *info) esp_apptrace_flush_nolock(ESP_APPTRACE_DEST_TRAX, CONFIG_APPTRACE_POSTMORTEM_FLUSH_THRESH, APPTRACE_ONPANIC_HOST_FLUSH_TMO); #endif - esp_panic_handler_reconfigure_wdts(); // restore WDT config + esp_panic_handler_reconfigure_wdts(1000); // restore WDT config #endif // CONFIG_APPTRACE_ENABLE #if CONFIG_ESP_SYSTEM_PANIC_GDBSTUB @@ -375,35 +375,39 @@ void esp_panic_handler(panic_info_t *info) #endif s_dumping_core = false; - esp_panic_handler_reconfigure_wdts(); + esp_panic_handler_reconfigure_wdts(1000); } #endif /* CONFIG_ESP_COREDUMP_ENABLE */ + +#if CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS + // start RTC WDT if it hasn't been started yet and set the timeout to more than the delay time + wdt_hal_init(&rtc_wdt_ctx, WDT_RWDT, 0, false); + uint32_t stage_timeout_ticks = (uint32_t)(((CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS + 1) * 1000 + * rtc_clk_slow_freq_get_hz()) / 1000ULL); + wdt_hal_write_protect_disable(&rtc_wdt_ctx); + wdt_hal_config_stage(&rtc_wdt_ctx, WDT_STAGE0, stage_timeout_ticks, WDT_STAGE_ACTION_RESET_SYSTEM); + // 64KB of core dump data (stacks of about 30 tasks) will produce ~85KB base64 data. + // @ 115200 UART speed it will take more than 6 sec to print them out. + wdt_hal_enable(&rtc_wdt_ctx); + wdt_hal_write_protect_enable(&rtc_wdt_ctx); + + esp_panic_handler_reconfigure_wdts((CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS + 1) * 1000); + + panic_print_str("Rebooting in "); + panic_print_dec(CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS); + panic_print_str(" seconds...\r\n"); + + esp_rom_delay_us(CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS * 1000000); + + esp_panic_handler_reconfigure_wdts(1000); +#endif /* CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS */ + wdt_hal_write_protect_disable(&rtc_wdt_ctx); wdt_hal_disable(&rtc_wdt_ctx); wdt_hal_write_protect_enable(&rtc_wdt_ctx); + #if CONFIG_ESP_SYSTEM_PANIC_PRINT_REBOOT || CONFIG_ESP_SYSTEM_PANIC_SILENT_REBOOT -#if CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS - - disable_all_wdts(); - - panic_print_str("Waiting to reboot...\r\n"); - - for(int i = 0; i < CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS; i++) { - - // Display a progress countdown. - // Only print every 5th second so users terminal is still "calm". - if (i % 5 == 0) { - panic_print_dec(CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS - i); - panic_print_str(" seconds...\r\n"); - } - - esp_rom_delay_us(1000000); - } - - esp_panic_handler_reconfigure_wdts(); -#endif /* CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS */ - if (esp_reset_reason_get_hint() == ESP_RST_UNKNOWN) { switch (info->exception) { case PANIC_EXCEPTION_IWDT: @@ -422,7 +426,7 @@ void esp_panic_handler(panic_info_t *info) panic_print_str("Rebooting...\r\n"); panic_restart(); -#else +#else /* CONFIG_ESP_SYSTEM_PANIC_PRINT_REBOOT || CONFIG_ESP_SYSTEM_PANIC_SILENT_REBOOT */ disable_all_wdts(); panic_print_str("CPU halted.\r\n"); while (1); diff --git a/components/esp_system/port/panic_handler.c b/components/esp_system/port/panic_handler.c index 07782b6fe5..3acf88a236 100644 --- a/components/esp_system/port/panic_handler.c +++ b/components/esp_system/port/panic_handler.c @@ -41,7 +41,7 @@ extern int _invalid_pc_placeholder; -extern void esp_panic_handler_reconfigure_wdts(void); +extern void esp_panic_handler_reconfigure_wdts(uint32_t timeout_ms); extern void esp_panic_handler(panic_info_t *); @@ -151,7 +151,7 @@ static void panic_handler(void *frame, bool pseudo_excause) } // Need to reconfigure WDTs before we stall any other CPU - esp_panic_handler_reconfigure_wdts(); + esp_panic_handler_reconfigure_wdts(1000); esp_rom_delay_us(1); // Stall all other cores diff --git a/docs/en/api-guides/fatal-errors.rst b/docs/en/api-guides/fatal-errors.rst index 2342325b58..017e790e65 100644 --- a/docs/en/api-guides/fatal-errors.rst +++ b/docs/en/api-guides/fatal-errors.rst @@ -69,7 +69,7 @@ Subsequent behavior of the panic handler can be set using :ref:`CONFIG_ESP_SYSTE Start GDB server which can communicate with GDB over console UART port. This option allows the user to debug a program at run time and set break points, alter the execution, etc. See `GDB Stub`_ for more details. -The behavior of the panic handler is affected by two other configuration options. +The behavior of the panic handler is affected by three other configuration options. - If :ref:`CONFIG_ESP_DEBUG_OCDAWARE` is enabled (which is the default), the panic handler will detect whether a JTAG debugger is connected. If it is, execution will be halted and control will be passed to the debugger. In this case, registers and backtrace are not dumped to the console, and GDBStub / Core Dump functions are not used. @@ -79,6 +79,8 @@ The behavior of the panic handler is affected by two other configuration options If this option is enabled, the panic handler code (including required UART functions) is placed in IRAM, and hence will decrease the usable memory space in SRAM. But this may be necessary to debug some complex issues with crashes while flash cache is disabled (for example, when writing to SPI flash) or when flash cache is corrupted when an exception is triggered. +- If :ref:`CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS` is enabled (disabled by default) and set to a number higher than 0, the panic handler will delay the reboot for that amount of time in seconds. This can help if the tool used to monitor serial output does not provide a possibility to stop and examine the serial output. In that case, delaying the reboot will allow users to examine and debug the panic handler output (backtrace, etc.) for the duration of the delay. After the delay, the device will reboot. The reset reason is preserved. + The following diagram illustrates the panic handler behavior: .. blockdiag:: diff --git a/tools/test_apps/system/panic/pytest_panic.py b/tools/test_apps/system/panic/pytest_panic.py index f60f384eaf..a923f8cf4e 100644 --- a/tools/test_apps/system/panic/pytest_panic.py +++ b/tools/test_apps/system/panic/pytest_panic.py @@ -5,6 +5,7 @@ import re from pprint import pformat from typing import List, Optional +import pexpect import pytest from test_panic_util import PanicTestDut @@ -354,3 +355,21 @@ def test_assert_cache_disabled( dut.expect_elf_sha256() dut.expect_none(['Guru Meditation', 'Re-entered core dump']) common_test(dut, config, expected_backtrace=get_default_backtrace(test_func_name)) + + +@pytest.mark.esp32 +@pytest.mark.parametrize('config', ['panic_delay'], indirect=True) +@pytest.mark.generic +def test_panic_delay(dut: PanicTestDut) -> None: + dut.expect_test_func_name('test_storeprohibited') + try: + dut.expect_exact('Rebooting...', timeout=4) + except pexpect.TIMEOUT: + # We are supposed to NOT find the output for the specified time + pass + else: + # If we actually match the output within the timeout, it means the delay didn't work + raise AssertionError('Rebooted too early, delay is too short') + + dut.expect_exact('Rebooting...', timeout=3) + dut.expect_exact('rst:0xc (SW_CPU_RESET)') diff --git a/tools/test_apps/system/panic/sdkconfig.ci.panic_delay b/tools/test_apps/system/panic/sdkconfig.ci.panic_delay new file mode 100644 index 0000000000..81c77706db --- /dev/null +++ b/tools/test_apps/system/panic/sdkconfig.ci.panic_delay @@ -0,0 +1,2 @@ +CONFIG_ESP_SYSTEM_PANIC_PRINT_REBOOT=y +CONFIG_ESP_SYSTEM_PANIC_REBOOT_DELAY_SECONDS=5