From 8f3a3181e67b20a8e7b0e8148c52d9345337c505 Mon Sep 17 00:00:00 2001
From: Ivan Grokhotkov
Date: Fri, 20 Nov 2020 03:19:04 +0100
Subject: [PATCH] tools: decode esp32c3 stack dumps in idf_monitor.py

This follows a similar approach to UART core dump handling in
idf_monitor. The panic handler output is detected, collected into a
file, and the file is passed to the decoding script. In this case, the
decoding script acts as a tiny GDB server, so we can ask GDB to perform
the backtrace.
---
 tools/gdb_panic_server.py          | 295 +++++++++++++++++++++++++++++
 tools/idf_monitor.py               |  88 ++++++++-
 tools/idf_py_actions/serial_ext.py |   4 +
 3 files changed, 386 insertions(+), 1 deletion(-)
 create mode 100644 tools/gdb_panic_server.py

diff --git a/tools/gdb_panic_server.py b/tools/gdb_panic_server.py
new file mode 100644
index 0000000000..ccf8a0c6d6
--- /dev/null
+++ b/tools/gdb_panic_server.py
@@ -0,0 +1,295 @@
+#!/usr/bin/env python
+# coding=utf-8
+#
+# A script which parses ESP-IDF panic handler output (registers & stack dump),
+# and then acts as a GDB server over stdin/stdout, presenting the information
+# from the panic handler to GDB.
+# This allows for generating backtraces out of raw stack dumps on architectures
+# where backtracing on the target side is not possible.
+#
+# Note that the "act as a GDB server" approach is something of a hack.
+# A much nicer solution would have been to convert the panic handler output
+# into a core file, and point GDB to the core file.
+# However, RISC-V baremetal GDB currently lacks core dump support.
+#
+# The approach is inspired by Cesanta's ESP8266 GDB server:
+# https://github.com/cesanta/mongoose-os/blob/27777c8977/platforms/esp8266/tools/serve_core.py
+#
+# Copyright 2020 Espressif Systems (Shanghai) Co. Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from builtins import bytes
+import argparse
+import struct
+import sys
+import logging
+import binascii
+from collections import namedtuple
+
+from pyparsing import Literal, Word, nums, OneOrMore, srange, Group, Combine
+
+# Used for type annotations only. Silence linter warnings.
+from pyparsing import ParseResults, ParserElement  # noqa: F401 # pylint: disable=unused-import
+
+try:
+    import typing  # noqa: F401 # pylint: disable=unused-import
+except ImportError:
+    pass
+
+# pyparsing helper
+hexnumber = srange("[0-9a-f]")
+
+
+# List of registers to be passed to GDB, in the order GDB expects.
+# The names should match those used in the IDF panic handler.
+# Registers not present in the IDF panic handler output (like X0) will be assumed to be 0.
+GDB_REGS_INFO_RISCV_ILP32 = [
+    "X0", "RA", "SP", "GP",
+    "TP", "T0", "T1", "T2",
+    "S0/FP", "S1", "A0", "A1",
+    "A2", "A3", "A4", "A5",
+    "A6", "A7", "S2", "S3",
+    "S4", "S5", "S6", "S7",
+    "S8", "S9", "S10", "S11",
+    "T3", "T4", "T5", "T6",
+    "MEPC"
+]
+
+
+GDB_REGS_INFO = {
+    "esp32c3": GDB_REGS_INFO_RISCV_ILP32
+}
+
+PanicInfo = namedtuple("PanicInfo", "core_id regs stack_base_addr stack_data")
+
+
+def build_riscv_panic_output_parser():  # type: () -> typing.Type[ParserElement]
+    """Builds a parser for the panic handler output using pyparsing"""
+
+    # We don't match the first line, since "Guru Meditation" will not be printed in case of an abort:
+    # Guru Meditation Error: Core  0 panic'ed (Store access fault). Exception was unhandled.
+
+    # Core  0 register dump:
+    reg_dump_header = Group(Literal("Core") +
+                            Word(nums)("core_id") +
+                            Literal("register dump:"))("reg_dump_header")
+
+    # MEPC    : 0x4200232c  RA      : 0x42009694  SP      : 0x3fc93a80  GP      : 0x3fc8b320
+    reg_name = Word(srange("[A-Z_0-9/-]"))("name")
+    hexnumber_with_0x = Combine(Literal("0x") + Word(hexnumber))
+    reg_value = hexnumber_with_0x("value")
+    reg_dump_one_reg = Group(reg_name + Literal(":") + reg_value)  # not named because there will be OneOrMore
+    reg_dump_all_regs = Group(OneOrMore(reg_dump_one_reg))("regs")
+    reg_dump = Group(reg_dump_header + reg_dump_all_regs)  # not named because there will be OneOrMore
+    reg_dumps = Group(OneOrMore(reg_dump))("reg_dumps")
+
+    # Stack memory:
+    # 3fc93a80: 0x00000030 0x00000021 0x3fc8aedc 0x4200232a 0xa5a5a5a5 0xa5a5a5a5 0x3fc8aedc 0x420099b0
+    stack_line = Group(Word(hexnumber)("base") + Literal(":") +
+                       Group(OneOrMore(hexnumber_with_0x))("data"))
+    stack_dump = Group(Literal("Stack memory:") +
+                       Group(OneOrMore(stack_line))("lines"))("stack_dump")
+
+    # Parser for the complete panic output:
+    panic_output = reg_dumps + stack_dump
+    return panic_output
+
+
+def get_stack_addr_and_data(res):  # type: (ParseResults) -> typing.Tuple[int, bytes]
+    """ Extract base address and bytes from the parsed stack dump """
+    stack_base_addr = 0  # First reported address in the dump
+    base_addr = 0  # keeps track of the address for the given line of the dump
+    bytes_in_line = 0  # bytes of stack parsed on the previous line; used to validate the next base address
+    stack_data = b""  # accumulates all the dumped stack data
+    for line in res.stack_dump.lines:
+        # update and validate the base address
+        prev_base_addr = base_addr
+        base_addr = int(line.base, 16)
+        if stack_base_addr == 0:
+            stack_base_addr = base_addr
+        else:
+            assert base_addr == prev_base_addr + bytes_in_line
+
+        # convert little-endian hex words to byte representation
+        words = [int(w, 16) for w in line.data]
+        line_data = b"".join([struct.pack("<I", w) for w in words])
+        bytes_in_line = len(line_data)
+
+        # append to the accumulated stack data
+        stack_data += line_data
+
+    return stack_base_addr, stack_data
+
+
+def parse_idf_riscv_panic_output(panic_text):  # type: (str) -> PanicInfo
+    """ Decode panic handler output from a file """
+    panic_output = build_riscv_panic_output_parser()
+    results = panic_output.searchString(panic_text)
+    if len(results) != 1:
+        raise ValueError("Couldn't parse panic handler output")
+    res = results[0]
+
+    if len(res.reg_dumps) > 1:
+        raise NotImplementedError("Handling of multi-core register dumps not implemented")
+
+    # Build a dict of register names/values
+    rd = res.reg_dumps[0]
+    core_id = int(rd.reg_dump_header.core_id)
+    regs = dict()
+    for reg in rd.regs:
+        reg_value = int(reg.value, 16)
+        regs[reg.name] = reg_value
+
+    stack_base_addr, stack_data = get_stack_addr_and_data(res)
+
+    return PanicInfo(core_id=core_id,
+                     regs=regs,
+                     stack_base_addr=stack_base_addr,
+                     stack_data=stack_data)
+
+
+PANIC_OUTPUT_PARSERS = {
+    "esp32c3": parse_idf_riscv_panic_output
+}
+
+
+class GdbServer(object):
+    def __init__(self, panic_info, target, log_file=None):  # type: (PanicInfo, str, str) -> None
+        self.panic_info = panic_info
+        self.in_stream = sys.stdin
+        self.out_stream = sys.stdout
+        self.reg_list = GDB_REGS_INFO[target]
+
+        self.logger = logging.getLogger("GdbServer")
+        if log_file:
+            handler = logging.FileHandler(log_file, "w+")
+            self.logger.setLevel(logging.DEBUG)
+            formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+            handler.setFormatter(formatter)
+            self.logger.addHandler(handler)
+
+    def run(self):  # type: () -> None
+        """ Process GDB commands from stdin until GDB tells us to quit """
+        buffer = ""
+        while True:
+            buffer += self.in_stream.read(1)
+            if len(buffer) > 3 and buffer[-3] == '#':
+                self._handle_command(buffer)
+                buffer = ""
+
+    def _handle_command(self, buffer):  # type: (str) -> None
+        command = buffer[1:-3]  # strip the '$' prefix and the '#xx' checksum
+        # Acknowledge the command
+        self.out_stream.write("+")
+        self.out_stream.flush()
+        self.logger.debug("Got command: %s", command)
+        if command == "?":
+            # report SIGTRAP as the stop reason; the exact reason doesn't matter for backtracing
+            self._respond("T05")
+        elif command.startswith("Hg") or command.startswith("Hc"):
+            # Select thread command
+            self._respond("OK")
+        elif command == "qfThreadInfo":
+            # Get the list of threads.
+            # Only one thread for now, can be extended to show one thread for each core,
+            # if we dump both cores (e.g. on an interrupt watchdog)
+            self._respond("m1")
+        elif command == "qC":
+            # That single thread is selected.
+            self._respond("QC1")
+        elif command == "g":
+            # Registers read
+            self._respond_regs()
+        elif command.startswith("m"):
+            # Memory read
+            addr, size = [int(v, 16) for v in command[1:].split(",")]
+            self._respond_mem(addr, size)
+        elif command.startswith("vKill") or command == "k":
+            # Quit
+            self._respond("OK")
+            raise SystemExit(0)
+        else:
+            # Empty response required for any unknown command
+            self._respond("")
+
+    def _respond(self, data):  # type: (str) -> None
+        # calculate the checksum
+        data_bytes = bytes(data.encode("ascii"))  # bytes() for Py2 compatibility
+        checksum = sum(data_bytes) & 0xff
+        # format and write the response
+        res = "${}#{:02x}".format(data, checksum)
+        self.logger.debug("Wrote: %s", res)
+        self.out_stream.write(res)
+        self.out_stream.flush()
+        # get the result ('+' or '-')
+        ret = self.in_stream.read(1)
+        self.logger.debug("Response: %s", ret)
+        if ret != '+':
+            sys.stderr.write("GDB responded with '-' to {}".format(res))
+            raise SystemExit(1)
+
+    def _respond_regs(self):  # type: () -> None
+        response = ""
+        for reg_name in self.reg_list:
+            # register values are reported as hexadecimal strings
+            # in target byte order (i.e. LSB first for RISC-V)
+            reg_val = self.panic_info.regs.get(reg_name, 0)
+            reg_bytes = struct.pack("<I", reg_val)
+            response += binascii.hexlify(reg_bytes).decode("ascii")
+        self._respond(response)
+
+    def _respond_mem(self, start_addr, size):  # type: (int, int) -> None
+        stack_addr_min = self.panic_info.stack_base_addr
+        stack_data = self.panic_info.stack_data
+        stack_len = len(self.panic_info.stack_data)
+        stack_addr_max = stack_addr_min + stack_len
+
+        # For any memory address that is not on the stack, pretend the value is 0x00.
+        # GDB should never ask us for program memory; that will be obtained from the ELF file.
+ def in_stack(addr): + return stack_addr_min <= addr < stack_addr_max + + result = "" + for addr in range(start_addr, start_addr + size): + if not in_stack(addr): + result += "00" + else: + result += "{:02x}".format(stack_data[addr - stack_addr_min]) + + self._respond(result) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("input_file", type=argparse.FileType("r"), + help="File containing the panic handler output") + parser.add_argument("--target", choices=GDB_REGS_INFO.keys(), + help="Chip to use (determines the architecture)") + parser.add_argument("--gdb-log", default=None, + help="If specified, the file for logging GDB server debug information") + args = parser.parse_args() + + panic_info = PANIC_OUTPUT_PARSERS[args.target](args.input_file.read()) + + server = GdbServer(panic_info, target=args.target, log_file=args.gdb_log) + try: + server.run() + except KeyboardInterrupt: + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/tools/idf_monitor.py b/tools/idf_monitor.py index 67236c26e3..e2940e9f41 100755 --- a/tools/idf_monitor.py +++ b/tools/idf_monitor.py @@ -137,6 +137,19 @@ COREDUMP_DONE = 2 COREDUMP_DECODE_DISABLE = "disable" COREDUMP_DECODE_INFO = "info" +# panic handler related messages +PANIC_START = r"Core \s*\d+ register dump:" +PANIC_END = b"ELF file SHA256:" +PANIC_STACK_DUMP = b"Stack memory:" + +# panic handler decoding states +PANIC_IDLE = 0 +PANIC_READING = 1 + +# panic handler decoding options +PANIC_DECODE_DISABLE = "disable" +PANIC_DECODE_BACKTRACE = "backtrace" + class StoppableThread(object): """ @@ -486,6 +499,8 @@ class Monitor(object): def __init__(self, serial_instance, elf_file, print_filter, make="make", encrypted=False, toolchain_prefix=DEFAULT_TOOLCHAIN_PREFIX, eol="CRLF", decode_coredumps=COREDUMP_DECODE_INFO, + decode_panic=PANIC_DECODE_DISABLE, + target=None, websocket_client=None): super(Monitor, self).__init__() self.event_queue = queue.Queue() @@ -519,6 +534,7 @@ class Monitor(object): self.encrypted = encrypted self.toolchain_prefix = toolchain_prefix self.websocket_client = websocket_client + self.target = target # internal state self._last_line_part = b"" @@ -533,6 +549,9 @@ class Monitor(object): self._decode_coredumps = decode_coredumps self._reading_coredump = COREDUMP_IDLE self._coredump_buffer = b"" + self._decode_panic = decode_panic + self._reading_panic = PANIC_IDLE + self._panic_buffer = b"" def invoke_processing_last_line(self): self.event_queue.put((TAG_SERIAL_FLUSH, b''), False) @@ -602,6 +621,7 @@ class Monitor(object): if line != b"": if self._serial_check_exit and line == self.console_parser.exit_key.encode('latin-1'): raise SerialStopException() + self.check_panic_decode_trigger(line) self.check_coredump_trigger_before_print(line) if self._force_line_print or self._line_matcher.match(line.decode(errors="ignore")): self._print(line + b'\n') @@ -806,6 +826,59 @@ class Monitor(object): except OSError as e: yellow_print("Couldn't remote temporary core dump file ({})".format(e)) + def check_panic_decode_trigger(self, line): + if self._decode_panic == PANIC_DECODE_DISABLE: + return + + if self._reading_panic == PANIC_IDLE and re.search(PANIC_START, line.decode("ascii", errors='ignore')): + self._reading_panic = PANIC_READING + yellow_print("Stack dump detected") + + if self._reading_panic == PANIC_READING and PANIC_STACK_DUMP in line: + self._output_enabled = False + + if self._reading_panic == PANIC_READING: + self._panic_buffer += line.replace(b'\r', b'') + b'\n' + + if self._reading_panic == 
PANIC_READING and PANIC_END in line: + self._reading_panic = PANIC_IDLE + self._output_enabled = True + self.process_panic_output(self._panic_buffer) + self._panic_buffer = b"" + + def process_panic_output(self, panic_output): + panic_output_decode_script = os.path.join(os.path.dirname(__file__), "..", "tools", "gdb_panic_server.py") + panic_output_file = None + try: + # On Windows, the temporary file can't be read unless it is closed. + # Set delete=False and delete the file manually later. + with tempfile.NamedTemporaryFile(mode="wb", delete=False) as panic_output_file: + panic_output_file.write(panic_output) + panic_output_file.flush() + + cmd = [self.toolchain_prefix + "gdb", + "--batch", "-n", + self.elf_file, + "-ex", "target remote | \"{python}\" \"{script}\" --target {target} \"{output_file}\"" + .format(python=sys.executable, + script=panic_output_decode_script, + target=self.target, + output_file=panic_output_file.name), + "-ex", "bt"] + + output = subprocess.check_output(cmd, stderr=subprocess.STDOUT) + yellow_print("\nBacktrace:\n\n") + self._print(output) + except subprocess.CalledProcessError as e: + yellow_print("Failed to run gdb_panic_server.py script: {}\n{}\n\n".format(e, e.output)) + self._print(panic_output) + finally: + if panic_output_file is not None: + try: + os.unlink(panic_output_file.name) + except OSError as e: + yellow_print("Couldn't remove temporary panic output file ({})".format(e)) + def run_gdb(self): with self: # disable console control sys.stderr.write(ANSI_NORMAL) @@ -981,6 +1054,19 @@ def main(): help="Handling of core dumps found in serial output" ) + parser.add_argument( + '--decode-panic', + choices=[PANIC_DECODE_BACKTRACE, PANIC_DECODE_DISABLE], + default=PANIC_DECODE_DISABLE, + help="Handling of panic handler info found in serial output" + ) + + parser.add_argument( + '--target', + required=False, + help="Target name (used when stack dump decoding is enabled)" + ) + parser.add_argument( '--ws', default=os.environ.get('ESP_IDF_MONITOR_WS', None), @@ -1029,7 +1115,7 @@ def main(): try: monitor = Monitor(serial_instance, args.elf_file.name, args.print_filter, args.make, args.encrypted, args.toolchain_prefix, args.eol, - args.decode_coredumps, + args.decode_coredumps, args.decode_panic, args.target, ws) yellow_print('--- idf_monitor on {p.name} {p.baudrate} ---'.format( diff --git a/tools/idf_py_actions/serial_ext.py b/tools/idf_py_actions/serial_ext.py index f947f2a5ef..da36e0d337 100644 --- a/tools/idf_py_actions/serial_ext.py +++ b/tools/idf_py_actions/serial_ext.py @@ -94,6 +94,10 @@ def action_extensions(base_actions, project_path): if coredump_decode is not None: monitor_args += ["--decode-coredumps", coredump_decode] + target_arch_riscv = get_sdkconfig_value(project_desc["config_file"], "CONFIG_IDF_TARGET_ARCH_RISCV") + if target_arch_riscv: + monitor_args += ["--decode-panic", "backtrace", "--target", project_desc["target"]] + if print_filter is not None: monitor_args += ["--print_filter", print_filter] monitor_args += [elf_file]
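
For reference, the "tiny GDB server" in gdb_panic_server.py speaks the GDB remote serial
protocol over stdin/stdout: each packet is "$<payload>#<checksum>", where the checksum is
the modulo-256 sum of the payload bytes, and each correctly received packet is acknowledged
with '+'. The minimal Python sketch below illustrates the framing that GdbServer._respond
and GdbServer._handle_command implement; the helper names rsp_frame and rsp_unframe are
illustrative only and do not exist in the patch.

# Illustrative sketch of the GDB remote serial protocol framing used by GdbServer.
def rsp_frame(payload):  # type: (str) -> str
    """Frame a payload the way GdbServer._respond does: "$<payload>#<checksum>"."""
    checksum = sum(payload.encode("ascii")) & 0xff  # modulo-256 sum of the payload bytes
    return "${}#{:02x}".format(payload, checksum)

def rsp_unframe(packet):  # type: (str) -> str
    """Extract the payload the way GdbServer._handle_command does (checksum not verified)."""
    assert packet[0] == "$" and packet[-3] == "#"
    return packet[1:-3]

if __name__ == "__main__":
    # "T05" is the stop reply GdbServer sends for the "?" command (SIGTRAP).
    framed = rsp_frame("T05")
    print(framed)               # prints: $T05#b9
    print(rsp_unframe(framed))  # prints: T05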