From b68b6566f9f967bc7d011f97aab1ae17e82ef390 Mon Sep 17 00:00:00 2001 From: David Garske Date: Mon, 13 Apr 2026 14:30:49 -0700 Subject: [PATCH 1/2] PolarFire SoC M-Mode: Fix L2 scratchpad init, QSPI programmer, and add WDT support --- Makefile | 12 +- .../examples/polarfire_mpfs250_m_qspi.config | 37 ++++-- docs/Targets.md | 22 ++++ hal/mpfs250-m.ld | 15 ++- hal/mpfs250.c | 110 +++++++++++++----- hal/mpfs250.h | 13 +++ src/boot_riscv.c | 29 +++-- src/boot_riscv_start.S | 27 ++++- src/update_ram.c | 15 +++ tools/scripts/mpfs_qspi_prog.py | 9 +- 10 files changed, 229 insertions(+), 60 deletions(-) diff --git a/Makefile b/Makefile index 0285fbd333..f4f35e3378 100644 --- a/Makefile +++ b/Makefile @@ -451,11 +451,20 @@ test-app/image_v1_signed.bin: $(BOOT_IMG) keytools_check @echo "\t[SIGN] $(BOOT_IMG)" @echo "\tSECONDARY_SIGN_OPTIONS=$(SECONDARY_SIGN_OPTIONS)" @echo "\tSECONDARY_PRIVATE_KEY=$(SECONDARY_PRIVATE_KEY)" - +ifeq ($(STRIP_ELF),1) + @echo "\t[STRIP] $(BOOT_IMG)" + $(Q)$(OBJCOPY) --strip-debug $(BOOT_IMG) $(BOOT_IMG).stripped + $(Q)(test $(SIGN) = NONE) || $(SIGN_ENV) $(SIGN_TOOL) $(SIGN_OPTIONS) \ + $(SECONDARY_SIGN_OPTIONS) $(BOOT_IMG).stripped $(PRIVATE_KEY) \ + $(SECONDARY_PRIVATE_KEY) 1 || true + $(Q)(test $(SIGN) = NONE) && $(SIGN_ENV) $(SIGN_TOOL) $(SIGN_OPTIONS) $(BOOT_IMG).stripped 1 || true + $(Q)mv test-app/image.elf_v1_signed.bin test-app/image_v1_signed.bin +else $(Q)(test $(SIGN) = NONE) || $(SIGN_ENV) $(SIGN_TOOL) $(SIGN_OPTIONS) \ $(SECONDARY_SIGN_OPTIONS) $(BOOT_IMG) $(PRIVATE_KEY) \ $(SECONDARY_PRIVATE_KEY) 1 || true $(Q)(test $(SIGN) = NONE) && $(SIGN_ENV) $(SIGN_TOOL) $(SIGN_OPTIONS) $(BOOT_IMG) 1 || true +endif test-app/image.elf: wolfboot.elf $(Q)$(MAKE) -C test-app WOLFBOOT_ROOT="$(WOLFBOOT_ROOT)" ELF_FLASH_SCATTER="$(ELF_FLASH_SCATTER)" image.elf @@ -537,6 +546,7 @@ $(LSCRIPT): $(LSCRIPT_IN) FORCE sed -e "s/@WOLFBOOT_LOAD_BASE@/$(WOLFBOOT_LOAD_BASE)/g" | \ sed -e "s/@BOOTLOADER_START@/$(BOOTLOADER_START)/g" | \ sed -e "s/@IMAGE_HEADER_SIZE@/$(IMAGE_HEADER_SIZE)/g" | \ + sed -e "s/@WOLFBOOT_LOAD_ADDRESS@/$(WOLFBOOT_LOAD_ADDRESS)/g" | \ sed -e "s/@FSP_S_LOAD_BASE@/$(FSP_S_LOAD_BASE)/g" | \ sed -e "s/@WOLFBOOT_L2LIM_SIZE@/$(WOLFBOOT_L2LIM_SIZE)/g" | \ sed -e "s/@L2SRAM_ADDR@/$(L2SRAM_ADDR)/g" \ diff --git a/config/examples/polarfire_mpfs250_m_qspi.config b/config/examples/polarfire_mpfs250_m_qspi.config index a4bd89c22d..192daa471c 100644 --- a/config/examples/polarfire_mpfs250_m_qspi.config +++ b/config/examples/polarfire_mpfs250_m_qspi.config @@ -38,14 +38,29 @@ WOLFTPM?=0 ELF?=1 #DEBUG_ELF?=1 +# Strip debug symbols from test-app ELF before signing. +# Required for M-mode: unstripped ELF (~150KB) is too large for L2 Scratch. +# Stripped ELF is typically ~5KB. +STRIP_ELF?=1 + +# Watchdog timer configuration (default: disabled) +# When commented out, the WDT is disabled in hal_init() and re-enabled +# with the boot ROM default in hal_prepare_boot() before do_boot. +# Uncomment -DWATCHDOG to keep the WDT enabled with a generous timeout +# for the duration of wolfBoot. Verify is bounded at ~5s; default 30s +# avoids the need to pet the WDT during long ECDSA verify. +#CFLAGS_EXTRA+=-DWATCHDOG +#CFLAGS_EXTRA+=-DWATCHDOG_TIMEOUT_MS=30000 + OPTIMIZATION_LEVEL=1 # M-Mode Configuration # Runs on E51 core in Machine Mode from L2 SRAM RISCV_MMODE?=1 -# Stack size per hart (reduced for L2 SRAM constraints) -CFLAGS_EXTRA+=-DSTACK_SIZE_PER_HART=8192 +# Stack size per hart: set to 0 for M-mode (only E51/hart 0 runs; +# secondary harts park in eNVM WFI loop and never use L2 Scratch stacks) +CFLAGS_EXTRA+=-DSTACK_SIZE_PER_HART=0 # E51 core lacks RISC-V crypto extensions (Zknh), use portable C implementations NO_ASM?=1 @@ -57,7 +72,7 @@ SPI_FLASH?=0 # SPI Flash Controller Selection: # MPFS_SC_SPI: Use SC QSPI Controller (0x37020100) for fabric-connected flash. -# Direct register access to System Controller's QSPI instance. +# Required for Video Kit (flash is fabric-connected, not on MSS pins). # DEFAULT: Use MSS QSPI Controller (0x21000000) for external flash # on MSS QSPI pins. CFLAGS_EXTRA+=-DMPFS_SC_SPI @@ -71,12 +86,12 @@ DISK_EMMC?=0 WOLFBOOT_ORIGIN?=0x0A000000 # Load application to L2 Scratchpad (above wolfBoot code, below stack) -# wolfBoot occupies ~40KB at 0x0A000000, stack is 64KB at top of 256KB. +# wolfBoot is ~36KB; 128KB reserved for growth headroom. # Note: update_ram places header at (LOAD_ADDRESS - IMAGE_HEADER_SIZE), # so offset by header size to keep header aligned. # IMPORTANT: Strip debug symbols from test-app ELF before signing to keep -# the image small enough to fit in L2 Scratchpad (~150KB available). -WOLFBOOT_LOAD_ADDRESS?=0x0A010200 +# the image small enough to fit in L2 Scratchpad (~95KB available). +WOLFBOOT_LOAD_ADDRESS?=0x0A020200 # Flash geometry (64 KB sector to match QSPI flash) WOLFBOOT_SECTOR_SIZE?=0x10000 @@ -104,9 +119,11 @@ CFLAGS_EXTRA+=-DWOLFBOOT_SHA_BLOCK_SIZE=4096 # Uncomment to run test during hal_init() #CFLAGS_EXTRA+=-DTEST_EXT_FLASH -# UART QSPI programmer (disabled by default) -# When enabled, wolfBoot prompts on UART at startup to receive a signed firmware -# image and write it to QSPI flash -- no Libero/JTAG tool required for updates. +# UART QSPI programmer (enabled for M-mode) +# In M-mode, QSPI flash is not accessible via Libero/JTAG, so this UART-based +# programmer is the primary method to load the test-app image into QSPI. +# wolfBoot prompts on UART at startup ("Press P within 3s") to receive a signed +# firmware image and write it to QSPI flash. # Use: python3 tools/scripts/mpfs_qspi_prog.py [qspi_offset] # Requires EXT_FLASH=1 (already set) and DEBUG_UART=1. -UART_QSPI_PROGRAM?=0 +UART_QSPI_PROGRAM?=1 diff --git a/docs/Targets.md b/docs/Targets.md index fbc02e6181..30216066db 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -827,6 +827,28 @@ Key build settings that differ between configurations: > **Note:** All configurations require `NO_ASM=1` because the MPFS250 U54/E51 cores lack RISC-V > crypto extensions (Zknh); wolfBoot uses portable C implementations for all cryptographic operations. +### M-Mode Optional Build Flags + +These flags apply to `polarfire_mpfs250_m_qspi.config` and are added via `CFLAGS_EXTRA+=-D...`. + +| Flag | Default | Description | +|------|---------|-------------| +| `WATCHDOG` | undefined (disabled) | When defined, the E51 watchdog timer is **kept enabled** during wolfBoot operation with a generous timeout. When undefined, the WDT is **disabled** in `hal_init()` and re-enabled with the boot ROM default in `hal_prepare_boot()` before jumping to the application. Either way, the application receives a normal WDT. | +| `WATCHDOG_TIMEOUT_MS` | `30000` (30 s) | Watchdog timeout in milliseconds when `WATCHDOG` is defined. ECDSA P-384 verification on E51 with portable C math is bounded at ~5 s; the default 30 s avoids any need to refresh the WDT during the long verify call. | + +#### Stack overflow detection + +The trap handler in `src/boot_riscv.c` automatically detects stack overflow on synchronous exceptions. When a trap fires with `SP < _main_hart_stack_bottom`, it prints: + +``` +TRAP: cause=2 epc=A000740 tval=0 sp=A02FFE8 +STACK OVERFLOW: sp=A02FFE8 < bottom=A030000 (under by 24) +``` + +This is helpful for diagnosing illegal-instruction TRAPs at random valid `.text` addresses, which are the classic signature of stack overflow corrupting the return address. + +The current `STACK_SIZE` in `hal/mpfs250-m.ld` is **32 KB**. Measured peak for ECC384 + SHA384 + SPMATHALL + NO_ASM is ~6 KB (5x headroom). + ### PolarFire SoC Files `hal/mpfs250.c` - Hardware abstraction layer (UART, QSPI, SD/eMMC, multi-hart) diff --git a/hal/mpfs250-m.ld b/hal/mpfs250-m.ld index 90f4814d72..f0f8aa76b9 100644 --- a/hal/mpfs250-m.ld +++ b/hal/mpfs250-m.ld @@ -33,9 +33,9 @@ MEMORY } /* Stack size for the boot hart (E51 in M-mode) - * ECC384 signature verification with sp_int needs significant stack - * for big number temporaries and point multiplication */ -PROVIDE(STACK_SIZE = 64k); + * ECC384 + SHA384 + SPMATHALL + NO_ASM measured peak: ~6KB. + * 32KB provides 5x headroom. */ +PROVIDE(STACK_SIZE = 32k); SECTIONS { @@ -115,9 +115,9 @@ PROVIDE(_start_heap = _end); * * Stack sizes (defined in config or header): * STACK_SIZE_PER_HART = 8192 (8KB per hart) - * STACK_SIZE = 64K (64KB for main hart E51) + * STACK_SIZE = 32K (32KB for main hart E51) * - * Total stack area: STACK_SIZE + 4 * STACK_SIZE_PER_HART = 48KB + * Total stack area: STACK_SIZE + 4 * STACK_SIZE_PER_HART */ PROVIDE(STACK_SIZE_PER_HART = 8192); @@ -144,3 +144,8 @@ PROVIDE(__global_pointer$ = _global_pointer); /* Size of text section to copy (for startup code) */ PROVIDE(_text_size = _end_text - _start_text_sram); + +/* Build-time safety: ensure wolfBoot binary does not overlap image load area. + * Image header is loaded at (WOLFBOOT_LOAD_ADDRESS - IMAGE_HEADER_SIZE). */ +ASSERT(_end <= @WOLFBOOT_LOAD_ADDRESS@ - @IMAGE_HEADER_SIZE@, + "ERROR: wolfBoot binary overlaps image load area! Increase WOLFBOOT_LOAD_ADDRESS") diff --git a/hal/mpfs250.c b/hal/mpfs250.c index c808eeedce..22f9711302 100644 --- a/hal/mpfs250.c +++ b/hal/mpfs250.c @@ -105,6 +105,27 @@ static __attribute__((noinline)) void udelay(uint32_t us) extern uint8_t _main_hart_hls; /* linker-provided address symbol; typed as uint8_t to avoid size confusion */ +/* Watchdog timeout configuration. + * WATCHDOG=0 (default): WDT disabled in hal_init() then restored to boot + * ROM defaults in hal_prepare_boot() before do_boot. + * WATCHDOG=1: WDT kept enabled with WATCHDOG_TIMEOUT_MS during wolfBoot. + * Verify is bounded at ~5s; default 30s leaves ample headroom and avoids + * the need to pet the WDT during the long ECDSA verify call. */ +#ifdef WATCHDOG +# ifndef WATCHDOG_TIMEOUT_MS +# define WATCHDOG_TIMEOUT_MS 30000U +# endif +/* MPFS MSS WDT clock is AHB / 256 ≈ 150 MHz / 256 ≈ 585 kHz at S-mode rate + * but ~80 MHz / 256 ≈ 312 kHz on E51 reset clocks. Use a conservative + * 300 ticks/ms; the actual rate may be a bit higher but a slightly longer + * timeout is safe. Caller can override WATCHDOG_TIMEOUT_MS at build time. */ +# define WATCHDOG_TIMEOUT_TICKS ((WATCHDOG_TIMEOUT_MS) * 300U) +#endif + +/* Saved boot ROM watchdog value, restored in hal_prepare_boot() */ +static uint32_t mpfs_wdt_default_mvrp = 0; + + /* CLINT MSIP register for IPI delivery */ #define CLINT_MSIP_REG(hart) (*(volatile uint32_t*)(CLINT_BASE + (hart) * 4)) @@ -157,6 +178,21 @@ static void qspi_uart_program(void); void hal_init(void) { #ifdef WOLFBOOT_RISCV_MMODE + /* Capture boot ROM WDT default for restoration in hal_prepare_boot() */ + mpfs_wdt_default_mvrp = MSS_WDT_MVRP(MSS_WDT_E51_BASE); + +#ifndef WATCHDOG + /* WATCHDOG=0 (default): disable WDT for the duration of wolfBoot. + * It will be re-enabled in hal_prepare_boot() before do_boot. */ + MSS_WDT_CONTROL(MSS_WDT_E51_BASE) &= ~MSS_WDT_CTRL_ENABLE; +#else + /* WATCHDOG=1: keep WDT enabled with a generous timeout for crypto. + * Verify is bounded at ~5s; configure a much larger timeout so we + * never have to pet the WDT during ECDSA verify. */ + MSS_WDT_REFRESH(MSS_WDT_E51_BASE) = 0xDEADC0DEU; + MSS_WDT_MVRP(MSS_WDT_E51_BASE) = WATCHDOG_TIMEOUT_TICKS; +#endif + mpfs_config_l2_cache(); mpfs_signal_main_hart_started(); #endif @@ -370,9 +406,15 @@ int hal_dts_fixup(void* dts_addr) } void hal_prepare_boot(void) { +#ifdef WOLFBOOT_RISCV_MMODE + /* Restore boot ROM WDT default so the application sees a normal WDT. + * Refresh first so the timer doesn't fire immediately after we apply + * the new MVRP. Re-enable in case it was disabled by hal_init(). */ + MSS_WDT_REFRESH(MSS_WDT_E51_BASE) = 0xDEADC0DEU; + MSS_WDT_MVRP(MSS_WDT_E51_BASE) = mpfs_wdt_default_mvrp; + MSS_WDT_CONTROL(MSS_WDT_E51_BASE) |= MSS_WDT_CTRL_ENABLE; +#endif /* reset the eMMC/SD card? */ - - } void RAMFUNCTION hal_flash_unlock(void) @@ -541,7 +583,9 @@ static int qspi_transfer_block(uint8_t read_mode, const uint8_t *cmd, timeout = QSPI_TIMEOUT_TRIES; while (!(QSPI_STATUS & QSPI_STATUS_READY) && --timeout); if (timeout == 0) { + #ifdef DEBUG_QSPI wolfBoot_printf("QSPI: Timeout waiting for READY\n"); + #endif return -1; } @@ -587,7 +631,9 @@ static int qspi_transfer_block(uint8_t read_mode, const uint8_t *cmd, timeout = QSPI_TIMEOUT_TRIES; while (!(QSPI_STATUS & QSPI_STATUS_TXAVAIL) && --timeout); if (timeout == 0) { + #ifdef DEBUG_QSPI wolfBoot_printf("QSPI: TX FIFO full timeout\n"); + #endif return -2; } QSPI_TX_DATA = cmd[i]; @@ -600,8 +646,10 @@ static int qspi_transfer_block(uint8_t read_mode, const uint8_t *cmd, timeout = QSPI_RX_TIMEOUT_TRIES; while (!(QSPI_STATUS & QSPI_STATUS_RXAVAIL) && --timeout); if (timeout == 0) { + #ifdef DEBUG_QSPI wolfBoot_printf("QSPI: RX timeout at byte %d, status=0x%x\n", i, QSPI_STATUS); + #endif return -3; } data[i] = QSPI_RX_DATA; @@ -610,7 +658,9 @@ static int qspi_transfer_block(uint8_t read_mode, const uint8_t *cmd, timeout = QSPI_RX_TIMEOUT_TRIES; while (!(QSPI_STATUS & QSPI_STATUS_RXDONE) && --timeout); if (timeout == 0) { + #ifdef DEBUG_QSPI wolfBoot_printf("QSPI: RXDONE timeout\n"); + #endif return -5; } } else { @@ -625,7 +675,9 @@ static int qspi_transfer_block(uint8_t read_mode, const uint8_t *cmd, timeout = QSPI_TIMEOUT_TRIES; while (!(QSPI_STATUS & QSPI_STATUS_TXAVAIL) && --timeout); if (timeout == 0) { + #ifdef DEBUG_QSPI wolfBoot_printf("QSPI: TX data timeout\n"); + #endif return -4; } QSPI_TX_DATA = data[i]; @@ -636,8 +688,10 @@ static int qspi_transfer_block(uint8_t read_mode, const uint8_t *cmd, timeout = QSPI_TIMEOUT_TRIES; while (!(QSPI_STATUS & QSPI_STATUS_TXDONE) && --timeout); if (timeout == 0) { + #ifdef DEBUG_QSPI wolfBoot_printf("QSPI: TXDONE timeout, status=0x%x\n", QSPI_STATUS); + #endif return -5; } } @@ -904,7 +958,8 @@ int ext_flash_erase(uintptr_t address, int len) #define QSPI_PROG_CHUNK 256 #define QSPI_PROG_ACK 0x06 -#define QSPI_RX_TIMEOUT_MS 5000U /* 5 s per byte — aborts if host disappears */ +#define QSPI_RX_TIMEOUT_MS 10000U /* 10 s per byte — aborts if host disappears */ + /* Returns 0-255 on success, -1 on timeout (so the boot path is never deadlocked). */ static int uart_qspi_rx(void) @@ -938,7 +993,11 @@ static void qspi_uart_program(void) uint32_t i, s; uint8_t chunk[QSPI_PROG_CHUNK]; - wolfBoot_printf("QSPI-PROG: Press 'P' within 3s to program flash\r\n"); + /* Use uart_qspi_puts (direct UART) for ALL programmer output. + * wolfBoot_printf uses uart_write which adds \r before \n and may + * leave stale bytes in the UART TX pipeline that corrupt the + * binary ACK/data protocol after ERASED. */ + uart_qspi_puts("QSPI-PROG: Press 'P' within 3s to program flash\r\n"); /* Drain any stale RX bytes before opening the window */ while (MMUART_LSR(DEBUG_UART_BASE) & MSS_UART_DR) @@ -954,13 +1013,10 @@ static void qspi_uart_program(void) } if (ch != 'P' && ch != 'p') { - wolfBoot_printf("QSPI-PROG: No trigger (got 0x%02x LSR=0x%02x), booting\r\n", - (unsigned)ch, - (unsigned)MMUART_LSR(DEBUG_UART_BASE)); + uart_qspi_puts("QSPI-PROG: No trigger, booting\r\n"); return; } - wolfBoot_printf("QSPI-PROG: Entering programmer mode\r\n"); uart_qspi_puts("READY\r\n"); /* Receive destination address then data length (4 bytes LE each) */ @@ -968,7 +1024,7 @@ static void qspi_uart_program(void) for (i = 0; i < 4; i++) { int b = uart_qspi_rx(); if (b < 0) { - wolfBoot_printf("QSPI-PROG: RX timeout receiving addr\r\n"); + uart_qspi_puts("QSPI-PROG: RX timeout (addr)\r\n"); return; } addr |= ((uint32_t)(uint8_t)b << (i * 8)); @@ -977,54 +1033,49 @@ static void qspi_uart_program(void) for (i = 0; i < 4; i++) { int b = uart_qspi_rx(); if (b < 0) { - wolfBoot_printf("QSPI-PROG: RX timeout receiving size\r\n"); + uart_qspi_puts("QSPI-PROG: RX timeout (size)\r\n"); return; } size |= ((uint32_t)(uint8_t)b << (i * 8)); } - wolfBoot_printf("QSPI-PROG: addr=0x%x size=%u bytes\r\n", addr, size); - if (size == 0 || size > 0x200000U) { - wolfBoot_printf("QSPI-PROG: Invalid size, aborting\r\n"); + uart_qspi_puts("QSPI-PROG: Invalid size\r\n"); return; } - /* Reject writes to unaligned or out-of-partition addresses before any erase */ + /* Reject writes to unaligned or out-of-partition addresses */ if ((addr & (FLASH_SECTOR_SIZE - 1U)) != 0U) { - wolfBoot_printf("QSPI-PROG: addr 0x%x not sector-aligned, aborting\r\n", addr); + uart_qspi_puts("QSPI-PROG: Not sector-aligned\r\n"); return; } if (!((addr >= WOLFBOOT_PARTITION_BOOT_ADDRESS && addr + size <= WOLFBOOT_PARTITION_BOOT_ADDRESS + WOLFBOOT_PARTITION_SIZE) || (addr >= WOLFBOOT_PARTITION_UPDATE_ADDRESS && addr + size <= WOLFBOOT_PARTITION_UPDATE_ADDRESS + WOLFBOOT_PARTITION_SIZE))) { - wolfBoot_printf("QSPI-PROG: addr 0x%x+%u outside allowed partitions, aborting\r\n", - addr, size); + uart_qspi_puts("QSPI-PROG: Outside partition\r\n"); return; } - /* Erase all required sectors (FLASH_SECTOR_SIZE = 64 KB) */ + /* Erase all required sectors */ n_sectors = (size + FLASH_SECTOR_SIZE - 1) / FLASH_SECTOR_SIZE; - wolfBoot_printf("QSPI-PROG: Erasing %u sector(s) at 0x%x...\r\n", - n_sectors, addr); + uart_qspi_puts("QSPI-PROG: Erasing...\r\n"); ext_flash_unlock(); for (s = 0; s < n_sectors; s++) { int ret = ext_flash_erase(addr + s * FLASH_SECTOR_SIZE, FLASH_SECTOR_SIZE); if (ret < 0) { - wolfBoot_printf("QSPI-PROG: Erase failed at 0x%x (ret %d)\r\n", - addr + s * FLASH_SECTOR_SIZE, ret); + uart_qspi_puts("QSPI-PROG: Erase failed\r\n"); ext_flash_lock(); return; } } - /* "ERASED\r\n" must be the last bytes before the first ACK (0x06). - * Do not insert any wolfBoot_printf between here and the transfer loop. */ uart_qspi_puts("ERASED\r\n"); - /* Chunk transfer: wolfBoot requests each 256-byte block with ACK 0x06 */ + /* Chunk transfer: wolfBoot requests each 256-byte block with ACK 0x06. + * No wolfBoot_printf allowed in this loop — only direct UART via + * uart_qspi_tx/uart_qspi_puts to avoid protocol corruption. */ written = 0; while (written < size) { int ret; @@ -1037,8 +1088,7 @@ static void qspi_uart_program(void) for (i = 0; i < chunk_len; i++) { int b = uart_qspi_rx(); if (b < 0) { - wolfBoot_printf("QSPI-PROG: RX timeout at 0x%x+%u\r\n", - addr + written, i); + uart_qspi_puts("QSPI-PROG: RX timeout\r\n"); ext_flash_lock(); return; } @@ -1047,8 +1097,7 @@ static void qspi_uart_program(void) ret = ext_flash_write(addr + written, chunk, (int)chunk_len); if (ret < 0) { - wolfBoot_printf("QSPI-PROG: Write failed at 0x%x (ret %d)\r\n", - addr + written, ret); + uart_qspi_puts("QSPI-PROG: Write failed\r\n"); ext_flash_lock(); return; } @@ -1056,9 +1105,7 @@ static void qspi_uart_program(void) } ext_flash_lock(); - wolfBoot_printf("QSPI-PROG: Wrote %u bytes to 0x%x\r\n", written, addr); uart_qspi_puts("DONE\r\n"); - wolfBoot_printf("QSPI-PROG: Done, continuing boot\r\n"); } #endif /* UART_QSPI_PROGRAM */ @@ -1292,6 +1339,7 @@ static void uart_init_base(unsigned long base) MMUART_IER(base) = 0u; MMUART_FCR(base) = CLEAR_RX_FIFO_MASK | CLEAR_TX_FIFO_MASK | RXRDY_TXRDYN_EN_MASK; MMUART_MCR(base) &= ~(LOOP_MASK | RLOOP_MASK); + MMUART_MCR(base) |= (1U << 1); /* Assert RTS — required for USB-UART bridge CTS */ MMUART_MM1(base) &= ~(E_MSB_TX_MASK | E_MSB_RX_MASK); MMUART_MM2(base) &= ~(EAFM_MASK | ESWM_MASK); MMUART_MM0(base) &= ~(ETTG_MASK | ERTO_MASK | EFBR_MASK); diff --git a/hal/mpfs250.h b/hal/mpfs250.h index 497bee4a88..f6983e892e 100644 --- a/hal/mpfs250.h +++ b/hal/mpfs250.h @@ -64,6 +64,7 @@ /* Peripheral Soft Reset Control Register (offset 0x88) */ #define SYSREG_SOFT_RESET_CR (*((volatile uint32_t*)(SYSREG_BASE + 0x88))) +#define SYSREG_SOFT_RESET_CR_QSPI (1U << 19) /* MSS Peripheral control bits (shared by SUBBLK_CLOCK_CR and SOFT_RESET_CR) */ #define MSS_PERIPH_ENVM (1U << 0) @@ -75,6 +76,18 @@ #define MSS_PERIPH_MMUART4 (1U << 9) #define MSS_PERIPH_QSPI (1U << 19) +/* MSS Watchdog Timer (per-hart) */ +#define MSS_WDT_E51_BASE 0x20001000UL +#define MSS_WDT_U54_1_BASE 0x20101000UL +#define MSS_WDT_U54_2_BASE 0x20103000UL +#define MSS_WDT_U54_3_BASE 0x20105000UL +#define MSS_WDT_U54_4_BASE 0x20107000UL +#define MSS_WDT_REFRESH(base) (*(volatile uint32_t*)((base) + 0x00)) +#define MSS_WDT_CONTROL(base) (*(volatile uint32_t*)((base) + 0x04)) +#define MSS_WDT_STATUS(base) (*(volatile uint32_t*)((base) + 0x08)) +#define MSS_WDT_TIME(base) (*(volatile uint32_t*)((base) + 0x0C)) +#define MSS_WDT_MVRP(base) (*(volatile uint32_t*)((base) + 0x10)) +#define MSS_WDT_CTRL_ENABLE (1U << 0) /* UART */ #define MSS_UART0_LO_BASE 0x20000000UL diff --git a/src/boot_riscv.c b/src/boot_riscv.c index b7b814279c..9f403bd169 100644 --- a/src/boot_riscv.c +++ b/src/boot_riscv.c @@ -25,9 +25,7 @@ #include "image.h" #include "loader.h" -#ifdef DEBUG_BOOT #include "printf.h" -#endif /* Include platform-specific headers (may define PLIC_BASE) */ #ifdef TARGET_mpfs250 @@ -142,14 +140,31 @@ unsigned long WEAKFUNCTION handle_trap(unsigned long cause, unsigned long epc, last_epc = epc; last_tval = tval; -#ifdef DEBUG_BOOT - /* Debug: print trap info for synchronous exceptions (not interrupts) */ + /* Always print and halt on synchronous exceptions to prevent + * infinite trap-mret loops that appear as silent hangs. + * NOTE: keep each printf SIMPLE (few args) to minimize the risk of + * recursive traps if wolfBoot's state is corrupted. */ if (!(cause & MCAUSE_INT)) { - wolfBoot_printf("TRAP: cause=%lx epc=%lx tval=%lx\n", cause, epc, - tval); + wolfBoot_printf("TRAP: cause=%lx epc=%lx tval=%lx\n", + cause, epc, tval); +#if defined(DEBUG_BOOT) + unsigned long sp_now; + __asm__ volatile("mv %0, sp" : "=r"(sp_now)); + wolfBoot_printf(" sp=%lx\n", sp_now); +#if defined(WOLFBOOT_RISCV_MMODE) && defined(TARGET_mpfs250) + /* Detect stack overflow by comparing SP to linker-defined + * stack bottom. Trap entry pushes 128 bytes before calling + * here, so the trapping SP was slightly higher. */ + extern uint8_t _main_hart_stack_bottom[]; + unsigned long bottom = (unsigned long)_main_hart_stack_bottom; + if (sp_now < bottom) { + wolfBoot_printf("STACK OVERFLOW: under by %lu\n", + bottom - sp_now); + } +#endif +#endif /* DEBUG_BOOT */ while (1) ; /* halt to prevent infinite trap-mret loop */ } -#endif #ifdef PLIC_BASE /* Check if this is an interrupt (MSB set) */ diff --git a/src/boot_riscv_start.S b/src/boot_riscv_start.S index 7ecb0d9bd0..491169093f 100644 --- a/src/boot_riscv_start.S +++ b/src/boot_riscv_start.S @@ -59,12 +59,24 @@ _reset: #ifdef TARGET_mpfs250 /* Enable L2 ways (mask 0x0B: ways 0, 1, 3) and clear shutdown * before copying text to L2 scratchpad. */ - li t1, 0x02010000 + li t1, 0x02010000 /* L2_CACHE_BASE */ li t2, 0x0B - sd t2, 8(t1) /* L2_WAY_ENABLE */ + sd t2, 8(t1) /* L2_WAY_ENABLE */ fence - li t1, 0x20002000 - sw zero, 0x174(t1) /* SYSREG_L2_SHUTDOWN_CR = 0 */ + li t1, 0x20002000 /* SYSREG_BASE */ + sw zero, 0x174(t1) /* SYSREG_L2_SHUTDOWN_CR = 0 */ + fence + + /* Route E51 D-cache to scratchpad ways (8-11) for the copy. + * With the default mask (0xFF = cache ways 0-7), stores to the + * Zero Device (0x0A000000) land in cache and never reach the + * scratchpad SRAM. The I-cache later fetches from scratchpad and + * gets uninitialized data (zeros), causing illegal-instruction + * traps. Setting the mask to 0xF00 forces stores into the actual + * scratchpad SRAM. See HSS mss_l2_cache.c config_l2_cache(). */ + li t4, 0x02010828 /* L2_WAY_MASK_E51_DCACHE */ + li t5, 0xF00 /* scratchpad ways 8-11 */ + sd t5, 0(t4) fence #endif @@ -85,6 +97,12 @@ _reset: addi t1, t1, 8 j .L_copy_text .L_copy_text_done: +#ifdef TARGET_mpfs250 + /* Restore D-cache to cache-only ways before normal execution */ + li t5, 0xFF /* cache ways 0-7 */ + sd t5, 0(t4) /* WAY_MASK_E51_DCACHE = cache-only */ +#endif + fence rw, rw fence.i /* flush icache before jumping to SRAM */ lui t0, %hi(.L_sram_entry) @@ -256,6 +274,7 @@ _copy_params: j .L_bss_clear .L_bss_clear_done: + #ifndef TARGET_mpfs250 /* Clear SiFive bus error unit accrued registers (not present on MPFS) */ la a4,0x01700020UL diff --git a/src/update_ram.c b/src/update_ram.c index b2f0d77212..174f321ac4 100644 --- a/src/update_ram.c +++ b/src/update_ram.c @@ -44,6 +44,10 @@ extern int wolfBoot_get_dts_size(void *dts_addr); extern uint32_t kernel_load_addr; extern uint32_t dts_load_addr; +#if defined(__WOLFBOOT) && defined(WOLFBOOT_LOAD_ADDRESS) +extern uint8_t _end[]; /* linker symbol: end of wolfBoot BSS */ +#endif + #if ((defined(EXT_FLASH) && defined(NO_XIP)) || \ (defined(EXT_ENCRYPTED) && defined(MMU))) && \ !defined(WOLFBOOT_NO_RAMBOOT) @@ -98,6 +102,16 @@ int wolfBoot_ramboot(struct wolfBoot_image *img, uint8_t *src, uint8_t *dst) } #endif +#if defined(__WOLFBOOT) && defined(WOLFBOOT_LOAD_ADDRESS) + /* Runtime overlap check: ensure image destination does not overwrite + * wolfBoot's own code/data/bss in RAM. */ + if ((uintptr_t)dst < (uintptr_t)_end) { + wolfBoot_printf("Error: image dest %p overlaps wolfBoot end %p\n", + dst, _end); + return -1; + } +#endif + /* Read the entire image into RAM */ wolfBoot_printf("Loading image %d bytes from %p to %p...", img_size, src + IMAGE_HEADER_SIZE, dst + IMAGE_HEADER_SIZE); @@ -212,6 +226,7 @@ void RAMFUNCTION wolfBoot_start(void) goto backup_on_failure; } BENCHMARK_END("done"); + #endif { diff --git a/tools/scripts/mpfs_qspi_prog.py b/tools/scripts/mpfs_qspi_prog.py index 46e553b321..6126b902ae 100755 --- a/tools/scripts/mpfs_qspi_prog.py +++ b/tools/scripts/mpfs_qspi_prog.py @@ -177,8 +177,13 @@ def main(): sys.exit(1) chunk = data[sent : sent + CHUNK_SIZE] - port.write(chunk) - port.flush() + # Send in small pieces: some USB-UART bridges (e.g., PolarFire + # Video Kit) stall on bulk writes. 8-byte pieces with 10ms + # pauses prevent the bridge's TX FIFO from stalling. + for ci in range(0, len(chunk), 8): + port.write(chunk[ci:ci+8]) + port.flush() + time.sleep(0.010) sent += len(chunk) pct = sent * 100 // total From 64815eac656da48afc51f1f45a431c3217fa3262 Mon Sep 17 00:00:00 2001 From: David Garske Date: Mon, 13 Apr 2026 15:53:36 -0700 Subject: [PATCH 2/2] Peer review fixes --- config/examples/polarfire_mpfs250_m_qspi.config | 3 ++- docs/Targets.md | 7 ++++--- hal/mpfs250-m.ld | 4 +++- hal/mpfs250.c | 16 ++++++++++------ hal/mpfs250.h | 1 + hal/sama5d3.ld | 3 ++- src/boot_riscv_start.S | 1 - tools/scripts/mpfs_qspi_prog.py | 8 +++++++- 8 files changed, 29 insertions(+), 14 deletions(-) diff --git a/config/examples/polarfire_mpfs250_m_qspi.config b/config/examples/polarfire_mpfs250_m_qspi.config index 192daa471c..be18785c87 100644 --- a/config/examples/polarfire_mpfs250_m_qspi.config +++ b/config/examples/polarfire_mpfs250_m_qspi.config @@ -59,7 +59,8 @@ OPTIMIZATION_LEVEL=1 RISCV_MMODE?=1 # Stack size per hart: set to 0 for M-mode (only E51/hart 0 runs; -# secondary harts park in eNVM WFI loop and never use L2 Scratch stacks) +# secondary harts park in eNVM WFI loop and never use L2 Scratch stacks). +# The linker script (mpfs250-m.ld) uses STACK_SIZE_PER_HART = 0 to match. CFLAGS_EXTRA+=-DSTACK_SIZE_PER_HART=0 # E51 core lacks RISC-V crypto extensions (Zknh), use portable C implementations diff --git a/docs/Targets.md b/docs/Targets.md index 30216066db..80ce3c43e5 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -838,11 +838,12 @@ These flags apply to `polarfire_mpfs250_m_qspi.config` and are added via `CFLAGS #### Stack overflow detection -The trap handler in `src/boot_riscv.c` automatically detects stack overflow on synchronous exceptions. When a trap fires with `SP < _main_hart_stack_bottom`, it prints: +The trap handler in `src/boot_riscv.c` automatically detects stack overflow on synchronous exceptions (requires `DEBUG_BOOT`). When a trap fires with `SP < _main_hart_stack_bottom`, it prints: ``` -TRAP: cause=2 epc=A000740 tval=0 sp=A02FFE8 -STACK OVERFLOW: sp=A02FFE8 < bottom=A030000 (under by 24) +TRAP: cause=2 epc=A000740 tval=0 + sp=A02FFE8 +STACK OVERFLOW: under by 24 ``` This is helpful for diagnosing illegal-instruction TRAPs at random valid `.text` addresses, which are the classic signature of stack overflow corrupting the return address. diff --git a/hal/mpfs250-m.ld b/hal/mpfs250-m.ld index f0f8aa76b9..95de90aea7 100644 --- a/hal/mpfs250-m.ld +++ b/hal/mpfs250-m.ld @@ -119,7 +119,9 @@ PROVIDE(_start_heap = _end); * * Total stack area: STACK_SIZE + 4 * STACK_SIZE_PER_HART */ -PROVIDE(STACK_SIZE_PER_HART = 8192); +/* M-mode: only E51 (hart 0) runs; secondary harts park in eNVM WFI loop. + * Set to 0 so no L2 Scratch is wasted on phantom secondary stacks. */ +PROVIDE(STACK_SIZE_PER_HART = 0); /* End of L2 scratchpad */ PROVIDE(_l2_scratch_end = ORIGIN(L2_SCRATCH) + LENGTH(L2_SCRATCH)); diff --git a/hal/mpfs250.c b/hal/mpfs250.c index 22f9711302..88afd50bbc 100644 --- a/hal/mpfs250.c +++ b/hal/mpfs250.c @@ -122,8 +122,9 @@ extern uint8_t _main_hart_hls; /* linker-provided address symbol; typed as uint8 # define WATCHDOG_TIMEOUT_TICKS ((WATCHDOG_TIMEOUT_MS) * 300U) #endif -/* Saved boot ROM watchdog value, restored in hal_prepare_boot() */ +/* Saved boot ROM watchdog values, restored in hal_prepare_boot() */ static uint32_t mpfs_wdt_default_mvrp = 0; +static uint32_t mpfs_wdt_default_ctrl = 0; /* CLINT MSIP register for IPI delivery */ @@ -178,8 +179,9 @@ static void qspi_uart_program(void); void hal_init(void) { #ifdef WOLFBOOT_RISCV_MMODE - /* Capture boot ROM WDT default for restoration in hal_prepare_boot() */ + /* Capture boot ROM WDT defaults for restoration in hal_prepare_boot() */ mpfs_wdt_default_mvrp = MSS_WDT_MVRP(MSS_WDT_E51_BASE); + mpfs_wdt_default_ctrl = MSS_WDT_CONTROL(MSS_WDT_E51_BASE); #ifndef WATCHDOG /* WATCHDOG=0 (default): disable WDT for the duration of wolfBoot. @@ -191,6 +193,7 @@ void hal_init(void) * never have to pet the WDT during ECDSA verify. */ MSS_WDT_REFRESH(MSS_WDT_E51_BASE) = 0xDEADC0DEU; MSS_WDT_MVRP(MSS_WDT_E51_BASE) = WATCHDOG_TIMEOUT_TICKS; + MSS_WDT_CONTROL(MSS_WDT_E51_BASE) |= MSS_WDT_CTRL_ENABLE; #endif mpfs_config_l2_cache(); @@ -407,12 +410,13 @@ int hal_dts_fixup(void* dts_addr) void hal_prepare_boot(void) { #ifdef WOLFBOOT_RISCV_MMODE - /* Restore boot ROM WDT default so the application sees a normal WDT. + /* Restore boot ROM WDT defaults so the application sees a normal WDT. * Refresh first so the timer doesn't fire immediately after we apply - * the new MVRP. Re-enable in case it was disabled by hal_init(). */ + * the new MVRP. Restore the original CONTROL value (including the + * enable bit) rather than unconditionally enabling. */ MSS_WDT_REFRESH(MSS_WDT_E51_BASE) = 0xDEADC0DEU; MSS_WDT_MVRP(MSS_WDT_E51_BASE) = mpfs_wdt_default_mvrp; - MSS_WDT_CONTROL(MSS_WDT_E51_BASE) |= MSS_WDT_CTRL_ENABLE; + MSS_WDT_CONTROL(MSS_WDT_E51_BASE) = mpfs_wdt_default_ctrl; #endif /* reset the eMMC/SD card? */ } @@ -1339,7 +1343,7 @@ static void uart_init_base(unsigned long base) MMUART_IER(base) = 0u; MMUART_FCR(base) = CLEAR_RX_FIFO_MASK | CLEAR_TX_FIFO_MASK | RXRDY_TXRDYN_EN_MASK; MMUART_MCR(base) &= ~(LOOP_MASK | RLOOP_MASK); - MMUART_MCR(base) |= (1U << 1); /* Assert RTS — required for USB-UART bridge CTS */ + MMUART_MCR(base) |= RTS_MASK; /* Assert RTS — required for USB-UART bridge CTS */ MMUART_MM1(base) &= ~(E_MSB_TX_MASK | E_MSB_RX_MASK); MMUART_MM2(base) &= ~(EAFM_MASK | ESWM_MASK); MMUART_MM0(base) &= ~(ETTG_MASK | ERTO_MASK | EFBR_MASK); diff --git a/hal/mpfs250.h b/hal/mpfs250.h index f6983e892e..552ad01921 100644 --- a/hal/mpfs250.h +++ b/hal/mpfs250.h @@ -168,6 +168,7 @@ extern const unsigned long MSS_UART_BASE_ADDR[5]; #define CLEAR_RX_FIFO_MASK (1U << 1) /* Clear receiver FIFO */ #define CLEAR_TX_FIFO_MASK (1U << 2) /* Clear transmitter FIFO */ +#define RTS_MASK (1U << 1) /* Request To Send */ #define LOOP_MASK (1U << 4) /* Local loopback */ #define RLOOP_MASK (1U << 5) /* Remote loopback & Automatic echo*/ diff --git a/hal/sama5d3.ld b/hal/sama5d3.ld index 20316d0582..fbf975a23d 100644 --- a/hal/sama5d3.ld +++ b/hal/sama5d3.ld @@ -38,11 +38,12 @@ SECTIONS } /* collect all uninitialized .bss sections */ - .bss (NOLOAD) : { + .bss (NOLOAD) : { . = ALIGN(4); _start_bss = .; *(.bss) _end_bss = .; + _end = .; } } diff --git a/src/boot_riscv_start.S b/src/boot_riscv_start.S index 491169093f..7fab6e8ebc 100644 --- a/src/boot_riscv_start.S +++ b/src/boot_riscv_start.S @@ -274,7 +274,6 @@ _copy_params: j .L_bss_clear .L_bss_clear_done: - #ifndef TARGET_mpfs250 /* Clear SiFive bus error unit accrued registers (not present on MPFS) */ la a4,0x01700020UL diff --git a/tools/scripts/mpfs_qspi_prog.py b/tools/scripts/mpfs_qspi_prog.py index 6126b902ae..dfc4b0f8df 100755 --- a/tools/scripts/mpfs_qspi_prog.py +++ b/tools/scripts/mpfs_qspi_prog.py @@ -57,7 +57,13 @@ def wait_for(port, keyword, timeout_sec, label=""): while time.monotonic() < deadline: remaining = deadline - time.monotonic() port.timeout = min(1.0, remaining) - line = port.readline() + try: + line = port.readline() + except serial.SerialException: + # JTAG reset can cause a brief USB-UART glitch on PTY proxies. + # Tolerate the transient disconnect and keep trying. + time.sleep(0.5) + continue if not line: continue text = line.decode("ascii", errors="replace").rstrip()