From 00459d8f928165524a521e017046da847cee871d Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 15 Apr 2026 14:16:04 -0700 Subject: [PATCH 1/2] ZynqMP ZCU102 SD-card Linux boot: EL2 cleanup, DTS bootargs, SDHCI init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the pieces needed to boot Linux end-to-end from the ZCU102 SD card with wolfBoot at EL2: * src/boot_aarch64_start.S: new el2_flush_and_disable_mmu helper that cleans D-cache to PoC, invalidates I-cache to PoU, and clears SCTLR_EL2.{M,C,I}, then returns. Satisfies the ARM64 Linux boot protocol and is also correct for any other payload that sets up its own translation (hypervisor, RTOS, later bootloader stage). * src/boot_aarch64.c: call el2_flush_and_disable_mmu from do_boot() on the EL2 direct-jump path before falling through to the br x4 block. Also pull in hal/zynq.h and hal/nxp_ls1028a.h so the EL2_HYPERVISOR / BOOT_EL1 guards compile for those targets. * hal/zynq.c: implement hal_dts_fixup() — set /chosen/bootargs from LINUX_BOOTARGS (with a LINUX_BOOTARGS_ROOT default of /dev/mmcblk0p4) and grow DTB totalsize by 512 bytes to give fdt_setprop() headroom (matches hal/versal.c). Add hal_get_timer_us() via CNTPCT_EL0. * src/sdhci.c: add a 1 ms settling delay after sdhci_platform_init() and a CMD0 retry loop (up to 10 x 10 ms) so the ZCU102 Arasan controller reliably detects the card after the slot-type change + soft reset. * config/examples/zynqmp_sdcard.config: stay at EL2 by default (comment out BOOT_EL1), default rootfs to /dev/mmcblk0p4, turn DEBUG off. * hal/versal.c: correct the default LINUX_BOOTARGS_ROOT to /dev/mmcblk0p4 to match the shipped MBR layout. * docs/Targets.md: note the unconditional EL2 cleanup in the ZynqMP and Versal SD-card sections. Behavior change: non-Linux AArch64 EL2 payloads now enter with MMU off and caches clean instead of inheriting wolfBoot's tables. No in-tree payload relies on the old state leakage. 
--- config/examples/zynqmp_sdcard.config | 19 +++-- docs/Targets.md | 4 + hal/versal.c | 2 +- hal/zynq.c | 71 ++++++++++++++++- src/boot_aarch64.c | 39 ++++++++- src/boot_aarch64_start.S | 114 +++++++++++++++++++++++++++ src/sdhci.c | 28 ++++++- 7 files changed, 263 insertions(+), 14 deletions(-) diff --git a/config/examples/zynqmp_sdcard.config b/config/examples/zynqmp_sdcard.config index 00f62c4013..1f22c95acf 100644 --- a/config/examples/zynqmp_sdcard.config +++ b/config/examples/zynqmp_sdcard.config @@ -18,7 +18,7 @@ HASH?=SHA3 IMAGE_HEADER_SIZE?=1024 # Debug options -DEBUG?=1 +DEBUG?=0 DEBUG_SYMBOLS=1 DEBUG_UART=1 CFLAGS_EXTRA+=-DDEBUG_ZYNQ=1 @@ -39,8 +39,12 @@ NO_XIP=1 # ELF loading support ELF?=1 -# Boot Exception Level: transition from EL2 -> EL1 before jumping to app -BOOT_EL1?=1 +# Boot Exception Level: leave wolfBoot at EL2 for handoff to Linux (matches +# the standard PetaLinux U-Boot flow and preserves KVM/hypervisor use of +# EL2). The EL2 Linux-cleanup path in do_boot() will clean dcache/disable +# MMU before jumping to the kernel. To drop to EL1 via ERET instead, set +# BOOT_EL1?=1 (requires EL2_HYPERVISOR=1, which is the hal/zynq.h default). +#BOOT_EL1?=1 # General options VTOR?=1 @@ -78,8 +82,13 @@ CFLAGS_EXTRA+=-DBOOT_PART_B=2 # Disk read chunk size (512KB) CFLAGS_EXTRA+=-DDISK_BLOCK_SIZE=0x80000 -# Linux rootfs is on partition 4 (SD1 = mmcblk1) -CFLAGS_EXTRA+=-DLINUX_BOOTARGS_ROOT=\"/dev/mmcblk1p4\" +# Linux rootfs is on partition 4. Device naming depends on whether both +# ZynqMP SDHCI controllers are enabled in the XSA / device tree: +# * both sdhci0 + sdhci1 enabled -> SD1 = /dev/mmcblk1 +# * only sdhci1 enabled (ZCU102 default -> only external SD populated) +# -> SD1 = /dev/mmcblk0 +# Check `ls /sys/class/mmc_host/` on your running target to confirm. 
+CFLAGS_EXTRA+=-DLINUX_BOOTARGS_ROOT=\"/dev/mmcblk0p4\" # ============================================================================ # Boot Memory Layout diff --git a/docs/Targets.md b/docs/Targets.md index 6c915ee22d..f8d68a81f4 100644 --- a/docs/Targets.md +++ b/docs/Targets.md @@ -2599,6 +2599,8 @@ qemu-system-aarch64 -machine xlnx-zcu102 -cpu cortex-a53 -serial stdio -display Use `config/examples/zynqmp_sdcard.config`. This uses the Arasan SDHCI controller (SD1 - external SD card slot on ZCU102) and an **MBR** partitioned SD card. +wolfBoot unconditionally flushes the EL2 D-cache/I-cache and disables the EL2 MMU before handoff (see `el2_flush_and_disable_mmu` in `src/boot_aarch64_start.S`), satisfying the ARM64 Linux boot protocol with no extra config flag required. + **Partition layout** | Partition | Name | Size | Type | Contents | |-----------|--------|-----------|-------------------------------|-------------------------------------------| @@ -3005,6 +3007,8 @@ Typical boot timing with ECC384/SHA384 signing: Use `config/examples/versal_vmk180_sdcard.config`. This uses the Arasan SDHCI controller and an **MBR** partitioned SD card. +Versal defaults to `BOOT_EL1` — the handoff goes through `el2_to_el1_boot` (ERET to EL1). Custom `BOOT_EL2` Versal configs get the same EL2 cache/MMU teardown as ZynqMP via `el2_flush_and_disable_mmu` in `src/boot_aarch64_start.S`, so no extra config flag is needed to boot Linux directly at EL2. 
+ **Partition layout** | Partition | Name | Size | Type | Contents | |-----------|------|------|------|----------| diff --git a/hal/versal.c b/hal/versal.c index 5423e7c720..baf8aadd7a 100644 --- a/hal/versal.c +++ b/hal/versal.c @@ -66,7 +66,7 @@ /* Linux kernel command line arguments */ #ifndef LINUX_BOOTARGS #ifndef LINUX_BOOTARGS_ROOT -#define LINUX_BOOTARGS_ROOT "/dev/mmcblk0p2" +#define LINUX_BOOTARGS_ROOT "/dev/mmcblk0p4" #endif #define LINUX_BOOTARGS \ diff --git a/hal/zynq.c b/hal/zynq.c index 37ff2ee181..1462e220d3 100644 --- a/hal/zynq.c +++ b/hal/zynq.c @@ -57,6 +57,20 @@ /* QSPI bare-metal */ #endif +/* DTB fixup for kernel command line. Override LINUX_BOOTARGS or + * LINUX_BOOTARGS_ROOT in your config to customize. + * + * Note: console=ttyPS0 is ZynqMP-specific (PS UART0). Versal's default + * (hal/versal.c) omits the console= token because Versal relies on + * earlycon alone plus a DT-declared stdout-path. */ +#ifndef LINUX_BOOTARGS +#ifndef LINUX_BOOTARGS_ROOT +#define LINUX_BOOTARGS_ROOT "/dev/mmcblk0p4" +#endif +#define LINUX_BOOTARGS \ + "earlycon console=ttyPS0,115200 root=" LINUX_BOOTARGS_ROOT " rootwait" +#endif + /* QSPI Slave Device Information */ typedef struct QspiDev { uint32_t mode; /* GQSPI_GEN_FIFO_MODE_SPI, GQSPI_GEN_FIFO_MODE_DSPI or GQSPI_GEN_FIFO_MODE_QSPI */ @@ -1795,7 +1809,20 @@ void RAMFUNCTION ext_flash_unlock(void) } -#ifdef MMU +#if defined(MMU) && defined(__WOLFBOOT) +/* Get current time in microseconds using ARMv8 generic timer */ +uint64_t hal_get_timer_us(void) +{ + uint64_t count, freq; + __asm__ volatile("mrs %0, CNTPCT_EL0" : "=r"(count)); + __asm__ volatile("mrs %0, CNTFRQ_EL0" : "=r"(freq)); + if (freq == 0) + return 0; + /* Use __uint128_t to avoid overflow of (count * 1e6) at long uptimes + * (would overflow uint64_t after ~51h at 100MHz). 
*/ + return (uint64_t)(((__uint128_t)count * 1000000ULL) / freq); +} + void* hal_get_dts_address(void) { #ifdef WOLFBOOT_DTS_BOOT_ADDRESS @@ -1809,8 +1836,46 @@ void* hal_get_dts_address(void) int hal_dts_fixup(void* dts_addr) { - /* place FDT fixup specific to ZynqMP here */ - //fdt_set_boot_cpuid_phys(buf, fdt_boot_cpuid_phys(fdt)); + int off, ret; + struct fdt_header *fdt = (struct fdt_header *)dts_addr; + + /* Verify FDT header */ + ret = fdt_check_header(dts_addr); + if (ret != 0) { + wolfBoot_printf("FDT: Invalid header! %d\n", ret); + return ret; + } + + wolfBoot_printf("FDT: Version %d, Size %d\n", + fdt_version(fdt), fdt_totalsize(fdt)); + + /* Expand totalsize so fdt_setprop() has in-blob free space to place + * a new/larger bootargs property. Physical headroom is already + * guaranteed by the load-address layout (DTB at WOLFBOOT_LOAD_DTS_ADDRESS, + * kernel loaded much higher), so growing the header is safe. Matches + * the pattern used in hal/versal.c:hal_dts_fixup. */ + fdt_set_totalsize(fdt, fdt_totalsize(fdt) + 512); + + /* Find /chosen node */ + off = fdt_find_node_offset(fdt, -1, "chosen"); + if (off < 0) { + /* Create /chosen node if it doesn't exist */ + off = fdt_add_subnode(fdt, 0, "chosen"); + } + if (off < 0) { + wolfBoot_printf("FDT: Failed to find/create chosen node (%d)\n", off); + return off; + } + + /* Set bootargs property - overrides PetaLinux default root= with + * the wolfBoot partition layout. 
*/ + wolfBoot_printf("FDT: Setting bootargs: %s\n", LINUX_BOOTARGS); + ret = fdt_fixup_str(fdt, off, "chosen", "bootargs", LINUX_BOOTARGS); + if (ret < 0) { + wolfBoot_printf("FDT: Failed to set bootargs (%d)\n", ret); + return ret; + } + return 0; } #endif diff --git a/src/boot_aarch64.c b/src/boot_aarch64.c index 7b8fccbadc..495a5ae61f 100644 --- a/src/boot_aarch64.c +++ b/src/boot_aarch64.c @@ -26,9 +26,16 @@ #include "printf.h" #include "wolfboot/wolfboot.h" -/* Include platform-specific header for EL configuration defines */ -#ifdef TARGET_versal +/* Include platform-specific header for EL configuration defines + * (EL2_HYPERVISOR, etc.). Must be visible here so the BOOT_EL1 / + * EL2_HYPERVISOR guards around the EL2->EL1 ERET transition below + * compile in for the active target. */ +#if defined(TARGET_versal) #include "hal/versal.h" +#elif defined(TARGET_zynq) +#include "hal/zynq.h" +#elif defined(TARGET_ls1028a) +#include "hal/nxp_ls1028a.h" #endif /* Linker exported variables */ @@ -43,6 +50,17 @@ extern unsigned int _end_data; extern void main(void); extern void gicv2_init_secure(void); +/* Asm helper in boot_aarch64_start.S: cleans the entire D-cache to PoC, + * invalidates the I-cache to PoU, and disables MMU + I-cache + D-cache + * via SCTLR_EL2, then returns. Required before handoff to any payload + * that sets up its own translation (Linux kernel, hypervisor, bare-metal + * RTOS, later bootloader stage), and mandatory for the ARM64 Linux boot + * protocol. Only built when EL2_HYPERVISOR == 1 is visible to + * boot_aarch64_start.S (e.g. via hal/zynq.h on ZynqMP). */ +#if defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 +extern void el2_flush_and_disable_mmu(void); +#endif + /* SKIP_GIC_INIT - Skip GIC initialization before booting app * This is needed for: * - Versal: Uses GICv3, not GICv2. BL31 handles GIC setup. 
@@ -163,7 +181,22 @@ void RAMFUNCTION do_boot(const uint32_t *app_offset) el2_to_el1_boot((uintptr_t)app_offset, dts); } #else - /* Stay at current EL (EL2 or EL3) and jump directly to application */ + /* Stay at current EL (EL2 or EL3) and jump directly to application. + * + * Before the jump, tear down wolfBoot's EL2 MMU/caches so the next + * stage enters with a clean state. Mandatory for the ARM64 Linux + * boot protocol (Linux's check_mmu_enabled_at_boot() panics with + * "Non-EFI boot detected with MMU and caches enabled" otherwise), + * and correct for any payload that sets up its own translation + * (hypervisor, RTOS, later bootloader stage). */ +#if defined(MMU) && defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 + if (current_el() == 2) { + wolfBoot_printf("do_boot: flushing caches, disabling MMU\n"); + el2_flush_and_disable_mmu(); + } +#endif + + /* Common EL2/EL3 direct-jump path (after any EL2 cleanup above): br x4 */ /* Set application address via x4 */ asm volatile("mov x4, %0" : : "r"(app_offset)); diff --git a/src/boot_aarch64_start.S b/src/boot_aarch64_start.S index 03b6359e71..195c8ef03c 100644 --- a/src/boot_aarch64_start.S +++ b/src/boot_aarch64_start.S @@ -1334,4 +1334,118 @@ el2_to_el1_boot: b . #endif /* BOOT_EL1 && EL2_HYPERVISOR */ + +/* + * Clean entire D-cache to the Point of Coherency (PoC), invalidate the + * I-cache to the Point of Unification (PoU), and disable MMU + I/D-cache + * at EL2. Returns normally to the caller. + * + * Terminology (ARM ARM B2.8): + * PoC - Point of Coherency: the point at which all observers (CPUs, + * DMA masters, etc.) see the same memory. Cleaning to PoC + * guarantees the image bytes we memcpy'd are visible to the + * next stage's first uncached instruction fetches. + * PoU - Point of Unification: the point at which instruction and data + * caches converge. Invalidating I-cache to PoU ensures stale + * fetches are discarded before we hand off. 
+ * + * wolfBoot's startup (line ~347 above) enables MMU+I+D cache at EL2 for + * its own use. Any payload we hand off to (Linux kernel, hypervisor, + * bare-metal RTOS, a later bootloader stage) expects to enter without + * inheriting wolfBoot's translation tables, and the ARM64 Linux boot + * protocol (Documentation/arch/arm64/booting.rst) explicitly REQUIRES + * MMU off, D-cache off, and the loaded image cleaned to PoC. This + * helper performs that teardown and returns; the caller then performs + * the actual jump with whatever ABI the payload expects. + * + * Safe to return because wolfBoot's .text is identity-mapped (VA=PA) + * at EL2, so instruction fetch keeps working after SCTLR_EL2.M is + * cleared. + * + * AAPCS64: clobbers x0-x11; x30 (LR) is preserved because the + * set/way loop body does not touch it. + */ +#if defined(EL2_HYPERVISOR) && EL2_HYPERVISOR == 1 +.global el2_flush_and_disable_mmu +el2_flush_and_disable_mmu: + /* ---- 1. Clean & invalidate entire data cache to PoC by set/way ---- + * Standard ARMv8 routine, adapted from arm-trusted-firmware / + * U-Boot / Linux. Iterates every (level, set, way) triple and + * issues `dc cisw` on it. Terminates at the Level of Coherency + * (LoC) read from CLIDR_EL1. 
*/ + mrs x0, clidr_el1 + and x3, x0, #0x07000000 /* x3 = LoC (level of coherency) */ + lsr x3, x3, #23 /* x3 = LoC * 2 */ + cbz x3, .Ldcache_done + mov x10, #0 /* x10 = current cache level << 1 */ + +.Ldcache_level_loop: + add x2, x10, x10, lsr #1 /* x2 = level * 3 */ + lsr x1, x0, x2 /* x1 = ctype field for this level */ + and x1, x1, #7 + cmp x1, #2 + b.lt .Ldcache_skip_level /* No data cache at this level */ + msr csselr_el1, x10 /* Select cache level (instruction = 0) */ + isb + mrs x1, ccsidr_el1 + and x2, x1, #7 /* x2 = log2(line length) - 4 */ + add x2, x2, #4 /* x2 = log2(line length) */ + mov x4, #0x3ff + and x4, x4, x1, lsr #3 /* x4 = max way number */ + clz w5, w4 /* x5 = bit position of way size */ + mov x7, #0x7fff + and x7, x7, x1, lsr #13 /* x7 = max set number */ + +.Ldcache_set_loop: + mov x9, x4 /* x9 = current way */ +.Ldcache_way_loop: + lsl x6, x9, x5 + orr x11, x10, x6 /* level | way */ + lsl x6, x7, x2 + orr x11, x11, x6 /* level | way | set */ + dc cisw, x11 /* clean & invalidate by set/way */ + subs x9, x9, #1 + b.ge .Ldcache_way_loop + subs x7, x7, #1 + b.ge .Ldcache_set_loop + +.Ldcache_skip_level: + add x10, x10, #2 + cmp x3, x10 + b.gt .Ldcache_level_loop + +.Ldcache_done: + mov x10, #0 + msr csselr_el1, x10 + dsb sy + isb + + /* ---- 2. Invalidate entire I-cache to PoU ---- + * `ic iallu` invalidates all instruction cache to the Point of + * Unification for the local PE. */ + ic iallu + dsb ish + isb + + /* ---- 3. Disable MMU + I-cache + D-cache at EL2 ---- + * SCTLR_EL2.M (bit 0) = MMU enable + * SCTLR_EL2.C (bit 2) = D-cache enable + * SCTLR_EL2.I (bit 12) = I-cache enable + * + * ARM ARM (B2.7.2) requires `dsb sy` before `isb` when modifying + * SCTLR_ELx.M so the system register write is observable before the + * pipeline is re-synchronized. Matches the MMU-enable sequence used + * earlier in this file. 
+ */ + mrs x0, SCTLR_EL2 + bic x0, x0, #(1 << 0) /* M */ + bic x0, x0, #(1 << 2) /* C */ + bic x0, x0, #(1 << 12) /* I */ + msr SCTLR_EL2, x0 + dsb sy + isb + + ret +#endif /* EL2_HYPERVISOR */ + .end diff --git a/src/sdhci.c b/src/sdhci.c index ba82947228..222ca5daf9 100644 --- a/src/sdhci.c +++ b/src/sdhci.c @@ -581,6 +581,7 @@ static uint32_t sdhci_get_response_bits(int from, int count) /* voltage: 0=off or SDHCI_SRS10_BVS_[X_X]V */ static int sdcard_power_init_seq(uint32_t voltage) { + int retries; /* Set power to specified voltage */ int status = sdhci_set_power(voltage); #ifdef DEBUG_SDHCI @@ -590,9 +591,24 @@ static int sdcard_power_init_seq(uint32_t voltage) SDHCI_REG(SDHCI_SRS09), SDHCI_REG(SDHCI_SRS10), SDHCI_REG(SDHCI_SRS11), SDHCI_REG(SDHCI_SRS12)); #endif - if (status == 0) { - /* send CMD0 (go idle) to reset card */ + if (status != 0) + return status; + /* SD spec requires >= 1ms after power stabilizes before CMD0. */ + udelay(1000); + /* Some cards and the ZynqMP Arasan controller need more settling + * time after the slot-type change + soft reset in sdhci_platform_init(). + * Use a retry loop: if CMD0 fails, wait and retry (self-calibrating). */ + for (retries = 0; retries < 10; retries++) { status = sdhci_cmd(MMC_CMD0_GO_IDLE, 0, SDHCI_RESP_NONE); + if (status == 0) + break; + udelay(10000); /* 10ms between retries */ + } + if (status != 0) { + wolfBoot_printf("SD: CMD0 failed after %d retries\n", retries); + } + else if (retries > 0) { + wolfBoot_printf("SD: CMD0 succeeded after %d retries\n", retries); } if (status == 0) { /* send the operating conditions command */ @@ -1387,6 +1403,11 @@ int sdhci_init(void) /* Call platform-specific initialization (clocks, resets, pin mux) */ sdhci_platform_init(); + /* Allow controller to settle after platform init (slot type change, + * soft reset, clock configuration). Without this, the controller may + * not be ready to accept register writes on some platforms. 
*/ + udelay(1000); /* 1ms */ + /* Reset the host controller */ sdhci_reg_or(SDHCI_HRS00, SDHCI_HRS00_SWR); /* Bit will clear when reset is done */ @@ -1482,6 +1503,9 @@ int sdhci_init(void) /* Setup 400khz starting clock */ sdhci_set_clock(SDHCI_CLK_400KHZ); + /* Allow clock to stabilize before issuing first command */ + udelay(1000); /* 1ms */ + #ifdef DISK_EMMC /* Run full eMMC card initialization */ status = emmc_card_full_init(); From c5f6777b146e5dea0958ccb4128dc54b771e9dde Mon Sep 17 00:00:00 2001 From: David Garske Date: Fri, 17 Apr 2026 10:26:46 -0700 Subject: [PATCH 2/2] ZynqMP ZCU102 SD-card Linux boot fixes Fixes for booting PetaLinux 2025.2 from SD card on ZCU102: - hal/zynq: implement hal_dts_fixup() for DTB bootargs override - hal/zynq: add hal_get_timer_us() using ARMv8 generic timer - src/sdhci: add udelay settle times and CMD0 retry loop for cold boot - src/boot_aarch64: clean D-cache and disable MMU/caches at EL2 before jumping to Linux (ARM64 boot protocol requirement) - config/examples/zynqmp_sdcard: reduce DISK_BLOCK_SIZE from 512KB to 64KB (0x10000) to avoid SDMA boundary-crossing corruption --- config/examples/zynqmp_sdcard.config | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/config/examples/zynqmp_sdcard.config b/config/examples/zynqmp_sdcard.config index 1f22c95acf..c0d3dd035d 100644 --- a/config/examples/zynqmp_sdcard.config +++ b/config/examples/zynqmp_sdcard.config @@ -17,7 +17,7 @@ SIGN?=RSA4096 HASH?=SHA3 IMAGE_HEADER_SIZE?=1024 -# Debug options +# Debug options - for production, set DEBUG=0 and drop DEBUG_ZYNQ DEBUG?=0 DEBUG_SYMBOLS=1 DEBUG_UART=1 @@ -38,6 +38,7 @@ NO_XIP=1 # ELF loading support ELF?=1 +#DEBUG_ELF?=1 # Boot Exception Level: leave wolfBoot at EL2 for handoff to Linux (matches # the standard PetaLinux U-Boot flow and preserves KVM/hypervisor use of @@ -79,8 +80,10 @@ WOLFBOOT_NO_PARTITIONS=1 CFLAGS_EXTRA+=-DBOOT_PART_A=1 CFLAGS_EXTRA+=-DBOOT_PART_B=2 -# Disk read chunk size (512KB) 
-CFLAGS_EXTRA+=-DDISK_BLOCK_SIZE=0x80000 +# Disk read chunk size. Must be less than the SDHCI SDMA buffer boundary +# (512KB default, SDHCI_DMA_BUFF_BOUNDARY in sdhci.h). Using 512KB causes +# a boundary-crossing edge case that corrupts reads under optimization. +CFLAGS_EXTRA+=-DDISK_BLOCK_SIZE=0x10000 # Linux rootfs is on partition 4. Device naming depends on whether both # ZynqMP SDHCI controllers are enabled in the XSA / device tree: