Re: [v2 PATCH 5/5] RISC-V: Add EFI stub support.

From: Ard Biesheuvel
Date: Wed Apr 15 2020 - 03:39:03 EST


On Wed, 15 Apr 2020 at 02:11, Atish Patra <Atish.Patra@xxxxxxx> wrote:
>
> On Tue, 2020-04-14 at 09:41 +0200, Ard Biesheuvel wrote:
> > On Mon, 13 Apr 2020 at 23:29, Atish Patra <atish.patra@xxxxxxx>
> > wrote:
> > > Add a RISC-V architecture specific stub code that actually copies
> > > the
> > > actual kernel image to a valid address and jump to it after boot
> > > services
> > > are terminated. Enable UEFI related kernel configs as well for
> > > RISC-V.
> > >
> > > Signed-off-by: Atish Patra <atish.patra@xxxxxxx>
> > > ---
> > > arch/riscv/Kconfig | 20 ++++
> > > arch/riscv/Makefile | 1 +
> > > arch/riscv/configs/defconfig | 1 +
> > > arch/riscv/include/asm/efi.h | 45 ++++++++
> > > drivers/firmware/efi/Kconfig | 2 +-
> > > drivers/firmware/efi/libstub/Makefile | 8 ++
> > > drivers/firmware/efi/libstub/riscv-stub.c | 131
> > > ++++++++++++++++++++++
> > > 7 files changed, 207 insertions(+), 1 deletion(-)
> > > create mode 100644 arch/riscv/include/asm/efi.h
> > > create mode 100644 drivers/firmware/efi/libstub/riscv-stub.c
> > >
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index f39e326a7a42..eb4f41c8f3ce 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -379,10 +379,30 @@ config CMDLINE_FORCE
> > >
> > > endchoice
> > >
> > > +config EFI_STUB
> > > + bool
> > > +
> > > +config EFI
> > > + bool "UEFI runtime support"
> > > + depends on OF
> > > + select LIBFDT
> > > + select UCS2_STRING
> > > + select EFI_PARAMS_FROM_FDT
> > > + select EFI_STUB
> > > + select EFI_GENERIC_STUB
> > > + default y
> > > + help
> > > + This option provides support for runtime services
> > > provided
> > > + by UEFI firmware (such as non-volatile variables,
> > > realtime
> > > + clock, and platform reset). A UEFI stub is also provided
> > > to
> > > + allow the kernel to be booted as an EFI application. This
> > > + is only useful on systems that have UEFI firmware.
> > > +
> > > endmenu
> > >
> > > menu "Power management options"
> > >
> > > source "kernel/power/Kconfig"
> > > +source "drivers/firmware/Kconfig"
> > >
> > > endmenu
> > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > > index fb6e37db836d..079435804d6d 100644
> > > --- a/arch/riscv/Makefile
> > > +++ b/arch/riscv/Makefile
> > > @@ -80,6 +80,7 @@ head-y := arch/riscv/kernel/head.o
> > > core-y += arch/riscv/
> > >
> > > libs-y += arch/riscv/lib/
> > > +core-$(CONFIG_EFI_STUB) +=
> > > $(objtree)/drivers/firmware/efi/libstub/lib.a
> > >
> > > PHONY += vdso_install
> > > vdso_install:
> > > diff --git a/arch/riscv/configs/defconfig
> > > b/arch/riscv/configs/defconfig
> > > index 4da4886246a4..ae69e12d306a 100644
> > > --- a/arch/riscv/configs/defconfig
> > > +++ b/arch/riscv/configs/defconfig
> > > @@ -129,3 +129,4 @@ CONFIG_DEBUG_BLOCK_EXT_DEVT=y
> > > # CONFIG_RUNTIME_TESTING_MENU is not set
> > > CONFIG_MEMTEST=y
> > > # CONFIG_SYSFS_SYSCALL is not set
> > > +CONFIG_EFI=y
> > > diff --git a/arch/riscv/include/asm/efi.h
> > > b/arch/riscv/include/asm/efi.h
> > > new file mode 100644
> > > index 000000000000..ba0a6d35cc15
> > > --- /dev/null
> > > +++ b/arch/riscv/include/asm/efi.h
> > > @@ -0,0 +1,45 @@
> > > +/* SPDX-License-Identifier: GPL-2.0 */
> > > +/*
> > > + * Copyright (C) 2020 Western Digital Corporation or its
> > > affiliates.
> > > + * Based on arch/arm64/include/asm/efi.h
> > > + */
> > > +#ifndef _ASM_EFI_H
> > > +#define _ASM_EFI_H
> > > +
> > > +#include <asm/io.h>
> > > +#include <asm/mmu_context.h>
> > > +#include <asm/ptrace.h>
> > > +#include <asm/tlbflush.h>
> > > +
> > > +#define VA_BITS_MIN 39
> > > +
> > > +/* on RISC-V, the FDT may be located anywhere in system RAM */
> > > +static inline unsigned long efi_get_max_fdt_addr(unsigned long
> > > dram_base)
> > > +{
> > > + return ULONG_MAX;
> > > +}
> > > +
> > > +/* Load initrd at enough distance from DRAM start */
> > > +static inline unsigned long efi_get_max_initrd_addr(unsigned long
> > > dram_base,
> > > + unsigned long
> > > image_addr)
> > > +{
> > > + return dram_base + SZ_256M;
> > > +}
> > > +
> > > +#define efi_bs_call(func, ...) efi_system_table()->boottime-
> > > >func(__VA_ARGS__)
> > > +#define efi_rt_call(func, ...) efi_system_table()->runtime-
> > > >func(__VA_ARGS__)
> > > +#define efi_is_native() (true)
> > > +
> > > +#define efi_table_attr(inst, attr) (inst->attr)
> > > +
> > > +#define efi_call_proto(inst, func, ...) inst->func(inst,
> > > ##__VA_ARGS__)
> > > +
> > > +#define alloc_screen_info(x...) (&screen_info)
> > > +extern char stext_offset[];
> > > +
> > > +static inline void free_screen_info(struct screen_info *si)
> > > +{
> > > +}
> > > +#define EFI_ALLOC_ALIGN SZ_64K
> > > +
> > > +#endif /* _ASM_EFI_H */
> > > diff --git a/drivers/firmware/efi/Kconfig
> > > b/drivers/firmware/efi/Kconfig
> > > index 2a2b2b96a1dc..fcdc789d3f87 100644
> > > --- a/drivers/firmware/efi/Kconfig
> > > +++ b/drivers/firmware/efi/Kconfig
> > > @@ -111,7 +111,7 @@ config EFI_GENERIC_STUB
> > >
> > > config EFI_ARMSTUB_DTB_LOADER
> > > bool "Enable the DTB loader"
> > > - depends on EFI_GENERIC_STUB
> > > + depends on EFI_GENERIC_STUB && !RISCV
> > > default y
> > > help
> > > Select this config option to add support for the dtb=
> > > command
> > > diff --git a/drivers/firmware/efi/libstub/Makefile
> > > b/drivers/firmware/efi/libstub/Makefile
> > > index d590504541f6..b1db3a793c43 100644
> > > --- a/drivers/firmware/efi/libstub/Makefile
> > > +++ b/drivers/firmware/efi/libstub/Makefile
> > > @@ -22,6 +22,8 @@ cflags-$(CONFIG_ARM64) := $(subst
> > > $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
> > > cflags-$(CONFIG_ARM) := $(subst
> > > $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
> > > -fno-builtin -fpic \
> > > $(call cc-option,-mno-single-
> > > pic-base)
> > > +cflags-$(CONFIG_RISCV) := $(subst
> > > $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) \
> > > + -fpic
> > >
> > > cflags-$(CONFIG_EFI_GENERIC_STUB) +=
> > > -I$(srctree)/scripts/dtc/libfdt
> > >
> > > @@ -56,6 +58,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) +=
> > > efi-stub.o fdt.o string.o \
> > > lib-$(CONFIG_ARM) += arm32-stub.o
> > > lib-$(CONFIG_ARM64) += arm64-stub.o
> > > lib-$(CONFIG_X86) += x86-stub.o
> > > +lib-$(CONFIG_RISCV) += riscv-stub.o
> > > CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
> > > CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
> > >
> > > @@ -80,6 +83,11 @@ STUBCOPY_FLAGS-$(CONFIG_ARM64) += --
> > > prefix-alloc-sections=.init \
> > > --prefix-symbols=__efistub_
> > > STUBCOPY_RELOC-$(CONFIG_ARM64) := R_AARCH64_ABS
> > >
> > > +STUBCOPY_FLAGS-$(CONFIG_RISCV) += --prefix-alloc-sections=.init \
> > > + --prefix-symbols=__efistub_
> > > +STUBCOPY_RELOC-$(CONFIG_RISCV) := R_RISCV_HI20
> > > +
> > > +
> > > $(obj)/%.stub.o: $(obj)/%.o FORCE
> > > $(call if_changed,stubcopy)
> > >
> > > diff --git a/drivers/firmware/efi/libstub/riscv-stub.c
> > > b/drivers/firmware/efi/libstub/riscv-stub.c
> > > new file mode 100644
> > > index 000000000000..acb69eae187a
> > > --- /dev/null
> > > +++ b/drivers/firmware/efi/libstub/riscv-stub.c
> > > @@ -0,0 +1,131 @@
> > > +// SPDX-License-Identifier: GPL-2.0
> > > +/*
> > > + * Copyright (C) 2013, 2014 Linaro Ltd; <roy.franz@xxxxxxxxxx>
> > > + * Copyright (C) 2020 Western Digital Corporation or its
> > > affiliates.
> > > + *
> > > + * This file implements the EFI boot stub for the RISC-V kernel.
> > > + * Adapted from ARM64 version at
> > > drivers/firmware/efi/libstub/arm64-stub.c.
> > > + */
> > > +
> > > +#include <linux/efi.h>
> > > +#include <linux/libfdt.h>
> > > +#include <linux/libfdt_env.h>
> > > +#include <asm/efi.h>
> > > +#include <asm/sections.h>
> > > +
> > > +#include "efistub.h"
> > > +/*
> > > + * RISCV requires the kernel image to placed TEXT_OFFSET bytes
> > > beyond a 2 MB
> > > + * aligned base for 64 bit and 4MB for 32 bit.
> > > + */
> > > +#ifdef CONFIG_64BIT
> > > +#define MIN_KIMG_ALIGN SZ_2M
> > > +#else
> > > +#define MIN_KIMG_ALIGN SZ_4M
> > > +#endif
> > > +/*
> > > + * TEXT_OFFSET ensures that we don't overwrite the firmware that
> > > probably sits
> > > + * at the beginning of the DRAM.
> > > + */
> > > +#define TEXT_OFFSET MIN_KIMG_ALIGN
> > > +
> >
> > Again, this is not the right approach. If there are any allocations
> > in
> > memory that EFI cannot touch, you have to mark them as reserved in
> > the
> > EFI memory map. Otherwise, anything running in the EFI context (GRUB,
> > systemd-boot, shim, etc) could step on it, not just the kernel.
> >
>
> Correct. That's why I added the U-Boot series which takes care of
> modifying the DT to add the reserved memory regions. With those
> changes, any memory marked reserved by the firmware will be marked
> reserved in the EFI memory map. The proper kernel will
> also never map those memory regions if "nomap" is set.
>

OK, good to know.

> To be honest, I forgot to remove the TEXT_OFFSET in this series. I
> have removed it in the next version.
>

OK.

> > > +typedef __attribute__((noreturn)) void
> > > (*jump_kernel_func)(unsigned int,
> > > + unsigned
> > > long);
> > > +efi_status_t check_platform_features(void)
> > > +{
> > > + return EFI_SUCCESS;
> > > +}
> > > +
> > > +static u32 get_boot_hartid_from_fdt(unsigned long fdt)
> > > +{
> > > + int chosen_node, len;
> > > + const fdt32_t *prop;
> > > +
> > > + chosen_node = fdt_path_offset((void *)fdt, "/chosen");
> > > + if (chosen_node < 0)
> > > + return U32_MAX;
> > > + prop = fdt_getprop((void *)fdt, chosen_node, "boot-hartid",
> > > &len);
> > > + if (!prop || len != sizeof(u32))
> > > + return U32_MAX;
> > > +
> > > + return fdt32_to_cpu(*prop);
> > > +}
> > > +
> > > +/*
> > > + * Jump to real kernel here with following constraints.
> > > + * 1. MMU should be disabled.
> > > + * 2. a0 should contain hartid
> > > + * 3. a1 should DT address
> > > + */
> > > +void __noreturn efi_enter_kernel(unsigned long entrypoint,
> > > unsigned long fdt,
> > > + unsigned long fdt_size)
> > > +{
> > > + unsigned long kernel_entry = entrypoint + (unsigned
> > > long)stext_offset;
> > > + jump_kernel_func jump_kernel = (void (*)(unsigned int,
> > > unsigned long))kernel_entry;
> > > + u32 hartid = get_boot_hartid_from_fdt(fdt);
> > > +
> > > + if (hartid == U32_MAX)
> > > + /* We can not use panic or BUG at this point */
> > > + __asm__ __volatile__ ("ebreak");
> > > + /* Disable MMU */
> > > + csr_write(CSR_SATP, 0);
> > > + jump_kernel(hartid, fdt);
> > > +}
> > > +
> > > +efi_status_t handle_kernel_image(unsigned long *image_addr,
> > > + unsigned long *image_size,
> > > + unsigned long *reserve_addr,
> > > + unsigned long *reserve_size,
> > > + unsigned long dram_base,
> > > + efi_loaded_image_t *image)
> > > +{
> > > + efi_status_t status;
> > > + unsigned long kernel_size, kernel_memsize = 0;
> > > + unsigned long preferred_offset;
> > > +
> > > + /*
> > > + * The preferred offset of the kernel Image is TEXT_OFFSET
> > > bytes beyond
> > > + * a KIMG_ALIGN aligned base.
> >
> > MIN_KIMG_ALIGN
> >
> > > + */
> > > + preferred_offset = round_up(dram_base, MIN_KIMG_ALIGN) +
> > > TEXT_OFFSET;
> > > +
> > > + kernel_size = _edata - _start;
> > > + kernel_memsize = kernel_size + (_end - _edata);
> > > +
> > > + /*
> > > + * Try a straight allocation at the preferred offset. It
> > > will also
> > > + * ensure that, on platforms where the [dram_base,
> > > dram_base + TEXT_OFFSET)
> > > + * interval is partially occupied by the firmware we can
> > > still place
> > > + * the kernel at the address 'dram_base + TEXT_OFFSET'. If
> > > the straight
> > > + * allocation fails, efi_low_alloc tries allocate memory
> > > from the lowest
> > > + * available LOADER_DATA mapped memory as long as address
> > > and size meet
> > > + * the alignment constraints.
> > > + */
> > > + if (*image_addr == preferred_offset)
> > > + return EFI_SUCCESS;
> > > +
> > > + *image_addr = *reserve_addr = preferred_offset;
> > > + *reserve_size = round_up(kernel_memsize, EFI_ALLOC_ALIGN);
> > > +
> > > + status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
> > > + EFI_LOADER_DATA,
> > > + *reserve_size / EFI_PAGE_SIZE,
> > > + (efi_physical_addr_t
> > > *)reserve_addr);
> > > +
> > > + if (status != EFI_SUCCESS) {
> > > + pr_efi("straight allocation failed do a low
> > > alloc\n");
> > > + *reserve_size = kernel_memsize + TEXT_OFFSET;
> > > + status = efi_low_alloc(*reserve_size,
> > > MIN_KIMG_ALIGN,
> > > + reserve_addr);
> > > +
> >
> > So, instead of the above, could we simply allocate kernel_memsize
> > bytes using efi_allocate_pages(), with the max address set to
> > 'round_up(dram_base, MIN_KIMG_ALIGN) + kernel_memsize + TEXT_OFFSET -
> > 1'? This should work in the majority of cases, and not trample on the
> > TEXT_OFFSET bytes at the start, regardless of whether they are
> > reserved or not.
> >
>
> As per my understanding, efi_allocate_pages aligns as per
> EFI_ALLOC_ALIGN. But the kernel needs to be placed at a memory aligned
> to MIN_KIMG_ALIGN. That's why efi_allocate_pages may not work.
>

It will work if you do the round_up() afterwards. But
efi_allocate_pages_aligned() essentially does the same thing, so if
that works for you, it is preferred.

> > In the future, I imagine you may want to relax the requirements
> > regarding the physical placement of the kernel, in which case you can
> > start falling back to a suitably aligned allocation anywhere in
> > memory.
> >
> >
> >
> > > + if (status != EFI_SUCCESS) {
> > > + pr_efi_err("Failed to relocate kernel\n");
> > > + *reserve_size = 0;
> > > + return status;
> > > + }
> > > + *image_addr = *reserve_addr + TEXT_OFFSET;
> > > + }
> > > + memcpy((void *)*image_addr, image->image_base,
> > > kernel_size);
> > > +
> > > + return EFI_SUCCESS;
> > > +}
> > > --
> > > 2.24.0
> > >
>
> Actually, we don't have any kernel placement restriction other than
> 2MB/4MB alignment.
> (considering that firmware now marks all the reserved memory in DT).
>
> Can we just use efi_allocate_pages_aligned similar to arm64 ? It seems
> to work during my testing. But I wanted to make sure that I didn't
> misunderstand anything.
>

Yes, that is the preferred approach.