@@ -234,6 +234,7 @@ Known Issues and Limitations
Change Log
----------
+- Added boot-wrapper-aarch64 support for booting SMP payloads at S-EL2.
- Enabled testimage support by default.
- Added virtio\_rng to improve random number generation.
- Added U-Boot v2022.01 for UEFI support.
@@ -18,6 +18,11 @@ SRC_URI:append = " \
file://0014-common-Add-mem-usage-to-memreserve.patch \
file://0015-boot-Add-the-enable-keep-el-compile-option.patch \
file://0016-Makefile-Change-COUNTER_FREQ-to-100-MHz.patch \
+ file://0017-PSCI-Apply-flush-cache-after-setting-branch_data.patch \
+ file://0018-PSCI-Add-function-call-entry-point.patch \
+ file://0019-lds-Rearrange-and-mark-the-sections.patch \
+ file://0020-common-Provide-firmware-info-using-libfdt.patch \
+ file://0021-boot-Enable-firmware-node-initialization.patch \
"
BOOT_WRAPPER_AARCH64_CMDLINE = "\
new file mode 100644
@@ -0,0 +1,52 @@
+From 6923f2a0c59cf92ba5ad50ec1d658a357b4ba5d7 Mon Sep 17 00:00:00 2001
+From: Jaxson Han <jaxson.han@arm.com>
+Date: Tue, 2 Nov 2021 10:48:39 +0800
+Subject: [PATCH] PSCI: Apply flush cache after setting branch_data
+
+For v8-R64, the hypervisor calls boot-wrapper's PSCI service using a
+simple function call (instead of hvc).
+
+In this case, the hypervisor's main core has enabled the MPU and cache,
+but the secondary cores, which are spinning, have not enabled the cache.
+That means if the main core sets the branch_data to 1 to boot other
+cores, the secondary cores cannot see the change of branch_data and
+also cannot break the spin.
+
+Thus, the PSCI service in boot-wrapper needs a cache flush after
+setting branch_data in order to let other cores see the change.
+
+Issue-ID: SCM-3816
+Upstream-Status: Inappropriate [other]
+ Implementation pending further discussion
+Signed-off-by: Jaxson Han <jaxson.han@arm.com>
+Change-Id: Ifc282091c54d8fb2ffdb8cfa7fd3ffc1f4be717e
+---
+ common/psci.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/common/psci.c b/common/psci.c
+index 945780b..6efc695 100644
+--- a/common/psci.c
++++ b/common/psci.c
+@@ -24,12 +24,18 @@ static unsigned long branch_table[NR_CPUS];
+
+ bakery_ticket_t branch_table_lock[NR_CPUS];
+
++static inline void flush_per_cpu_data(void *data)
++{
++ asm volatile ("dc cvac, %0" : : "r" (data));
++}
++
+ static int psci_store_address(unsigned int cpu, unsigned long address)
+ {
+ if (branch_table[cpu] != PSCI_ADDR_INVALID)
+ return PSCI_RET_ALREADY_ON;
+
+ branch_table[cpu] = address;
++ flush_per_cpu_data((void*)&(branch_table[cpu]));
+ return PSCI_RET_SUCCESS;
+ }
+
+--
+2.25.1
+
new file mode 100644
@@ -0,0 +1,74 @@
+From ed46e83df2400b1b3f3364169aacf787bd91bd45 Mon Sep 17 00:00:00 2001
+From: Jaxson Han <jaxson.han@arm.com>
+Date: Tue, 25 Jan 2022 14:56:36 +0800
+Subject: [PATCH] PSCI: Add function call entry point
+
+The max exception level of Armv8R AArch64 is EL2, which means it has no
+exclusive EL for firmware. That is, firmware and hypervisors have to share
+the EL2. Also, hypervisors cannot call firmware services via a 'smc'
+instruction. Thus, boot-wrapper has to provide a function entry point
+for Armv8R AArch64.
+
+Issue-Id: SCM-3816
+Upstream-Status: Inappropriate [other]
+ Implementation pending further discussion
+Signed-off-by: Jaxson Han <jaxson.han@arm.com>
+Change-Id: I06ec8e50298603155c6d8ae2330e71db2f111182
+---
+ common/psci.c | 24 ++++++++++++++++++++----
+ 1 file changed, 20 insertions(+), 4 deletions(-)
+
+diff --git a/common/psci.c b/common/psci.c
+index 6efc695..8fdefb5 100644
+--- a/common/psci.c
++++ b/common/psci.c
+@@ -20,6 +20,8 @@
+
+ extern unsigned int spsr_to_elx;
+
++unsigned long flag_from_smc_fn[NR_CPUS];
++
+ static unsigned long branch_table[NR_CPUS];
+
+ bakery_ticket_t branch_table_lock[NR_CPUS];
+@@ -49,12 +51,14 @@ static int psci_cpu_on(unsigned long target_mpidr, unsigned long address)
+ return PSCI_RET_INVALID_PARAMETERS;
+
+ bakery_lock(branch_table_lock, this_cpu);
+- ret = psci_store_address(cpu, address);
+- bakery_unlock(branch_table_lock, this_cpu);
+-
+ #ifdef KEEP_EL
+- spsr_to_elx = SPSR_KERNEL_EL1;
++ if (!flag_from_smc_fn[this_cpu]) {
++ spsr_to_elx = SPSR_KERNEL_EL1;
++ flush_per_cpu_data((void*)&(spsr_to_elx));
++ }
+ #endif
++ ret = psci_store_address(cpu, address);
++ bakery_unlock(branch_table_lock, this_cpu);
+
+ return ret;
+ }
+@@ -90,6 +94,18 @@ long psci_call(unsigned long fid, unsigned long arg1, unsigned long arg2)
+ }
+ }
+
++long smc_fn_entry(unsigned long fid, unsigned long arg1, unsigned long arg2)
++{
++ long ret;
++ unsigned int this_cpu = this_cpu_logical_id();
++
++ flag_from_smc_fn[this_cpu] = 1;
++ ret = psci_call(fid, arg1, arg2);
++ flag_from_smc_fn[this_cpu] = 0;
++
++ return ret;
++}
++
+ void __noreturn psci_first_spin(unsigned int cpu)
+ {
+ if (cpu == MPIDR_INVALID)
+--
+2.25.1
+
new file mode 100644
@@ -0,0 +1,61 @@
+From 36b5fa3f4db49ac7aef42ff1d58a895226c7e96c Mon Sep 17 00:00:00 2001
+From: Jaxson Han <jaxson.han@arm.com>
+Date: Tue, 2 Nov 2021 15:10:28 +0800
+Subject: [PATCH] lds: Rearrange and mark the sections
+
+To make it possible for the next stage to protect sections with MPU,
+boot-wrapper needs to provide the text and data section information.
+By rearranging the .data .rodata and .vector sections, all sections
+can be split into 2 big sections:
+ - RO and Executable
+ - RW and Non-Executable
+Add firmware_data to mark the boundary, thus:
+firmware_start to firmware_data - 1 indicates RO and Executable section,
+firmware_data to firmware_end - 1 indicates RW and Non-Executable
+section.
+
+Also, firmware_data and firmware_end should be aligned to 64 bytes,
+since the Armv8R AArch64 MPU requires it.
+
+Issue-ID: SCM-3816
+Upstream-Status: Inappropriate [other]
+ Implementation pending further discussion
+Signed-off-by: Jaxson Han <jaxson.han@arm.com>
+Change-Id: I55342aa7492f2c7b5c16ab9a6472c8cb45cff8fd
+---
+ model.lds.S | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/model.lds.S b/model.lds.S
+index ab98ddf..85451f9 100644
+--- a/model.lds.S
++++ b/model.lds.S
+@@ -63,12 +63,16 @@ SECTIONS
+ }
+ #endif
+
++#define FIRMWARE_ALIGN . = ALIGN(1 << 6)
+ .boot PHYS_OFFSET: {
+ PROVIDE(firmware_start = .);
+ *(.init)
+ *(.text*)
+- *(.data* .rodata* .bss* COMMON)
+ *(.vectors)
++ *(.rodata*)
++ FIRMWARE_ALIGN;
++ PROVIDE(firmware_data = .);
++ *(.data* .bss* COMMON)
+ *(.stack)
+ PROVIDE(etext = .);
+ }
+@@ -77,6 +81,7 @@ SECTIONS
+ mbox = .;
+ QUAD(0x0)
+ }
++ FIRMWARE_ALIGN;
+ PROVIDE(firmware_end = .);
+
+ ASSERT(etext <= (PHYS_OFFSET + TEXT_LIMIT), ".text overflow!")
+--
+2.25.1
+
new file mode 100644
@@ -0,0 +1,345 @@
+From 8bdbb64d13f14d40546b71dbcfee2b2a8ea002a5 Mon Sep 17 00:00:00 2001
+From: Jaxson Han <jaxson.han@arm.com>
+Date: Wed, 29 Dec 2021 15:17:38 +0800
+Subject: [PATCH] common: Provide firmware info using libfdt
+
+Boot-wrapper uses libfdt to provide more info in the device tree.
+We add a new node to include this new firmware-relevant information.
+The new node is defined as follows:
+ fw-shared-info {
+ compatible = "firmware,shared_info";
+
+ #address-cells = <0x02>;
+ #size-cells = <0x02>;
+
+ version = "1.0";
+ regions = <START_ADDR_HIGH START_ADDR_LOW SIZE_HIGH SIZE_LOW
+ 0x0 0x80000000 0x0 0x400000
+ 0x0 0x90000000 0x0 0x400000
+ 0x0 0xA0000000 0x0 0x400000>;
+ regions-permission = "RX", "R", "RWX", "RW";
+ regions-cache = "Cache", "NCache", "Cache", "Device";
+
+ function_entry = <ENTRY_ADDR_HIGH ENTRY_ADDR_LOW>;
+ };
+The node path is /fw-shared-info.
+For boot-wrapper, in real case, it will be:
+ fw-shared-info {
+ compatible = "firmware,shared_info";
+
+ #address-cells = <0x02>;
+ #size-cells = <0x02>;
+
+ version = "1.0";
+ regions = <0x0 firmware_start 0x0 firmware_code_size
+ 0x0 firmware_data 0x0 firmware_data_size>;
+ regions-permission = "RX", "RW";
+ regions-cache = "Cache", "Cache";
+
+ function_entry = <0x0 smc_fn_entry>;
+ };
+
+Issue-Id: SCM-3816
+Upstream-Status: Inappropriate [other]
+ Implementation pending further discussion
+Signed-off-by: Jaxson Han <jaxson.han@arm.com>
+Change-Id: I6ebc59ce2bd3939b0fe066720d57821eaa1bed27
+---
+ common/device_tree.c | 271 ++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 270 insertions(+), 1 deletion(-)
+
+diff --git a/common/device_tree.c b/common/device_tree.c
+index 4d0876c..7f7befc 100644
+--- a/common/device_tree.c
++++ b/common/device_tree.c
+@@ -8,13 +8,225 @@
+ */
+ #include <libfdt.h>
+
++#define DEVICE_TREE_DEBUG 1
++
++#define FW_NODE_NAME "/fw-shared-info"
++#define FW_COMPAT "firmware,shared_info"
++#define FW_INFO_VER "1.0"
++
++#ifdef BOOTWRAPPER_32
++#define CELL_NUM 1
++#define VAL_TYPE uint32_t
++#else
++#define CELL_NUM 2
++#define VAL_TYPE uint64_t
++#endif
++
++#define ALIGN(x) (((x) + (FDT_TAGSIZE) - 1) & ~((FDT_TAGSIZE) - 1))
++
+ extern unsigned long dtb;
+-extern char firmware_start[], firmware_end[];
++extern char firmware_start[], firmware_data[], firmware_end[];
++
++extern long smc_fn_entry(unsigned long, unsigned long, unsigned long);
+
+ extern void print_string(const char *str);
++extern void print_hex(unsigned int val);
+
+ static void *blob;
+
++static char *realloc_node(char *fdt, const char *name)
++{
++ int delta;
++ int new_sz;
++ /* FDT_BEGIN_NODE, node name in off_struct and FDT_END_NODE */
++ delta = sizeof(struct fdt_node_header) + ALIGN(strlen(name) + 1)
++ + FDT_TAGSIZE;
++ new_sz = fdt_totalsize(fdt) + delta;
++ fdt_open_into(fdt, fdt, new_sz);
++ return fdt;
++}
++
++static int create_node(const char *node_name)
++{
++ int node = 0;
++ char *p;
++
++ p = strrchr(node_name, '/');
++ if (!p) {
++ print_string("node name without '/'\r\n");
++ return -1;
++ }
++ *p = '\0';
++
++ blob = realloc_node(blob, p + 1);
++
++ if (p > node_name) {
++ node = fdt_path_offset(blob, node_name);
++ if (node < 0) {
++ print_string("no node name\r\n");
++ return -1;
++ }
++ }
++
++ node = fdt_add_subnode(blob, node, p + 1);
++ if (node < 0) {
++ print_string("add subnode err\r\n");
++ return -1;
++ }
++
++ return node;
++}
++
++static int dt_create_fw_node(void) {
++ int fw_node;
++
++ fw_node = fdt_path_offset(blob, FW_NODE_NAME);
++
++ if(fw_node < 0) {
++ fw_node = create_node(FW_NODE_NAME);
++ }
++
++ return fw_node;
++}
++
++static char *realloc_property(char *fdt, int nodeoffset, const char *name,
++ int newlen)
++{
++ int delta = 0;
++ int oldlen = 0;
++ int new_sz;
++
++ if (!fdt_get_property(fdt, nodeoffset, name, &oldlen))
++ delta = sizeof(struct fdt_property) + strlen(name) + 1;
++
++ if (newlen > oldlen)
++ delta += ALIGN(newlen) - ALIGN(oldlen);
++
++ new_sz = fdt_totalsize(fdt) + delta;
++ fdt_open_into(fdt, fdt, new_sz);
++ return fdt;
++}
++
++static void dt_set_prop(int node, char *property, void *buf, int len)
++{
++ int err;
++
++ err = fdt_setprop(blob, node, property, buf, len);
++ if (err == -FDT_ERR_NOSPACE) {
++ blob = realloc_property(blob, node, property, len);
++ err = fdt_setprop(blob, node, property, buf, len);
++ }
++ if (err) {
++ print_string("fdt error\n\r");
++ }
++}
++
++static void dt_set_prop_u32(int node, char *property, uint32_t val)
++{
++ fdt32_t fdt_val = cpu_to_fdt32(val);
++ int len = sizeof(fdt32_t);
++
++ dt_set_prop(node, property, (void*)&fdt_val, len);
++}
++
++static void dt_set_prop_u64(int node, char *property, uint64_t val)
++{
++ fdt64_t fdt_val = cpu_to_fdt64(val);
++ int len = sizeof(fdt64_t);
++
++ dt_set_prop(node, property, (void*)&fdt_val, len);
++}
++
++/* dt_set_prop_u32_array may be unused, depending on BOOTWRAPPER_32 */
++__attribute__((unused))
++static void dt_set_prop_u32_array(int node, char *property, uint32_t *vals,
++ int size)
++{
++ fdt32_t *fdt_vals = (fdt32_t*)vals;
++ int len = sizeof(fdt32_t) * size;
++
++ for (int i = 0; i < size; i++) {
++ fdt_vals[i] = cpu_to_fdt32(vals[i]);
++ }
++
++ dt_set_prop(node, property, (void*)fdt_vals, len);
++}
++
++static void dt_set_prop_u64_array(int node, char *property, uint64_t *vals,
++ int size)
++{
++ fdt64_t *fdt_vals = (fdt64_t*)vals;
++ int len = sizeof(fdt64_t) * size;
++
++ for (int i = 0; i < size; i++) {
++ fdt_vals[i] = cpu_to_fdt64(vals[i]);
++ }
++
++ dt_set_prop(node, property, (void*)fdt_vals, len);
++}
++
++#if DEVICE_TREE_DEBUG
++static void dt_dump_string(const void *s, int len)
++{
++ char *sub = (char*)s;
++ int sublen;
++ while(*sub && ((uint64_t)sub - (uint64_t)s) < len) {
++ sublen = strlen(sub) + 1;
++ print_string(sub);
++ print_string(" ");
++ sub += sublen;
++ }
++ print_string("\n\r");
++}
++
++static void dt_dump_fdt32_array(const void *vals, int len)
++{
++ fdt32_t *fdt_vals = (fdt32_t*)vals;
++ len = len / sizeof(fdt32_t);
++ for (int i = 0; i < len; i++) {
++ print_hex(fdt32_to_cpu(fdt_vals[i]));
++ print_string(" ");
++ }
++ print_string("\n\r");
++}
++
++static void dt_dump(int node, char *property, char type)
++{
++ const void *val;
++ int len;
++
++ val = fdt_getprop(blob, node, property, &len);
++ print_string(property);
++ print_string(": ");
++
++ if (type == 's') {
++ /* string type */
++ dt_dump_string(val, len);
++ return;
++ }
++
++ /* uint type */
++ dt_dump_fdt32_array(val, len);
++}
++
++void dt_dump_all(int node)
++{
++ if (node >= 0) {
++ print_string(FW_NODE_NAME" info:\r\n");
++ dt_dump(node, "compatible", 's');
++ dt_dump(node, "version", 's');
++ dt_dump(node, "function_entry", 'i');
++ dt_dump(node, "address-cells", 'i');
++ dt_dump(node, "size-cells", 'i');
++ dt_dump(node, "regions", 'i');
++ dt_dump(node, "regions-permission", 's');
++ dt_dump(node, "regions-cache", 's');
++ print_string("\r\n");
++ }
++}
++#else
++void dt_dump_all(int node) { (void*)node; return; }
++#endif
+
+ void dt_add_memreserve(void)
+ {
+@@ -32,3 +244,60 @@ void dt_add_memreserve(void)
+ print_string("reserve mem add err\n\r");
+ }
+ }
++
++void dt_fw_node_init(int enable)
++{
++ int fw_node;
++
++ VAL_TYPE regions[] = {
++ /* code region: start, size, ro, x, cacheable */
++ (VAL_TYPE)firmware_start,
++ (VAL_TYPE)(firmware_data - firmware_start),
++ /* data region: start, size, rw, xn, cacheable */
++ (VAL_TYPE)firmware_data,
++ (VAL_TYPE)(firmware_end - firmware_data),
++ };
++ int regions_num = sizeof(regions) / sizeof(VAL_TYPE);
++ char regions_permission[] = "RX\0RW";
++ char regions_cache[] = "Cache\0Cache";
++
++ if (!enable)
++ return;
++
++ print_string("Prepare "FW_NODE_NAME" node\n\r");
++
++ blob = (void*)&dtb;
++
++ if(fdt_path_offset(blob, "/psci") < 0) {
++ print_string("/psci node not found\n\r");
++ return;
++ }
++
++ fw_node = dt_create_fw_node();
++
++ if(fw_node < 0) {
++ print_string(FW_NODE_NAME" node create err\n\r");
++ }
++
++ dt_set_prop(fw_node, "compatible", FW_COMPAT, sizeof(FW_COMPAT));
++ dt_set_prop(fw_node, "version", FW_INFO_VER, sizeof(FW_INFO_VER));
++
++ dt_set_prop_u32(fw_node, "address-cells", CELL_NUM);
++ dt_set_prop_u32(fw_node, "size-cells", CELL_NUM);
++ dt_set_prop(fw_node, "regions-permission", regions_permission,
++ sizeof(regions_permission));
++ dt_set_prop(fw_node, "regions-cache", regions_cache,
++ sizeof(regions_cache));
++
++#ifdef BOOTWRAPPER_32
++ dt_set_prop_u32_array(fw_node, "regions", regions, regions_num);
++ dt_set_prop_u32(fw_node, "function_entry", (VAL_TYPE)smc_fn_entry);
++#else
++ dt_set_prop_u64_array(fw_node, "regions", regions, regions_num);
++ dt_set_prop_u64(fw_node, "function_entry", (VAL_TYPE)smc_fn_entry);
++#endif
++
++ fdt_pack(blob);
++
++ dt_dump_all(fw_node);
++}
+--
+2.25.1
+
new file mode 100644
@@ -0,0 +1,98 @@
+From 6dfc937d1ae54d2ae9f8c60ca29ba73ca14dc8c4 Mon Sep 17 00:00:00 2001
+From: Jaxson Han <jaxson.han@arm.com>
+Date: Wed, 29 Dec 2021 15:33:17 +0800
+Subject: [PATCH] boot: Enable firmware node initialization
+
+Enable the firmware node initialization, so that the next stage
+(hypervisor) can share EL2 with the firmware (boot-wrapper). The next
+stage (hypervisor) gets the SMCCC entry point, the code/data sections,
+the section attributes, the firmware node version and so on.
+It is worth noting that this EL2 sharing mechanism is only for Armv8R
+AArch64, thus add flag_v8r to record if the arch is Armv8R AArch64.
+Enable the firmware node initialization only if it is Armv8R AArch64.
+Also, we increase the stack size to 1024 to fix the stack overflow issue
+when using the libfdt.
+
+Add the -fno-builtin option to CFLAGS to avoid the issue that the 'memset'
+in common/lib.c conflicts with the builtin 'memset' function. GCC versions
+>= 10 compile the code incorrectly without -fno-builtin.
+
+Issue-Id: SCM-3816
+Upstream-Status: Inappropriate [other]
+ Implementation pending further discussion
+Signed-off-by: Jaxson Han <jaxson.han@arm.com>
+Change-Id: Ib274485a34d26215595fd0cd737be86610289817
+---
+ Makefile.am | 4 ++--
+ arch/aarch64/boot.S | 6 ++++++
+ common/boot.c | 4 ++++
+ 3 files changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/Makefile.am b/Makefile.am
+index 054becd..b01809c 100644
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -23,7 +23,7 @@ DEFINES += -DCPU_IDS=$(CPU_IDS)
+ DEFINES += -DNR_CPUS=$(NR_CPUS)
+ DEFINES += $(if $(SYSREGS_BASE), -DSYSREGS_BASE=$(SYSREGS_BASE), )
+ DEFINES += -DUART_BASE=$(UART_BASE)
+-DEFINES += -DSTACK_SIZE=256
++DEFINES += -DSTACK_SIZE=1024
+
+ if KERNEL_32
+ DEFINES += -DKERNEL_32
+@@ -132,7 +132,7 @@ CHOSEN_NODE := chosen { \
+ CPPFLAGS += $(INITRD_FLAGS)
+ CFLAGS += -I$(top_srcdir)/include/ -I$(top_srcdir)/$(ARCH_SRC)/include/
+ CFLAGS += -Wall -fomit-frame-pointer
+-CFLAGS += -fno-stack-protector
++CFLAGS += -fno-stack-protector -fno-builtin
+ CFLAGS += -ffunction-sections -fdata-sections
+ CFLAGS += -fno-pic -fno-pie
+ LDFLAGS += --gc-sections
+diff --git a/arch/aarch64/boot.S b/arch/aarch64/boot.S
+index 157c097..f310387 100644
+--- a/arch/aarch64/boot.S
++++ b/arch/aarch64/boot.S
+@@ -240,6 +240,10 @@ el2_init:
+ #endif
+ ldr x1, =spsr_to_elx
+ str w0, [x1]
++
++ mov w0, #1
++ ldr x1, =flag_v8r
++ str w0, [x1]
+ // fall through
+
+ el_max_init:
+@@ -319,3 +323,5 @@ flag_keep_el:
+ .long 0
+ ASM_DATA(spsr_to_elx)
+ .long 0
++ASM_DATA(flag_v8r)
++ .long 0
+diff --git a/common/boot.c b/common/boot.c
+index ee2bea0..38b2dca 100644
+--- a/common/boot.c
++++ b/common/boot.c
+@@ -11,6 +11,9 @@
+
+ extern unsigned long entrypoint;
+ extern unsigned long dtb;
++extern unsigned int flag_v8r;
++
++extern void dt_fw_node_init(int enable);
+
+ void init_platform(void);
+
+@@ -64,6 +67,7 @@ void __noreturn first_spin(unsigned int cpu, unsigned long *mbox,
+ if (cpu == 0) {
+ init_platform();
+ dt_add_memreserve();
++ dt_fw_node_init(flag_v8r == 1);
+
+ *mbox = (unsigned long)&entrypoint;
+ sevl();
+--
+2.25.1
+