firmware: Use lla to access all global symbols

When OpenSBI is compiled with -fPIE, the assembler expands the "la"
pseudo-instruction into a GOT reference, which costs an additional load
instruction every time a symbol address is obtained. However, if the
symbol lies within +/-2GB of the referencing instruction, "lla" can be
used instead of "la" to avoid the unneeded GOT reference. This patch
assumes that the OpenSBI image, excluding the payload, does not exceed
2GB. Based on this assumption, all "la" instructions are replaced with
"lla" to avoid performance degradation when compiling with -fPIE.

Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
Reviewed-by: Anup Patel <anup.patel@wdc.com>
This commit is contained in:
Vincent Chen
2021-03-17 09:16:37 +08:00
committed by Anup Patel
parent ff5bd949d5
commit 22d8ee9758
5 changed files with 64 additions and 64 deletions

View File

@@ -57,39 +57,39 @@ _start:
bne a0, a6, _wait_relocate_copy_done bne a0, a6, _wait_relocate_copy_done
_try_lottery: _try_lottery:
/* Jump to relocation wait loop if we don't get relocation lottery */ /* Jump to relocation wait loop if we don't get relocation lottery */
la a6, _relocate_lottery lla a6, _relocate_lottery
li a7, 1 li a7, 1
amoadd.w a6, a7, (a6) amoadd.w a6, a7, (a6)
bnez a6, _wait_relocate_copy_done bnez a6, _wait_relocate_copy_done
/* Save load address */ /* Save load address */
la t0, _load_start lla t0, _load_start
la t1, _start lla t1, _start
REG_S t1, 0(t0) REG_S t1, 0(t0)
/* Relocate if load address != link address */ /* Relocate if load address != link address */
_relocate: _relocate:
la t0, _link_start lla t0, _link_start
REG_L t0, 0(t0) REG_L t0, 0(t0)
la t1, _link_end lla t1, _link_end
REG_L t1, 0(t1) REG_L t1, 0(t1)
la t2, _load_start lla t2, _load_start
REG_L t2, 0(t2) REG_L t2, 0(t2)
sub t3, t1, t0 sub t3, t1, t0
add t3, t3, t2 add t3, t3, t2
beq t0, t2, _relocate_done beq t0, t2, _relocate_done
la t4, _relocate_done lla t4, _relocate_done
sub t4, t4, t2 sub t4, t4, t2
add t4, t4, t0 add t4, t4, t0
blt t2, t0, _relocate_copy_to_upper blt t2, t0, _relocate_copy_to_upper
_relocate_copy_to_lower: _relocate_copy_to_lower:
ble t1, t2, _relocate_copy_to_lower_loop ble t1, t2, _relocate_copy_to_lower_loop
la t3, _relocate_lottery lla t3, _relocate_lottery
BRANGE t2, t1, t3, _start_hang BRANGE t2, t1, t3, _start_hang
la t3, _boot_status lla t3, _boot_status
BRANGE t2, t1, t3, _start_hang BRANGE t2, t1, t3, _start_hang
la t3, _relocate lla t3, _relocate
la t5, _relocate_done lla t5, _relocate_done
BRANGE t2, t1, t3, _start_hang BRANGE t2, t1, t3, _start_hang
BRANGE t2, t1, t5, _start_hang BRANGE t2, t1, t5, _start_hang
BRANGE t3, t5, t2, _start_hang BRANGE t3, t5, t2, _start_hang
@@ -102,12 +102,12 @@ _relocate_copy_to_lower_loop:
jr t4 jr t4
_relocate_copy_to_upper: _relocate_copy_to_upper:
ble t3, t0, _relocate_copy_to_upper_loop ble t3, t0, _relocate_copy_to_upper_loop
la t2, _relocate_lottery lla t2, _relocate_lottery
BRANGE t0, t3, t2, _start_hang BRANGE t0, t3, t2, _start_hang
la t2, _boot_status lla t2, _boot_status
BRANGE t0, t3, t2, _start_hang BRANGE t0, t3, t2, _start_hang
la t2, _relocate lla t2, _relocate
la t5, _relocate_done lla t5, _relocate_done
BRANGE t0, t3, t2, _start_hang BRANGE t0, t3, t2, _start_hang
BRANGE t0, t3, t5, _start_hang BRANGE t0, t3, t5, _start_hang
BRANGE t2, t5, t0, _start_hang BRANGE t2, t5, t0, _start_hang
@@ -119,12 +119,12 @@ _relocate_copy_to_upper_loop:
blt t0, t1, _relocate_copy_to_upper_loop blt t0, t1, _relocate_copy_to_upper_loop
jr t4 jr t4
_wait_relocate_copy_done: _wait_relocate_copy_done:
la t0, _start lla t0, _start
la t1, _link_start lla t1, _link_start
REG_L t1, 0(t1) REG_L t1, 0(t1)
beq t0, t1, _wait_for_boot_hart beq t0, t1, _wait_for_boot_hart
la t2, _boot_status lla t2, _boot_status
la t3, _wait_for_boot_hart lla t3, _wait_for_boot_hart
sub t3, t3, t0 sub t3, t3, t0
add t3, t3, t1 add t3, t3, t1
1: 1:
@@ -143,10 +143,10 @@ _relocate_done:
* Mark relocate copy done * Mark relocate copy done
* Use _boot_status copy relative to the load address * Use _boot_status copy relative to the load address
*/ */
la t0, _boot_status lla t0, _boot_status
la t1, _link_start lla t1, _link_start
REG_L t1, 0(t1) REG_L t1, 0(t1)
la t2, _load_start lla t2, _load_start
REG_L t2, 0(t2) REG_L t2, 0(t2)
sub t0, t0, t1 sub t0, t0, t1
add t0, t0, t2 add t0, t0, t2
@@ -161,19 +161,19 @@ _relocate_done:
call _reset_regs call _reset_regs
/* Zero-out BSS */ /* Zero-out BSS */
la s4, _bss_start lla s4, _bss_start
la s5, _bss_end lla s5, _bss_end
_bss_zero: _bss_zero:
REG_S zero, (s4) REG_S zero, (s4)
add s4, s4, __SIZEOF_POINTER__ add s4, s4, __SIZEOF_POINTER__
blt s4, s5, _bss_zero blt s4, s5, _bss_zero
/* Setup temporary trap handler */ /* Setup temporary trap handler */
la s4, _start_hang lla s4, _start_hang
csrw CSR_MTVEC, s4 csrw CSR_MTVEC, s4
/* Setup temporary stack */ /* Setup temporary stack */
la s4, _fw_end lla s4, _fw_end
li s5, (SBI_SCRATCH_SIZE * 2) li s5, (SBI_SCRATCH_SIZE * 2)
add sp, s4, s5 add sp, s4, s5
@@ -184,7 +184,7 @@ _bss_zero:
#ifdef FW_FDT_PATH #ifdef FW_FDT_PATH
/* Override previous arg1 */ /* Override previous arg1 */
la a1, fw_fdt_bin lla a1, fw_fdt_bin
#endif #endif
/* /*
@@ -202,7 +202,7 @@ _bss_zero:
* s7 -> HART Count * s7 -> HART Count
* s8 -> HART Stack Size * s8 -> HART Stack Size
*/ */
la a4, platform lla a4, platform
#if __riscv_xlen == 64 #if __riscv_xlen == 64
lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4) lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4) lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
@@ -212,7 +212,7 @@ _bss_zero:
#endif #endif
/* Setup scratch space for all the HARTs*/ /* Setup scratch space for all the HARTs*/
la tp, _fw_end lla tp, _fw_end
mul a5, s7, s8 mul a5, s7, s8
add tp, tp, a5 add tp, tp, a5
/* Keep a copy of tp */ /* Keep a copy of tp */
@@ -230,8 +230,8 @@ _scratch_init:
/* Initialize scratch space */ /* Initialize scratch space */
/* Store fw_start and fw_size in scratch space */ /* Store fw_start and fw_size in scratch space */
la a4, _fw_start lla a4, _fw_start
la a5, _fw_end lla a5, _fw_end
mul t0, s7, s8 mul t0, s7, s8
add a5, a5, t0 add a5, a5, t0
sub a5, a5, a4 sub a5, a5, a4
@@ -253,16 +253,16 @@ _scratch_init:
REG_S a0, SBI_SCRATCH_NEXT_MODE_OFFSET(tp) REG_S a0, SBI_SCRATCH_NEXT_MODE_OFFSET(tp)
MOV_3R a0, s0, a1, s1, a2, s2 MOV_3R a0, s0, a1, s1, a2, s2
/* Store warm_boot address in scratch space */ /* Store warm_boot address in scratch space */
la a4, _start_warm lla a4, _start_warm
REG_S a4, SBI_SCRATCH_WARMBOOT_ADDR_OFFSET(tp) REG_S a4, SBI_SCRATCH_WARMBOOT_ADDR_OFFSET(tp)
/* Store platform address in scratch space */ /* Store platform address in scratch space */
la a4, platform lla a4, platform
REG_S a4, SBI_SCRATCH_PLATFORM_ADDR_OFFSET(tp) REG_S a4, SBI_SCRATCH_PLATFORM_ADDR_OFFSET(tp)
/* Store hartid-to-scratch function address in scratch space */ /* Store hartid-to-scratch function address in scratch space */
la a4, _hartid_to_scratch lla a4, _hartid_to_scratch
REG_S a4, SBI_SCRATCH_HARTID_TO_SCRATCH_OFFSET(tp) REG_S a4, SBI_SCRATCH_HARTID_TO_SCRATCH_OFFSET(tp)
/* Store trap-exit function address in scratch space */ /* Store trap-exit function address in scratch space */
la a4, _trap_exit lla a4, _trap_exit
REG_S a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(tp) REG_S a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(tp)
/* Clear tmp0 in scratch space */ /* Clear tmp0 in scratch space */
REG_S zero, SBI_SCRATCH_TMP0_OFFSET(tp) REG_S zero, SBI_SCRATCH_TMP0_OFFSET(tp)
@@ -343,7 +343,7 @@ _fdt_reloc_done:
/* mark boot hart done */ /* mark boot hart done */
li t0, BOOT_STATUS_BOOT_HART_DONE li t0, BOOT_STATUS_BOOT_HART_DONE
la t1, _boot_status lla t1, _boot_status
REG_S t0, 0(t1) REG_S t0, 0(t1)
fence rw, rw fence rw, rw
j _start_warm j _start_warm
@@ -351,7 +351,7 @@ _fdt_reloc_done:
/* waiting for boot hart to be done (_boot_status == 2) */ /* waiting for boot hart to be done (_boot_status == 2) */
_wait_for_boot_hart: _wait_for_boot_hart:
li t0, BOOT_STATUS_BOOT_HART_DONE li t0, BOOT_STATUS_BOOT_HART_DONE
la t1, _boot_status lla t1, _boot_status
REG_L t1, 0(t1) REG_L t1, 0(t1)
/* Reduce the bus traffic so that boot hart may proceed faster */ /* Reduce the bus traffic so that boot hart may proceed faster */
nop nop
@@ -369,7 +369,7 @@ _start_warm:
csrw CSR_MIP, zero csrw CSR_MIP, zero
/* Find HART count and HART stack size */ /* Find HART count and HART stack size */
la a4, platform lla a4, platform
#if __riscv_xlen == 64 #if __riscv_xlen == 64
lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4) lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4) lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
@@ -400,7 +400,7 @@ _start_warm:
3: bge s6, s7, _start_hang 3: bge s6, s7, _start_hang
/* Find the scratch space based on HART index */ /* Find the scratch space based on HART index */
la tp, _fw_end lla tp, _fw_end
mul a5, s7, s8 mul a5, s7, s8
add tp, tp, a5 add tp, tp, a5
mul a5, s8, s6 mul a5, s8, s6
@@ -415,13 +415,13 @@ _start_warm:
add sp, tp, zero add sp, tp, zero
/* Setup trap handler */ /* Setup trap handler */
la a4, _trap_handler lla a4, _trap_handler
#if __riscv_xlen == 32 #if __riscv_xlen == 32
csrr a5, CSR_MISA csrr a5, CSR_MISA
srli a5, a5, ('H' - 'A') srli a5, a5, ('H' - 'A')
andi a5, a5, 0x1 andi a5, a5, 0x1
beq a5, zero, _skip_trap_handler_rv32_hyp beq a5, zero, _skip_trap_handler_rv32_hyp
la a4, _trap_handler_rv32_hyp lla a4, _trap_handler_rv32_hyp
_skip_trap_handler_rv32_hyp: _skip_trap_handler_rv32_hyp:
#endif #endif
csrw CSR_MTVEC, a4 csrw CSR_MTVEC, a4
@@ -432,7 +432,7 @@ _skip_trap_handler_rv32_hyp:
srli a5, a5, ('H' - 'A') srli a5, a5, ('H' - 'A')
andi a5, a5, 0x1 andi a5, a5, 0x1
beq a5, zero, _skip_trap_exit_rv32_hyp beq a5, zero, _skip_trap_exit_rv32_hyp
la a4, _trap_exit_rv32_hyp lla a4, _trap_exit_rv32_hyp
csrr a5, CSR_MSCRATCH csrr a5, CSR_MSCRATCH
REG_S a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(a5) REG_S a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(a5)
_skip_trap_exit_rv32_hyp: _skip_trap_exit_rv32_hyp:
@@ -468,7 +468,7 @@ _hartid_to_scratch:
* t1 -> HART Stack End * t1 -> HART Stack End
* t2 -> Temporary * t2 -> Temporary
*/ */
la t2, platform lla t2, platform
#if __riscv_xlen == 64 #if __riscv_xlen == 64
lwu t0, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(t2) lwu t0, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(t2)
lwu t2, SBI_PLATFORM_HART_COUNT_OFFSET(t2) lwu t2, SBI_PLATFORM_HART_COUNT_OFFSET(t2)
@@ -478,7 +478,7 @@ _hartid_to_scratch:
#endif #endif
sub t2, t2, a1 sub t2, t2, a1
mul t2, t2, t0 mul t2, t2, t0
la t1, _fw_end lla t1, _fw_end
add t1, t1, t2 add t1, t1, t2
li t2, SBI_SCRATCH_SIZE li t2, SBI_SCRATCH_SIZE
sub a0, t1, t2 sub a0, t1, t2

View File

@@ -54,7 +54,7 @@ fw_boot_hart:
*/ */
fw_save_info: fw_save_info:
/* Save next arg1 in 'a1' */ /* Save next arg1 in 'a1' */
la a4, _dynamic_next_arg1 lla a4, _dynamic_next_arg1
REG_S a1, (a4) REG_S a1, (a4)
/* Sanity checks */ /* Sanity checks */
@@ -66,13 +66,13 @@ fw_save_info:
bgt a3, a4, _bad_dynamic_info bgt a3, a4, _bad_dynamic_info
/* Save version == 0x1 fields */ /* Save version == 0x1 fields */
la a4, _dynamic_next_addr lla a4, _dynamic_next_addr
REG_L a3, FW_DYNAMIC_INFO_NEXT_ADDR_OFFSET(a2) REG_L a3, FW_DYNAMIC_INFO_NEXT_ADDR_OFFSET(a2)
REG_S a3, (a4) REG_S a3, (a4)
la a4, _dynamic_next_mode lla a4, _dynamic_next_mode
REG_L a3, FW_DYNAMIC_INFO_NEXT_MODE_OFFSET(a2) REG_L a3, FW_DYNAMIC_INFO_NEXT_MODE_OFFSET(a2)
REG_S a3, (a4) REG_S a3, (a4)
la a4, _dynamic_options lla a4, _dynamic_options
REG_L a3, FW_DYNAMIC_INFO_OPTIONS_OFFSET(a2) REG_L a3, FW_DYNAMIC_INFO_OPTIONS_OFFSET(a2)
REG_S a3, (a4) REG_S a3, (a4)
@@ -80,7 +80,7 @@ fw_save_info:
li a4, 0x2 li a4, 0x2
REG_L a3, FW_DYNAMIC_INFO_VERSION_OFFSET(a2) REG_L a3, FW_DYNAMIC_INFO_VERSION_OFFSET(a2)
blt a3, a4, 2f blt a3, a4, 2f
la a4, _dynamic_boot_hart lla a4, _dynamic_boot_hart
REG_L a3, FW_DYNAMIC_INFO_BOOT_HART_OFFSET(a2) REG_L a3, FW_DYNAMIC_INFO_BOOT_HART_OFFSET(a2)
REG_S a3, (a4) REG_S a3, (a4)
2: 2:
@@ -96,7 +96,7 @@ fw_save_info:
* The next arg1 should be returned in 'a0'. * The next arg1 should be returned in 'a0'.
*/ */
fw_next_arg1: fw_next_arg1:
la a0, _dynamic_next_arg1 lla a0, _dynamic_next_arg1
REG_L a0, (a0) REG_L a0, (a0)
ret ret
@@ -108,7 +108,7 @@ fw_next_arg1:
* The next address should be returned in 'a0'. * The next address should be returned in 'a0'.
*/ */
fw_next_addr: fw_next_addr:
la a0, _dynamic_next_addr lla a0, _dynamic_next_addr
REG_L a0, (a0) REG_L a0, (a0)
ret ret
@@ -120,7 +120,7 @@ fw_next_addr:
* The next address should be returned in 'a0' * The next address should be returned in 'a0'
*/ */
fw_next_mode: fw_next_mode:
la a0, _dynamic_next_mode lla a0, _dynamic_next_mode
REG_L a0, (a0) REG_L a0, (a0)
ret ret
@@ -133,7 +133,7 @@ fw_next_mode:
* The next address should be returned in 'a0'. * The next address should be returned in 'a0'.
*/ */
fw_options: fw_options:
la a0, _dynamic_options lla a0, _dynamic_options
REG_L a0, (a0) REG_L a0, (a0)
ret ret

View File

@@ -59,7 +59,7 @@ fw_next_arg1:
* The next address should be returned in 'a0'. * The next address should be returned in 'a0'.
*/ */
fw_next_addr: fw_next_addr:
la a0, _jump_addr lla a0, _jump_addr
REG_L a0, (a0) REG_L a0, (a0)
ret ret

View File

@@ -59,7 +59,7 @@ fw_next_arg1:
* The next address should be returned in 'a0'. * The next address should be returned in 'a0'.
*/ */
fw_next_addr: fw_next_addr:
la a0, payload_bin lla a0, payload_bin
ret ret
.section .entry, "ax", %progbits .section .entry, "ax", %progbits

View File

@@ -28,20 +28,20 @@
.globl _start .globl _start
_start: _start:
/* Pick one hart to run the main boot sequence */ /* Pick one hart to run the main boot sequence */
la a3, _hart_lottery lla a3, _hart_lottery
li a2, 1 li a2, 1
amoadd.w a3, a2, (a3) amoadd.w a3, a2, (a3)
bnez a3, _start_hang bnez a3, _start_hang
/* Save a0 and a1 */ /* Save a0 and a1 */
la a3, _boot_a0 lla a3, _boot_a0
REG_S a0, 0(a3) REG_S a0, 0(a3)
la a3, _boot_a1 lla a3, _boot_a1
REG_S a1, 0(a3) REG_S a1, 0(a3)
/* Zero-out BSS */ /* Zero-out BSS */
la a4, _bss_start lla a4, _bss_start
la a5, _bss_end lla a5, _bss_end
_bss_zero: _bss_zero:
REG_S zero, (a4) REG_S zero, (a4)
add a4, a4, __SIZEOF_POINTER__ add a4, a4, __SIZEOF_POINTER__
@@ -53,18 +53,18 @@ _start_warm:
csrw CSR_SIP, zero csrw CSR_SIP, zero
/* Setup exception vectors */ /* Setup exception vectors */
la a3, _start_hang lla a3, _start_hang
csrw CSR_STVEC, a3 csrw CSR_STVEC, a3
/* Setup stack */ /* Setup stack */
la a3, _payload_end lla a3, _payload_end
li a4, 0x2000 li a4, 0x2000
add sp, a3, a4 add sp, a3, a4
/* Jump to C main */ /* Jump to C main */
la a3, _boot_a0 lla a3, _boot_a0
REG_L a0, 0(a3) REG_L a0, 0(a3)
la a3, _boot_a1 lla a3, _boot_a1
REG_L a1, 0(a3) REG_L a1, 0(a3)
call test_main call test_main