diff --git a/port/threadx_smp/CMakeLists.txt b/port/threadx_smp/CMakeLists.txt index 8331d65..9bf33f1 100644 --- a/port/threadx_smp/CMakeLists.txt +++ b/port/threadx_smp/CMakeLists.txt @@ -11,9 +11,21 @@ set(THREADX_SMP_CUSTOM_INC ${CMAKE_CURRENT_SOURCE_DIR}/../moonlight ) set(THREADX_SMP_CUSTOM_SRC - src/tx_thread_smp_core_preempt.c src/tx_initialize_low_level.S + src/tx_thread_context_restore.S + src/tx_thread_context_save.S src/tx_thread_schedule.S + src/tx_thread_smp_core_get.S + src/tx_thread_smp_core_preempt.c + src/tx_thread_smp_current_state_get.S + src/tx_thread_smp_current_thread_get.S + src/tx_thread_smp_initialize_wait.S + src/tx_thread_smp_low_level_initialize.S + src/tx_thread_smp_protect.S + src/tx_thread_smp_time_get.c + src/tx_thread_smp_unprotect.S + src/tx_thread_stack_build.S + src/tx_thread_system_return.S ) set(THREADX_SMP_GENERATED_INC_DIR ${CMAKE_CURRENT_BINARY_DIR}/generated) set(THREADX_SMP_OFFSET_SOURCE ${CMAKE_CURRENT_SOURCE_DIR}/src/tx_asm_offsets.c) @@ -56,22 +68,22 @@ add_custom_command( OUTPUT ${THREADX_SMP_OFFSET_INC} COMMAND ${CMAKE_COMMAND} -E make_directory ${THREADX_SMP_GENERATED_INC_DIR} COMMAND ${CMAKE_C_COMPILER} - ${CMAKE_C_COMPILER_ARG1} - ${THREADX_SMP_OFFSET_CFLAGS} - ${THREADX_SMP_OFFSET_INCLUDE_ARGS} - ${THREADX_SMP_OFFSET_DEFINE_ARGS} - -S - -o ${THREADX_SMP_OFFSET_ASM} - ${THREADX_SMP_OFFSET_SOURCE} + ${CMAKE_C_COMPILER_ARG1} + ${THREADX_SMP_OFFSET_CFLAGS} + ${THREADX_SMP_OFFSET_INCLUDE_ARGS} + ${THREADX_SMP_OFFSET_DEFINE_ARGS} + -S + -o ${THREADX_SMP_OFFSET_ASM} + ${THREADX_SMP_OFFSET_SOURCE} COMMAND ${CMAKE_COMMAND} - -DINPUT=${THREADX_SMP_OFFSET_ASM} - -DOUTPUT=${THREADX_SMP_OFFSET_INC} - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/GenerateAsmOffsets.cmake + -DINPUT=${THREADX_SMP_OFFSET_ASM} + -DOUTPUT=${THREADX_SMP_OFFSET_INC} + -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/GenerateAsmOffsets.cmake DEPENDS - ${THREADX_SMP_OFFSET_SOURCE} - ${CMAKE_CURRENT_SOURCE_DIR}/cmake/GenerateAsmOffsets.cmake - ${CMAKE_CURRENT_SOURCE_DIR}/inc/tx_port.h - ${THREADX_COMMON_SMP_DIR}/inc/tx_api.h + ${THREADX_SMP_OFFSET_SOURCE} + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/GenerateAsmOffsets.cmake + ${CMAKE_CURRENT_SOURCE_DIR}/inc/tx_port.h + ${THREADX_COMMON_SMP_DIR}/inc/tx_api.h COMMAND_EXPAND_LISTS VERBATIM ) diff --git a/port/threadx_smp/inc/csr.h b/port/threadx_smp/inc/csr.h new file mode 100644 index 0000000..3b64b4f --- /dev/null +++ b/port/threadx_smp/inc/csr.h @@ -0,0 +1,371 @@ +/*************************************************************************** + * Copyright (c) 2024 Microsoft Corporation + * + * This program and the accompanying materials are made available under the + * terms of the MIT License which is available at + * https://opensource.org/licenses/MIT. + * + * SPDX-License-Identifier: MIT + **************************************************************************/ + +#ifndef CSR_H +#define CSR_H + +// Machine Status Register, mstatus +#define MSTATUS_MPP_MASK (3L << 11) // previous mode. +#define MSTATUS_MPP_M (3L << 11) +#define MSTATUS_MPP_S (1L << 11) +#define MSTATUS_MPP_U (0L << 11) +#define MSTATUS_MIE (1L << 3) // machine-mode interrupt enable. +#define MSTATUS_MPIE (1L << 7) +#define MSTATUS_FS (1L << 13) + +// Machine-mode Interrupt Enable +#define MIE_MTIE (1L << 7) +#define MIE_MSIE (1L << 3) +#define MIE_MEIE (1L << 11) +#define MIE_STIE (1L << 5) // supervisor timer +#define MIE_SSIE (1L << 1) +#define MIE_SEIE (1L << 9) + +// Supervisor Status Register, sstatus +#define SSTATUS_SPP (1L << 8) // Previous mode, 1=Supervisor, 0=User +#define SSTATUS_SPIE (1L << 5) // Supervisor Previous Interrupt Enable +#define SSTATUS_UPIE (1L << 4) // User Previous Interrupt Enable +#define SSTATUS_SIE (1L << 1) // Supervisor Interrupt Enable +#define SSTATUS_UIE (1L << 0) // User Interrupt Enable +#define SSTATUS_SPIE (1L << 5) +#define SSTATUS_UPIE (1L << 4) + +// Supervisor Interrupt Enable +#define SIE_SEIE (1L << 9) // external +#define SIE_STIE (1L << 5) // timer +#define SIE_SSIE (1L << 1) // software + +#ifndef __ASSEMBLER__ + +#include + +static inline uint64_t riscv_get_core() +{ + uint64_t x; + asm volatile("csrr %0, mhartid" : "=r"(x)); + return x; +} + +static inline uint64_t riscv_get_mstatus() +{ + uint64_t x; + asm volatile("csrr %0, mstatus" : "=r"(x)); + return x; +} + +static inline void riscv_writ_mstatus(uint64_t x) +{ + asm volatile("csrw mstatus, %0" : : "r"(x)); +} + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void riscv_writ_mepc(uint64_t x) +{ + asm volatile("csrw mepc, %0" : : "r"(x)); +} + +static inline uint64_t riscv_get_sstatus() +{ + uint64_t x; + asm volatile("csrr %0, sstatus" : "=r"(x)); + return x; +} + +static inline void riscv_writ_sstatus(uint64_t x) +{ + asm volatile("csrw sstatus, %0" : : "r"(x)); +} + +// Supervisor Interrupt Pending +static inline uint64_t riscv_get_sip() +{ + uint64_t x; + asm volatile("csrr %0, sip" : "=r"(x)); + return x; +} + +static inline void riscv_writ_sip(uint64_t x) +{ + asm volatile("csrw sip, %0" : : "r"(x)); +} + +static inline uint64_t riscv_get_sie() +{ + uint64_t x; + asm volatile("csrr %0, sie" : "=r"(x)); + return x; +} + +static inline void riscv_writ_sie(uint64_t x) +{ + asm volatile("csrw sie, %0" : : "r"(x)); +} + +static inline uint64_t riscv_get_mie() +{ + uint64_t x; + asm volatile("csrr %0, mie" : "=r"(x)); + return x; +} + +static inline void riscv_writ_mie(uint64_t x) +{ + asm volatile("csrw mie, %0" : : "r"(x)); +} + +// supervisor exception program counter, holds the +// instruction address to which a return from +// exception will go. +static inline void riscv_writ_sepc(uint64_t x) +{ + asm volatile("csrw sepc, %0" : : "r"(x)); +} + +static inline uint64_t riscv_get_sepc() +{ + uint64_t x; + asm volatile("csrr %0, sepc" : "=r"(x)); + return x; +} + +// Machine Exception Delegation +static inline uint64_t riscv_get_medeleg() +{ + uint64_t x; + asm volatile("csrr %0, medeleg" : "=r"(x)); + return x; +} + +static inline void riscv_writ_medeleg(uint64_t x) +{ + asm volatile("csrw medeleg, %0" : : "r"(x)); +} + +// Machine Interrupt Delegation +static inline uint64_t riscv_get_mideleg() +{ + uint64_t x; + asm volatile("csrr %0, mideleg" : "=r"(x)); + return x; +} + +static inline void riscv_writ_mideleg(uint64_t x) +{ + asm volatile("csrw mideleg, %0" : : "r"(x)); +} + +// Supervisor Trap-Vector Base Address +// low two bits are mode. +static inline void riscv_writ_stvec(uint64_t x) +{ + asm volatile("csrw stvec, %0" : : "r"(x)); +} + +static inline uint64_t riscv_get_stvec() +{ + uint64_t x; + asm volatile("csrr %0, stvec" : "=r"(x)); + return x; +} + +// Supervisor Timer Comparison Register +static inline uint64_t riscv_get_stimecmp() +{ + uint64_t x; + // asm volatile("csrr %0, stimecmp" : "=r" (x) ); + asm volatile("csrr %0, 0x14d" : "=r"(x)); + return x; +} + +static inline void riscv_writ_stimecmp(uint64_t x) +{ + // asm volatile("csrw stimecmp, %0" : : "r" (x)); + asm volatile("csrw 0x14d, %0" : : "r"(x)); +} + +// Machine Environment Configuration Register +static inline uint64_t riscv_get_menvcfg() +{ + uint64_t x; + // asm volatile("csrr %0, menvcfg" : "=r" (x) ); + asm volatile("csrr %0, 0x30a" : "=r"(x)); + return x; +} + +static inline void riscv_writ_menvcfg(uint64_t x) +{ + // asm volatile("csrw menvcfg, %0" : : "r" (x)); + asm volatile("csrw 0x30a, %0" : : "r"(x)); +} + +// Physical Memory Protection +static inline void riscv_writ_pmpcfg0(uint64_t x) +{ + asm volatile("csrw pmpcfg0, %0" : : "r"(x)); +} + +static inline void riscv_writ_pmpaddr0(uint64_t x) +{ + asm volatile("csrw pmpaddr0, %0" : : "r"(x)); +} + +// supervisor address translation and protection; +// holds the address of the page table. +static inline void riscv_writ_satp(uint64_t x) +{ + asm volatile("csrw satp, %0" : : "r"(x)); +} + +static inline uint64_t riscv_get_satp() +{ + uint64_t x; + asm volatile("csrr %0, satp" : "=r"(x)); + return x; +} + +// Supervisor Trap Cause +static inline uint64_t riscv_get_scause() +{ + uint64_t x; + asm volatile("csrr %0, scause" : "=r"(x)); + return x; +} + +// Supervisor Trap Value +static inline uint64_t riscv_get_stval() +{ + uint64_t x; + asm volatile("csrr %0, stval" : "=r"(x)); + return x; +} + +// Machine-mode Counter-Enable +static inline void riscv_writ_mcounteren(uint64_t x) +{ + asm volatile("csrw mcounteren, %0" : : "r"(x)); +} + +static inline uint64_t riscv_get_mcounteren() +{ + uint64_t x; + asm volatile("csrr %0, mcounteren" : "=r"(x)); + return x; +} + +// machine-mode cycle counter +static inline uint64_t riscv_get_time() +{ + uint64_t x; + asm volatile("csrr %0, time" : "=r"(x)); + return x; +} + +// enable device interrupts +static inline void riscv_sintr_on() +{ + uint64_t sstatus = riscv_get_sstatus(); + sstatus |= SSTATUS_SIE; + riscv_writ_sstatus(sstatus); +} + +// disable device interrupts +static inline void riscv_sintr_off() +{ + uint64_t sstatus = riscv_get_sstatus(); + sstatus &= (~SSTATUS_SIE); + riscv_writ_sstatus(sstatus); +} + +// are device interrupts enabled? +static inline int riscv_sintr_get() +{ + uint64_t x = riscv_get_sstatus(); + return (x & SSTATUS_SIE) != 0; +} + +static inline void riscv_sintr_restore(int x) +{ + if (x) + riscv_sintr_on(); + else + riscv_sintr_off(); +} + +// enable device interrupts +static inline void riscv_mintr_on() +{ + uint64_t mstatus = riscv_get_mstatus(); + mstatus |= MSTATUS_MIE; + riscv_writ_mstatus(mstatus); +} + +// disable device interrupts +static inline void riscv_mintr_off() +{ + uint64_t mstatus = riscv_get_mstatus(); + mstatus &= (~MSTATUS_MIE); + riscv_writ_mstatus(mstatus); +} + +// are device interrupts enabled? +static inline int riscv_mintr_get() +{ + uint64_t x = riscv_get_mstatus(); + return (x & MSTATUS_MIE) != 0; +} + +static inline void riscv_mintr_restore(int x) +{ + if (x) + riscv_mintr_on(); + else + riscv_mintr_off(); +} + +static inline uint64_t riscv_get_sp() +{ + uint64_t x; + asm volatile("mv %0, sp" : "=r"(x)); + return x; +} + +// read and write tp, the thread pointer, which xv6 uses to hold +// this core's hartid (core number), the index into cpus[]. +static inline uint64_t riscv_get_tp() +{ + uint64_t x; + asm volatile("mv %0, tp" : "=r"(x)); + return x; +} + +static inline void riscv_writ_tp(uint64_t x) +{ + asm volatile("mv tp, %0" : : "r"(x)); +} + +static inline uint64_t riscv_get_ra() +{ + uint64_t x; + asm volatile("mv %0, ra" : "=r"(x)); + return x; +} + +// flush the TLB. +static inline void sfence_vma() +{ + // the zero, zero means flush all TLB entries. + asm volatile("sfence.vma zero, zero"); +} + +#endif // __ASSEMBLER__ + +#endif diff --git a/port/threadx_smp/src/tx_thread_context_restore.S b/port/threadx_smp/src/tx_thread_context_restore.S new file mode 100644 index 0000000..8171155 --- /dev/null +++ b/port/threadx_smp/src/tx_thread_context_restore.S @@ -0,0 +1,402 @@ +/*************************************************************************** + * Copyright (c) 2024 Microsoft Corporation + * + * This program and the accompanying materials are made available under the + * terms of the MIT License which is available at + * https://opensource.org/licenses/MIT. + * + * SPDX-License-Identifier: MIT + **************************************************************************/ + + +/**************************************************************************/ +/**************************************************************************/ +/** */ +/** ThreadX Component */ +/** */ +/** Thread */ +/** */ +/**************************************************************************/ +/**************************************************************************/ + +#include "tx_port.h" + + .section .text +/**************************************************************************/ +/* */ +/* FUNCTION RELEASE */ +/* */ +/* _tx_thread_context_restore RISC-V64/GNU */ +/* 6.2.1 */ +/* AUTHOR */ +/* */ +/* Scott Larson, Microsoft Corporation */ +/* */ +/* DESCRIPTION */ +/* */ +/* This function restores the interrupt context if it is processing a */ +/* nested interrupt. If not, it returns to the interrupt thread if no */ +/* preemption is necessary. Otherwise, if preemption is necessary or */ +/* if no thread was running, the function returns to the scheduler. */ +/* */ +/* INPUT */ +/* */ +/* None */ +/* */ +/* OUTPUT */ +/* */ +/* None */ +/* */ +/* CALLS */ +/* */ +/* _tx_thread_schedule Thread scheduling routine */ +/* */ +/* CALLED BY */ +/* */ +/* ISRs Interrupt Service Routines */ +/* */ +/* RELEASE HISTORY */ +/* */ +/* DATE NAME DESCRIPTION */ +/* */ +/* 03-08-2023 Scott Larson Initial Version 6.2.1 */ +/* */ +/**************************************************************************/ +/* VOID _tx_thread_context_restore(VOID) +{ */ + .global _tx_thread_context_restore +_tx_thread_context_restore: + + /* Lockout interrupts. */ + + csrci mstatus, 0x08 // Disable interrupts + +#ifdef TX_ENABLE_EXECUTION_CHANGE_NOTIFY + call _tx_execution_isr_exit // Call the ISR execution exit function +#endif + + /* Determine if interrupts are nested. */ + /* if (--_tx_thread_system_state) + { */ + + csrr t3, mhartid // Pickup current hart ID + slli t4, t3, 2 // Build per-hart ULONG offset + slli t5, t3, LOG_REGBYTES // Build per-hart pointer offset + la t0, _tx_thread_system_state // Pickup base of system-state array + add t0, t0, t4 // Select this hart's system-state slot + lw t1, 0(t0) // Pickup nested interrupt count + addi t1, t1, -1 // Decrement the nested interrupt counter + sw t1, 0(t0) // Store new nested count + beqz t1, _tx_thread_not_nested_restore // If 0, not nested restore + + /* Interrupts are nested. */ + + /* Just recover the saved registers and return to the point of + interrupt. */ + + /* Recover floating point registers. */ +#if defined(__riscv_float_abi_single) + flw f0, 31*REGBYTES(sp) // Recover ft0 + flw f1, 32*REGBYTES(sp) // Recover ft1 + flw f2, 33*REGBYTES(sp) // Recover ft2 + flw f3, 34*REGBYTES(sp) // Recover ft3 + flw f4, 35*REGBYTES(sp) // Recover ft4 + flw f5, 36*REGBYTES(sp) // Recover ft5 + flw f6, 37*REGBYTES(sp) // Recover ft6 + flw f7, 38*REGBYTES(sp) // Recover ft7 + flw f10,41*REGBYTES(sp) // Recover fa0 + flw f11,42*REGBYTES(sp) // Recover fa1 + flw f12,43*REGBYTES(sp) // Recover fa2 + flw f13,44*REGBYTES(sp) // Recover fa3 + flw f14,45*REGBYTES(sp) // Recover fa4 + flw f15,46*REGBYTES(sp) // Recover fa5 + flw f16,47*REGBYTES(sp) // Recover fa6 + flw f17,48*REGBYTES(sp) // Recover fa7 + flw f28,59*REGBYTES(sp) // Recover ft8 + flw f29,60*REGBYTES(sp) // Recover ft9 + flw f30,61*REGBYTES(sp) // Recover ft10 + flw f31,62*REGBYTES(sp) // Recover ft11 + lw t0, 63*REGBYTES(sp) // Recover fcsr + csrw fcsr, t0 // +#elif defined(__riscv_float_abi_double) + fld f0, 31*REGBYTES(sp) // Recover ft0 + fld f1, 32*REGBYTES(sp) // Recover ft1 + fld f2, 33*REGBYTES(sp) // Recover ft2 + fld f3, 34*REGBYTES(sp) // Recover ft3 + fld f4, 35*REGBYTES(sp) // Recover ft4 + fld f5, 36*REGBYTES(sp) // Recover ft5 + fld f6, 37*REGBYTES(sp) // Recover ft6 + fld f7, 38*REGBYTES(sp) // Recover ft7 + fld f10,41*REGBYTES(sp) // Recover fa0 + fld f11,42*REGBYTES(sp) // Recover fa1 + fld f12,43*REGBYTES(sp) // Recover fa2 + fld f13,44*REGBYTES(sp) // Recover fa3 + fld f14,45*REGBYTES(sp) // Recover fa4 + fld f15,46*REGBYTES(sp) // Recover fa5 + fld f16,47*REGBYTES(sp) // Recover fa6 + fld f17,48*REGBYTES(sp) // Recover fa7 + fld f28,59*REGBYTES(sp) // Recover ft8 + fld f29,60*REGBYTES(sp) // Recover ft9 + fld f30,61*REGBYTES(sp) // Recover ft10 + fld f31,62*REGBYTES(sp) // Recover ft11 + LOAD t0, 63*REGBYTES(sp) // Recover fcsr + csrw fcsr, t0 // +#endif + + /* Recover standard registers. */ + + /* Restore registers, + Skip global pointer because that does not change. + Also skip the saved registers since they have been restored by any function we called, + except s0 since we use it ourselves. */ + + LOAD t0, 30*REGBYTES(sp) // Recover mepc + csrw mepc, t0 // Setup mepc + li t0, 0x1880 // Prepare MPIP +#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) + li t1, 1<<13 + or t0, t1, t0 +#endif + csrw mstatus, t0 // Enable MPIP + + LOAD x1, 28*REGBYTES(sp) // Recover RA + LOAD x5, 19*REGBYTES(sp) // Recover t0 + LOAD x6, 18*REGBYTES(sp) // Recover t1 + LOAD x7, 17*REGBYTES(sp) // Recover t2 + LOAD x8, 12*REGBYTES(sp) // Recover s0 + LOAD x10, 27*REGBYTES(sp) // Recover a0 + LOAD x11, 26*REGBYTES(sp) // Recover a1 + LOAD x12, 25*REGBYTES(sp) // Recover a2 + LOAD x13, 24*REGBYTES(sp) // Recover a3 + LOAD x14, 23*REGBYTES(sp) // Recover a4 + LOAD x15, 22*REGBYTES(sp) // Recover a5 + LOAD x16, 21*REGBYTES(sp) // Recover a6 + LOAD x17, 20*REGBYTES(sp) // Recover a7 + LOAD x28, 16*REGBYTES(sp) // Recover t3 + LOAD x29, 15*REGBYTES(sp) // Recover t4 + LOAD x30, 14*REGBYTES(sp) // Recover t5 + LOAD x31, 13*REGBYTES(sp) // Recover t6 + +#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) + addi sp, sp, 65*REGBYTES // Recover stack frame - with floating point enabled +#else + addi sp, sp, 32*REGBYTES // Recover stack frame - without floating point enabled +#endif + mret // Return to point of interrupt + + /* } */ +_tx_thread_not_nested_restore: + /* Determine if a thread was interrupted and no preemption is required. */ + /* else if (((_tx_thread_current_ptr) && (_tx_thread_current_ptr == _tx_thread_execute_ptr) + || (_tx_thread_preempt_disable)) + { */ + + la t0, _tx_thread_current_ptr // Pickup base of current-thread array + add t0, t0, t5 // Select this hart's current-thread slot + LOAD t1, 0(t0) // Pickup current thread pointer + beqz t1, _tx_thread_idle_system_restore // If NULL, idle system restore + + la t0, _tx_thread_execute_ptr // Pickup base of execute-thread array + add t0, t0, t5 // Select this hart's execute-thread slot + LOAD t2, 0(t0) // Pickup thread execute pointer + beq t1, t2, _tx_thread_no_preempt_restore // Same thread selected, no preemption + + la t0, _tx_thread_smp_protection // Pickup protection structure + lw t2, 4(t0) // Pickup owning hart + bne t2, t3, _tx_thread_preempt_restore // If owned by another hart, preempt + + LOAD t2, _tx_thread_preempt_disable // Pickup preempt disable flag + bgtz t2, _tx_thread_no_preempt_restore // If set, restore interrupted thread + + +_tx_thread_no_preempt_restore: + /* Restore interrupted thread or ISR. */ + + /* Pickup the saved stack pointer. */ + /* SP = _tx_thread_current_ptr -> tx_thread_stack_ptr; */ + + LOAD sp, 2*REGBYTES(t1) // Switch back to thread's stack + + /* Recover floating point registers. */ +#if defined(__riscv_float_abi_single) + flw f0, 31*REGBYTES(sp) // Recover ft0 + flw f1, 32*REGBYTES(sp) // Recover ft1 + flw f2, 33*REGBYTES(sp) // Recover ft2 + flw f3, 34*REGBYTES(sp) // Recover ft3 + flw f4, 35*REGBYTES(sp) // Recover ft4 + flw f5, 36*REGBYTES(sp) // Recover ft5 + flw f6, 37*REGBYTES(sp) // Recover ft6 + flw f7, 38*REGBYTES(sp) // Recover ft7 + flw f10,41*REGBYTES(sp) // Recover fa0 + flw f11,42*REGBYTES(sp) // Recover fa1 + flw f12,43*REGBYTES(sp) // Recover fa2 + flw f13,44*REGBYTES(sp) // Recover fa3 + flw f14,45*REGBYTES(sp) // Recover fa4 + flw f15,46*REGBYTES(sp) // Recover fa5 + flw f16,47*REGBYTES(sp) // Recover fa6 + flw f17,48*REGBYTES(sp) // Recover fa7 + flw f28,59*REGBYTES(sp) // Recover ft8 + flw f29,60*REGBYTES(sp) // Recover ft9 + flw f30,61*REGBYTES(sp) // Recover ft10 + flw f31,62*REGBYTES(sp) // Recover ft11 + lw t0, 63*REGBYTES(sp) // Recover fcsr + csrw fcsr, t0 // +#elif defined(__riscv_float_abi_double) + fld f0, 31*REGBYTES(sp) // Recover ft0 + fld f1, 32*REGBYTES(sp) // Recover ft1 + fld f2, 33*REGBYTES(sp) // Recover ft2 + fld f3, 34*REGBYTES(sp) // Recover ft3 + fld f4, 35*REGBYTES(sp) // Recover ft4 + fld f5, 36*REGBYTES(sp) // Recover ft5 + fld f6, 37*REGBYTES(sp) // Recover ft6 + fld f7, 38*REGBYTES(sp) // Recover ft7 + fld f10,41*REGBYTES(sp) // Recover fa0 + fld f11,42*REGBYTES(sp) // Recover fa1 + fld f12,43*REGBYTES(sp) // Recover fa2 + fld f13,44*REGBYTES(sp) // Recover fa3 + fld f14,45*REGBYTES(sp) // Recover fa4 + fld f15,46*REGBYTES(sp) // Recover fa5 + fld f16,47*REGBYTES(sp) // Recover fa6 + fld f17,48*REGBYTES(sp) // Recover fa7 + fld f28,59*REGBYTES(sp) // Recover ft8 + fld f29,60*REGBYTES(sp) // Recover ft9 + fld f30,61*REGBYTES(sp) // Recover ft10 + fld f31,62*REGBYTES(sp) // Recover ft11 + LOAD t0, 63*REGBYTES(sp) // Recover fcsr + csrw fcsr, t0 // +#endif + + /* Recover the saved context and return to the point of interrupt. */ + + /* Recover standard registers. */ + /* Restore registers, + Skip global pointer because that does not change */ + + LOAD t0, 30*REGBYTES(sp) // Recover mepc + csrw mepc, t0 // Setup mepc + li t0, 0x1880 // Prepare MPIP +#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) + li t1, 1<<13 + or t0, t1, t0 +#endif + csrw mstatus, t0 // Enable MPIP + + LOAD x1, 28*REGBYTES(sp) // Recover RA + LOAD x5, 19*REGBYTES(sp) // Recover t0 + LOAD x6, 18*REGBYTES(sp) // Recover t1 + LOAD x7, 17*REGBYTES(sp) // Recover t2 + LOAD x8, 12*REGBYTES(sp) // Recover s0 + LOAD x10, 27*REGBYTES(sp) // Recover a0 + LOAD x11, 26*REGBYTES(sp) // Recover a1 + LOAD x12, 25*REGBYTES(sp) // Recover a2 + LOAD x13, 24*REGBYTES(sp) // Recover a3 + LOAD x14, 23*REGBYTES(sp) // Recover a4 + LOAD x15, 22*REGBYTES(sp) // Recover a5 + LOAD x16, 21*REGBYTES(sp) // Recover a6 + LOAD x17, 20*REGBYTES(sp) // Recover a7 + LOAD x28, 16*REGBYTES(sp) // Recover t3 + LOAD x29, 15*REGBYTES(sp) // Recover t4 + LOAD x30, 14*REGBYTES(sp) // Recover t5 + LOAD x31, 13*REGBYTES(sp) // Recover t6 + +#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) + addi sp, sp, 65*REGBYTES // Recover stack frame - with floating point enabled +#else + addi sp, sp, 32*REGBYTES // Recover stack frame - without floating point enabled +#endif + mret // Return to point of interrupt + + /* } + else + { */ +_tx_thread_preempt_restore: + /* Instead of directly activating the thread again, ensure we save the + entire stack frame by saving the remaining registers. */ + + LOAD t0, 2*REGBYTES(t1) // Pickup thread's stack pointer + ori t3, x0, 1 // Build interrupt stack type + STORE t3, 0(t0) // Store stack type + + /* Store floating point preserved registers. */ +#ifdef __riscv_float_abi_single + fsw f8, 39*REGBYTES(t0) // Store fs0 + fsw f9, 40*REGBYTES(t0) // Store fs1 + fsw f18, 49*REGBYTES(t0) // Store fs2 + fsw f19, 50*REGBYTES(t0) // Store fs3 + fsw f20, 51*REGBYTES(t0) // Store fs4 + fsw f21, 52*REGBYTES(t0) // Store fs5 + fsw f22, 53*REGBYTES(t0) // Store fs6 + fsw f23, 54*REGBYTES(t0) // Store fs7 + fsw f24, 55*REGBYTES(t0) // Store fs8 + fsw f25, 56*REGBYTES(t0) // Store fs9 + fsw f26, 57*REGBYTES(t0) // Store fs10 + fsw f27, 58*REGBYTES(t0) // Store fs11 +#elif defined(__riscv_float_abi_double) + fsd f8, 39*REGBYTES(t0) // Store fs0 + fsd f9, 40*REGBYTES(t0) // Store fs1 + fsd f18, 49*REGBYTES(t0) // Store fs2 + fsd f19, 50*REGBYTES(t0) // Store fs3 + fsd f20, 51*REGBYTES(t0) // Store fs4 + fsd f21, 52*REGBYTES(t0) // Store fs5 + fsd f22, 53*REGBYTES(t0) // Store fs6 + fsd f23, 54*REGBYTES(t0) // Store fs7 + fsd f24, 55*REGBYTES(t0) // Store fs8 + fsd f25, 56*REGBYTES(t0) // Store fs9 + fsd f26, 57*REGBYTES(t0) // Store fs10 + fsd f27, 58*REGBYTES(t0) // Store fs11 +#endif + + /* Store standard preserved registers. */ + + STORE x9, 11*REGBYTES(t0) // Store s1 + STORE x18, 10*REGBYTES(t0) // Store s2 + STORE x19, 9*REGBYTES(t0) // Store s3 + STORE x20, 8*REGBYTES(t0) // Store s4 + STORE x21, 7*REGBYTES(t0) // Store s5 + STORE x22, 6*REGBYTES(t0) // Store s6 + STORE x23, 5*REGBYTES(t0) // Store s7 + STORE x24, 4*REGBYTES(t0) // Store s8 + STORE x25, 3*REGBYTES(t0) // Store s9 + STORE x26, 2*REGBYTES(t0) // Store s10 + STORE x27, 1*REGBYTES(t0) // Store s11 + // Note: s0 is already stored! + + /* Save the remaining time-slice and disable it. */ + /* if (_tx_timer_time_slice) + { */ + + la t0, _tx_timer_time_slice // Pickup base of time-slice array + add t0, t0, t4 // Select this hart's time-slice slot + lw t2, 0(t0) // Pickup time slice + beqz t2, _tx_thread_dont_save_ts // If 0, skip time slice processing + + /* _tx_thread_current_ptr -> tx_thread_time_slice = _tx_timer_time_slice + _tx_timer_time_slice = 0; */ + + sw t2, TX_THREAD_TIME_SLICE_OFFSET(t1) // Save current time slice + sw x0, 0(t0) // Clear global time slice + + + /* } */ +_tx_thread_dont_save_ts: + /* Clear the current task pointer. */ + /* _tx_thread_current_ptr = TX_NULL; */ + + /* Return to the scheduler. */ + /* _tx_thread_schedule(); */ + + la t0, _tx_thread_current_ptr // Pickup base of current-thread array + add t0, t0, t5 // Select this hart's current-thread slot + STORE x0, 0(t0) // Clear current thread pointer + + fence rw, rw // Publish current-thread clear before ready token + addi t0, t1, TX_THREAD_SMP_LOCK_READY_BIT_OFFSET // Pickup lock/ready-bit address + li t2, 1 // Rebuild ready token + amoswap.w.rl x0, t2, (t0) // Set thread ready token for reschedule + /* } */ + +_tx_thread_idle_system_restore: + /* Just return back to the scheduler! */ + j _tx_thread_schedule // Return to scheduler + +/* } */ diff --git a/port/threadx_smp/src/tx_thread_context_save.S b/port/threadx_smp/src/tx_thread_context_save.S new file mode 100644 index 0000000..bddb7df --- /dev/null +++ b/port/threadx_smp/src/tx_thread_context_save.S @@ -0,0 +1,289 @@ +/*************************************************************************** + * Copyright (c) 2024 Microsoft Corporation + * + * This program and the accompanying materials are made available under the + * terms of the MIT License which is available at + * https://opensource.org/licenses/MIT. + * + * SPDX-License-Identifier: MIT + **************************************************************************/ + + +/**************************************************************************/ +/**************************************************************************/ +/** */ +/** ThreadX Component */ +/** */ +/** Thread */ +/** */ +/**************************************************************************/ +/**************************************************************************/ + +#include "tx_port.h" + + .section .text +/**************************************************************************/ +/* */ +/* FUNCTION RELEASE */ +/* */ +/* _tx_thread_context_save RISC-V64/GNU */ +/* 6.2.1 */ +/* AUTHOR */ +/* */ +/* Scott Larson, Microsoft Corporation */ +/* */ +/* DESCRIPTION */ +/* */ +/* This function saves the context of an executing thread in the */ +/* beginning of interrupt processing. The function also ensures that */ +/* the system stack is used upon return to the calling ISR. */ +/* */ +/* INPUT */ +/* */ +/* None */ +/* */ +/* OUTPUT */ +/* */ +/* None */ +/* */ +/* CALLS */ +/* */ +/* None */ +/* */ +/* CALLED BY */ +/* */ +/* ISRs */ +/* */ +/* RELEASE HISTORY */ +/* */ +/* DATE NAME DESCRIPTION */ +/* */ +/* 03-08-2023 Scott Larson Initial Version 6.2.1 */ +/* */ +/**************************************************************************/ +/* VOID _tx_thread_context_save(VOID) +{ */ + .global _tx_thread_context_save +_tx_thread_context_save: + + /* Upon entry to this routine, it is assumed that interrupts are locked + out and the interrupt stack fame has been allocated and x1 (ra) has + been saved on the stack. */ + + STORE t0, 19*REGBYTES(sp) // Save t0 + STORE t1, 18*REGBYTES(sp) // Save t1 + STORE x7, 17*REGBYTES(sp) // Save t2 before reusing it + STORE x28, 16*REGBYTES(sp) // Save t3 before reusing it + STORE x29, 15*REGBYTES(sp) // Save t4 before reusing it + + csrr t2, mhartid // Pickup current hart ID + slli t3, t2, 2 // Build per-hart ULONG offset + slli t4, t2, LOG_REGBYTES // Build per-hart pointer offset + la t1, _tx_thread_system_state // Pickup base of system state array + add t0, t1, t3 // Select this hart's system-state slot + lw t1, 0(t0) // Pickup system state + + /* Check for a nested interrupt condition. */ + /* if (_tx_thread_system_state++) + { */ + beqz t1, _tx_thread_not_nested_save // If 0, first interrupt condition + addi t1, t1, 1 // Increment the interrupt counter + sw t1, 0(t0) // Store the interrupt counter + + /* Nested interrupt condition. + Save the reset of the scratch registers on the stack and return to the + calling ISR. */ + + STORE x8, 12*REGBYTES(sp) // Store s0 + STORE x10, 27*REGBYTES(sp) // Store a0 + STORE x11, 26*REGBYTES(sp) // Store a1 + STORE x12, 25*REGBYTES(sp) // Store a2 + STORE x13, 24*REGBYTES(sp) // Store a3 + STORE x14, 23*REGBYTES(sp) // Store a4 + STORE x15, 22*REGBYTES(sp) // Store a5 + STORE x16, 21*REGBYTES(sp) // Store a6 + STORE x17, 20*REGBYTES(sp) // Store a7 + STORE x30, 14*REGBYTES(sp) // Store t5 + STORE x31, 13*REGBYTES(sp) // Store t6 + csrr t0, mepc // Load exception program counter + STORE t0, 30*REGBYTES(sp) // Save it on the stack + + /* Save floating point scratch registers. */ +#if defined(__riscv_float_abi_single) + fsw f0, 31*REGBYTES(sp) // Store ft0 + fsw f1, 32*REGBYTES(sp) // Store ft1 + fsw f2, 33*REGBYTES(sp) // Store ft2 + fsw f3, 34*REGBYTES(sp) // Store ft3 + fsw f4, 35*REGBYTES(sp) // Store ft4 + fsw f5, 36*REGBYTES(sp) // Store ft5 + fsw f6, 37*REGBYTES(sp) // Store ft6 + fsw f7, 38*REGBYTES(sp) // Store ft7 + fsw f10,41*REGBYTES(sp) // Store fa0 + fsw f11,42*REGBYTES(sp) // Store fa1 + fsw f12,43*REGBYTES(sp) // Store fa2 + fsw f13,44*REGBYTES(sp) // Store fa3 + fsw f14,45*REGBYTES(sp) // Store fa4 + fsw f15,46*REGBYTES(sp) // Store fa5 + fsw f16,47*REGBYTES(sp) // Store fa6 + fsw f17,48*REGBYTES(sp) // Store fa7 + fsw f28,59*REGBYTES(sp) // Store ft8 + fsw f29,60*REGBYTES(sp) // Store ft9 + fsw f30,61*REGBYTES(sp) // Store ft10 + fsw f31,62*REGBYTES(sp) // Store ft11 + csrr t0, fcsr + STORE t0, 63*REGBYTES(sp) // Store fcsr +#elif defined(__riscv_float_abi_double) + fsd f0, 31*REGBYTES(sp) // Store ft0 + fsd f1, 32*REGBYTES(sp) // Store ft1 + fsd f2, 33*REGBYTES(sp) // Store ft2 + fsd f3, 34*REGBYTES(sp) // Store ft3 + fsd f4, 35*REGBYTES(sp) // Store ft4 + fsd f5, 36*REGBYTES(sp) // Store ft5 + fsd f6, 37*REGBYTES(sp) // Store ft6 + fsd f7, 38*REGBYTES(sp) // Store ft7 + fsd f10,41*REGBYTES(sp) // Store fa0 + fsd f11,42*REGBYTES(sp) // Store fa1 + fsd f12,43*REGBYTES(sp) // Store fa2 + fsd f13,44*REGBYTES(sp) // Store fa3 + fsd f14,45*REGBYTES(sp) // Store fa4 + fsd f15,46*REGBYTES(sp) // Store fa5 + fsd f16,47*REGBYTES(sp) // Store fa6 + fsd f17,48*REGBYTES(sp) // Store fa7 + fsd f28,59*REGBYTES(sp) // Store ft8 + fsd f29,60*REGBYTES(sp) // Store ft9 + fsd f30,61*REGBYTES(sp) // Store ft10 + fsd f31,62*REGBYTES(sp) // Store ft11 + csrr t0, fcsr + STORE t0, 63*REGBYTES(sp) // Store fcsr +#endif + +#ifdef TX_ENABLE_EXECUTION_CHANGE_NOTIFY + call _tx_execution_isr_enter // Call the ISR execution enter function +#endif + + ret // Return to calling ISR + +_tx_thread_not_nested_save: + /* } */ + + /* Otherwise, not nested, check to see if a thread was running. */ + /* else if (_tx_thread_current_ptr) + { */ + addi t1, t1, 1 // Increment the interrupt counter + sw t1, 0(t0) // Store the interrupt counter + + /* Not nested: Find the user thread that was running and load our SP */ + + la t1, _tx_thread_current_ptr // Pickup base of current-thread array + add t0, t1, t4 // Select this hart's current-thread slot + LOAD t0, 0(t0) // Pickup current thread pointer + beqz t0, _tx_thread_idle_system_save // If NULL, idle system was interrupted + + /* Save the standard scratch registers. */ + + STORE x8, 12*REGBYTES(sp) // Store s0 + STORE x10, 27*REGBYTES(sp) // Store a0 + STORE x11, 26*REGBYTES(sp) // Store a1 + STORE x12, 25*REGBYTES(sp) // Store a2 + STORE x13, 24*REGBYTES(sp) // Store a3 + STORE x14, 23*REGBYTES(sp) // Store a4 + STORE x15, 22*REGBYTES(sp) // Store a5 + STORE x16, 21*REGBYTES(sp) // Store a6 + STORE x17, 20*REGBYTES(sp) // Store a7 + STORE x30, 14*REGBYTES(sp) // Store t5 + STORE x31, 13*REGBYTES(sp) // Store t6 + + csrr t0, mepc // Load exception program counter + STORE t0, 30*REGBYTES(sp) // Save it on the stack + + /* Save floating point scratch registers. */ +#if defined(__riscv_float_abi_single) + fsw f0, 31*REGBYTES(sp) // Store ft0 + fsw f1, 32*REGBYTES(sp) // Store ft1 + fsw f2, 33*REGBYTES(sp) // Store ft2 + fsw f3, 34*REGBYTES(sp) // Store ft3 + fsw f4, 35*REGBYTES(sp) // Store ft4 + fsw f5, 36*REGBYTES(sp) // Store ft5 + fsw f6, 37*REGBYTES(sp) // Store ft6 + fsw f7, 38*REGBYTES(sp) // Store ft7 + fsw f10,41*REGBYTES(sp) // Store fa0 + fsw f11,42*REGBYTES(sp) // Store fa1 + fsw f12,43*REGBYTES(sp) // Store fa2 + fsw f13,44*REGBYTES(sp) // Store fa3 + fsw f14,45*REGBYTES(sp) // Store fa4 + fsw f15,46*REGBYTES(sp) // Store fa5 + fsw f16,47*REGBYTES(sp) // Store fa6 + fsw f17,48*REGBYTES(sp) // Store fa7 + fsw f28,59*REGBYTES(sp) // Store ft8 + fsw f29,60*REGBYTES(sp) // Store ft9 + fsw f30,61*REGBYTES(sp) // Store ft10 + fsw f31,62*REGBYTES(sp) // Store ft11 + csrr t0, fcsr + STORE t0, 63*REGBYTES(sp) // Store fcsr +#elif defined(__riscv_float_abi_double) + fsd f0, 31*REGBYTES(sp) // Store ft0 + fsd f1, 32*REGBYTES(sp) // Store ft1 + fsd f2, 33*REGBYTES(sp) // Store ft2 + fsd f3, 34*REGBYTES(sp) // Store ft3 + fsd f4, 35*REGBYTES(sp) // Store ft4 + fsd f5, 36*REGBYTES(sp) // Store ft5 + fsd f6, 37*REGBYTES(sp) // Store ft6 + fsd f7, 38*REGBYTES(sp) // Store ft7 + fsd f10,41*REGBYTES(sp) // Store fa0 + fsd f11,42*REGBYTES(sp) // Store fa1 + fsd f12,43*REGBYTES(sp) // Store fa2 + fsd f13,44*REGBYTES(sp) // Store fa3 + fsd f14,45*REGBYTES(sp) // Store fa4 + fsd f15,46*REGBYTES(sp) // Store fa5 + fsd f16,47*REGBYTES(sp) // Store fa6 + fsd f17,48*REGBYTES(sp) // Store fa7 + fsd f28,59*REGBYTES(sp) // Store ft8 + fsd f29,60*REGBYTES(sp) // Store ft9 + fsd f30,61*REGBYTES(sp) // Store ft10 + fsd f31,62*REGBYTES(sp) // Store ft11 + csrr t0, fcsr + STORE t0, 63*REGBYTES(sp) // Store fcsr +#endif + + /* Save the current stack pointer in the thread's control block. */ + /* _tx_thread_current_ptr -> tx_thread_stack_ptr = sp; */ + + /* Switch to the system stack. */ + /* sp = _tx_thread_system_stack_ptr; */ + + la t0, _tx_thread_current_ptr // Pickup base of current-thread array + add t0, t0, t4 // Select this hart's current-thread slot + LOAD t1, 0(t0) // Pickup current thread pointer + STORE sp, 2*REGBYTES(t1) // Save stack pointer + +#ifdef TX_ENABLE_EXECUTION_CHANGE_NOTIFY + /* _tx_execution_isr_enter is called with thread stack pointer */ + call _tx_execution_isr_enter // Call the ISR execution enter function +#endif + + la t0, _tx_thread_system_stack_ptr // Pickup base of system-stack array + add t0, t0, t4 // Select this hart's system-stack slot + LOAD sp, 0(t0) // Switch to system stack + ret // Return to calling ISR + + /* } + else + { */ + +_tx_thread_idle_system_save: + + +#ifdef TX_ENABLE_EXECUTION_CHANGE_NOTIFY + call _tx_execution_isr_enter // Call the ISR execution enter function +#endif + + /* Interrupt occurred in the scheduling loop. */ + + /* } +} */ +#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) + addi sp, sp, 65*REGBYTES // Recover stack frame - with floating point enabled +#else + addi sp, sp, 32*REGBYTES // Recover the reserved stack space +#endif + ret // Return to calling ISR diff --git a/port/threadx_smp/src/tx_thread_smp_core_get.S b/port/threadx_smp/src/tx_thread_smp_core_get.S new file mode 100644 index 0000000..fd89b7a --- /dev/null +++ b/port/threadx_smp/src/tx_thread_smp_core_get.S @@ -0,0 +1,12 @@ +#include "tx_port.h" + + .section .text + .align 2 + + .global _tx_thread_smp_core_get + .type _tx_thread_smp_core_get, @function +_tx_thread_smp_core_get: + csrr a0, mhartid // Pickup the current hart ID + ret + + diff --git a/port/threadx_smp/src/tx_thread_smp_current_state_get.S b/port/threadx_smp/src/tx_thread_smp_current_state_get.S new file mode 100644 index 0000000..4fea92a --- /dev/null +++ b/port/threadx_smp/src/tx_thread_smp_current_state_get.S @@ -0,0 +1,14 @@ +#include "tx_port.h" + + .section .text + .align 2 + + .global _tx_thread_smp_current_state_get + .type _tx_thread_smp_current_state_get, @function +_tx_thread_smp_current_state_get: + csrr t0, mhartid // Pickup current hart ID + la t1, _tx_thread_system_state // Base of per-hart system-state array + slli t0, t0, 2 // Build offset into array + add t1, t1, t0 // Select this hart's slot + LWU a0, 0(t1) // Return current system state + ret diff --git a/port/threadx_smp/src/tx_thread_smp_current_thread_get.S b/port/threadx_smp/src/tx_thread_smp_current_thread_get.S new file mode 100644 index 0000000..6f6ec97 --- /dev/null +++ b/port/threadx_smp/src/tx_thread_smp_current_thread_get.S @@ -0,0 +1,19 @@ +#include "tx_port.h" + + .section .text + .align 2 + + .global _tx_thread_smp_current_thread_get + .type _tx_thread_smp_current_thread_get, @function +_tx_thread_smp_current_thread_get: + csrr t0, mstatus // Pickup current interrupt posture + csrci mstatus, 0x08 // Lockout interrupts + + csrr t1, mhartid // Pickup current hart ID + la t2, _tx_thread_current_ptr // Base of current-thread pointer array + slli t1, t1, LOG_REGBYTES // Build per-hart byte offset + add t2, t2, t1 // Select this hart's slot + LOAD a0, 0(t2) // Pickup current thread pointer + + csrw mstatus, t0 // Restore interrupt posture + ret diff --git a/port/threadx_smp/src/tx_thread_smp_initialize_wait.S b/port/threadx_smp/src/tx_thread_smp_initialize_wait.S new file mode 100644 index 0000000..db78833 --- /dev/null +++ b/port/threadx_smp/src/tx_thread_smp_initialize_wait.S @@ -0,0 +1,23 @@ +#include "tx_port.h" + + .section .text + .align 2 + + .global _tx_thread_smp_initialize_wait + .type _tx_thread_smp_initialize_wait, @function +_tx_thread_smp_initialize_wait: + + /* Core 0 continues initialization. All other harts wait until the + release flag is set by the low-level SMP initialization path. */ + csrr t0, mhartid // Pickup current hart ID + beqz t0, _tx_thread_smp_initialize_done // Core 0 does not wait + + la t1, _tx_thread_smp_release_cores_flag // Release flag address + +_tx_thread_smp_initialize_wait_loop: + LWU t2, 0(t1) // Pickup release flag + bnez t2, _tx_thread_smp_initialize_done // Exit once core 0 releases secondaries + j _tx_thread_smp_initialize_wait_loop // Keep waiting + +_tx_thread_smp_initialize_done: + ret diff --git a/port/threadx_smp/src/tx_thread_smp_low_level_initialize.S b/port/threadx_smp/src/tx_thread_smp_low_level_initialize.S new file mode 100644 index 0000000..f2f7357 --- /dev/null +++ b/port/threadx_smp/src/tx_thread_smp_low_level_initialize.S @@ -0,0 +1,31 @@ +#include "tx_port.h" + + .section .text + .align 2 + + .global _tx_thread_smp_low_level_initialize + .type _tx_thread_smp_low_level_initialize, @function +_tx_thread_smp_low_level_initialize: + + /* Only hart 0 performs low-level SMP initialization. */ + csrr t0, mhartid // Pickup current hart ID + bnez t0, _tx_thread_smp_low_level_initialize_done + + /* Keep secondary harts parked until kernel-enter releases them. */ + la t1, _tx_thread_smp_release_cores_flag + sw x0, 0(t1) // Clear release flag + +#ifdef TX_THREAD_SMP_DYNAMIC_CORE_MAX + /* Record the detected core count supplied by the caller. */ + la t1, _tx_thread_smp_detected_cores + sw a0, 0(t1) +#endif + + /* Platform-specific secondary-hart startup can go here. + This is where core 0 would program boot addresses, send startup IPIs, + or otherwise bring the other harts online. */ + + fence rw, rw // Publish startup state + +_tx_thread_smp_low_level_initialize_done: + ret diff --git a/port/threadx_smp/src/tx_thread_smp_protect.S b/port/threadx_smp/src/tx_thread_smp_protect.S new file mode 100644 index 0000000..4618e3c --- /dev/null +++ b/port/threadx_smp/src/tx_thread_smp_protect.S @@ -0,0 +1,50 @@ +#include "tx_port.h" + + .section .text + .align 2 + + .global _tx_thread_smp_protect + .type _tx_thread_smp_protect, @function +_tx_thread_smp_protect: + + /* Disable interrupts so we don't get preempted. */ + csrr a0, mstatus // Pickup current interrupt posture + csrci mstatus, 0x08 // Lockout interrupts + + /* Pickup the hart ID. */ + csrr t2, mhartid // Pickup the current hart ID + + /* Build address to protection structure. */ + la t1, _tx_thread_smp_protection + + /* If this hart already owns protection, just nest the count. */ + LWU t3, 4(t1) // Pickup owning hart + beq t3, t2, _owned // Already owned by this hart + + /* Try to get the protection. */ + LWU t4, 0(t1) // Pickup protection flag + beqz t4, _get_protection // If clear, try to claim it + + /* Protection is busy. Restore interrupts and retry. */ + csrw mstatus, a0 // Restore interrupts + j _tx_thread_smp_protect // Restart the protection attempt + +_get_protection: + li t4, 1 // Build lock value + amoswap.w.aq t5, t4, (t1) // Attempt to get protection + bnez t5, _protection_busy // If old value != 0, retry + +_got_protection: + fence rw, rw // Ensure lock acquisition is visible + sw t2, 4(t1) // Save owning hart + +_owned: + LWU t5, 8(t1) // Pickup ownership count + addi t5, t5, 1 // Increment ownership count + sw t5, 8(t1) // Store ownership count + fence rw, rw // Publish owner/count before return + ret + +_protection_busy: + csrw mstatus, a0 // Restore interrupts + j _tx_thread_smp_protect // Restart the protection attempt diff --git a/port/threadx_smp/src/tx_thread_smp_time_get.c b/port/threadx_smp/src/tx_thread_smp_time_get.c new file mode 100644 index 0000000..fd1cb36 --- /dev/null +++ b/port/threadx_smp/src/tx_thread_smp_time_get.c @@ -0,0 +1,8 @@ +#include "platform.h" +#include "tx_api.h" +#include + +ULONG _tx_thread_smp_time_get(void) +{ + return (ULONG)get_aclint_mtime(aclint); +} diff --git a/port/threadx_smp/src/tx_thread_smp_unprotect.S b/port/threadx_smp/src/tx_thread_smp_unprotect.S new file mode 100644 index 0000000..431898b --- /dev/null +++ b/port/threadx_smp/src/tx_thread_smp_unprotect.S @@ -0,0 +1,43 @@ +#include "tx_port.h" + + .section .text + .align 2 + + .global _tx_thread_smp_unprotect + .type _tx_thread_smp_unprotect, @function +_tx_thread_smp_unprotect: + + /* Lockout interrupts while protection state is updated. */ + csrci mstatus, 0x08 // Lockout interrupts + + /* Pickup the current hart ID. */ + csrr t1, mhartid // Pickup hart ID + + /* Build address of protection structure. */ + la t2, _tx_thread_smp_protection + + /* Only the owning hart may release the protection. */ + LWU t3, 4(t2) // Pickup owning hart + bne t1, t3, _still_protected // Not owner, skip release + + /* Pickup and decrement the protection count. */ + LWU t3, 8(t2) // Pickup protection count + beqz t3, _still_protected // Already cleared + addi t3, t3, -1 // Decrement protection count + sw t3, 8(t2) // Store new count + bnez t3, _still_protected // Still nested, stay protected + + /* If preemption is disabled, keep protection in force. */ + la t4, _tx_thread_preempt_disable + LWU t5, 0(t4) // Pickup preempt disable + bnez t5, _still_protected // Skip protection release + + /* Release the protection. */ + li t3, -1 // Invalid owner value + sw t3, 4(t2) // Mark owning hart invalid + fence rw, rw // Ensure shared accesses complete + amoswap.w.rl x0, x0, (t2) // Release protection flag + +_still_protected: + csrw mstatus, a0 // Restore interrupt posture + ret diff --git a/port/threadx_smp/src/tx_thread_stack_build.S b/port/threadx_smp/src/tx_thread_stack_build.S new file mode 100644 index 0000000..d11d2bf --- /dev/null +++ b/port/threadx_smp/src/tx_thread_stack_build.S @@ -0,0 +1,229 @@ +/*************************************************************************** + * Copyright (c) 2024 Microsoft Corporation + * + * This program and the accompanying materials are made available under the + * terms of the MIT License which is available at + * https://opensource.org/licenses/MIT. + * + * SPDX-License-Identifier: MIT + **************************************************************************/ + + +/**************************************************************************/ +/**************************************************************************/ +/** */ +/** ThreadX Component */ +/** */ +/** Thread */ +/** */ +/**************************************************************************/ +/**************************************************************************/ + +#include "tx_port.h" + + .section .text +/**************************************************************************/ +/* */ +/* FUNCTION RELEASE */ +/* */ +/* _tx_thread_stack_build RISC-V64/GNU */ +/* 6.2.1 */ +/* AUTHOR */ +/* */ +/* Scott Larson, Microsoft Corporation */ +/* */ +/* DESCRIPTION */ +/* */ +/* This function builds a stack frame on the supplied thread's stack. */ +/* The stack frame results in a fake interrupt return to the supplied */ +/* function pointer. */ +/* */ +/* INPUT */ +/* */ +/* thread_ptr Pointer to thread control blk */ +/* function_ptr Pointer to return function */ +/* */ +/* OUTPUT */ +/* */ +/* None */ +/* */ +/* CALLS */ +/* */ +/* None */ +/* */ +/* CALLED BY */ +/* */ +/* _tx_thread_create Create thread service */ +/* */ +/* RELEASE HISTORY */ +/* */ +/* DATE NAME DESCRIPTION */ +/* */ +/* 03-08-2023 Scott Larson Initial Version 6.2.1 */ +/* */ +/**************************************************************************/ +/* VOID _tx_thread_stack_build(TX_THREAD *thread_ptr, VOID (*function_ptr)(VOID)) +{ */ + .global _tx_thread_stack_build +_tx_thread_stack_build: + + /* Build a fake interrupt frame. The form of the fake interrupt stack + on the RISC-V should look like the following after it is built: + Reg Index + Stack Top: 1 0 Interrupt stack frame type + x27 1 Initial s11 + x26 2 Initial s10 + x25 3 Initial s9 + x24 4 Initial s8 + x23 5 Initial s7 + x22 6 Initial s6 + x21 7 Initial s5 + x20 8 Initial s4 + x19 9 Initial s3 + x18 10 Initial s2 + x9 11 Initial s1 + x8 12 Initial s0 + x31 13 Initial t6 + x30 14 Initial t5 + x29 15 Initial t4 + x28 16 Initial t3 + x7 17 Initial t2 + x6 18 Initial t1 + x5 19 Initial t0 + x17 20 Initial a7 + x16 21 Initial a6 + x15 22 Initial a5 + x14 23 Initial a4 + x13 24 Initial a3 + x12 25 Initial a2 + x11 26 Initial a1 + x10 27 Initial a0 + x1 28 Initial ra + -- 29 reserved + mepc 30 Initial mepc +If floating point support: + f0 31 Inital ft0 + f1 32 Inital ft1 + f2 33 Inital ft2 + f3 34 Inital ft3 + f4 35 Inital ft4 + f5 36 Inital ft5 + f6 37 Inital ft6 + f7 38 Inital ft7 + f8 39 Inital fs0 + f9 40 Inital fs1 + f10 41 Inital fa0 + f11 42 Inital fa1 + f12 43 Inital fa2 + f13 44 Inital fa3 + f14 45 Inital fa4 + f15 46 Inital fa5 + f16 47 Inital fa6 + f17 48 Inital fa7 + f18 49 Inital fs2 + f19 50 Inital fs3 + f20 51 Inital fs4 + f21 52 Inital fs5 + f22 53 Inital fs6 + f23 54 Inital fs7 + f24 55 Inital fs8 + f25 56 Inital fs9 + f26 57 Inital fs10 + f27 58 Inital fs11 + f28 59 Inital ft8 + f29 60 Inital ft9 + f30 61 Inital ft10 + f31 62 Inital ft11 + fscr 63 Inital fscr + + Stack Bottom: (higher memory address) */ + + LOAD t0, TX_THREAD_STACK_END_OFFSET(a0) // Pickup end of stack area + andi t0, t0, -4*REGBYTES // Ensure alignment (16-byte for RV32 & 32-byte for RV64) + + /* Actually build the stack frame. */ + +#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) + addi t0, t0, -65*REGBYTES +#else + addi t0, t0, -32*REGBYTES // Allocate space for the stack frame +#endif + li t1, 1 // Build stack type + STORE t1, 0*REGBYTES(t0) // Place stack type on the top + STORE x0, 1*REGBYTES(t0) // Initial s11 + STORE x0, 2*REGBYTES(t0) // Initial s10 + STORE x0, 3*REGBYTES(t0) // Initial s9 + STORE x0, 4*REGBYTES(t0) // Initial s8 + STORE x0, 5*REGBYTES(t0) // Initial s7 + STORE x0, 6*REGBYTES(t0) // Initial s6 + STORE x0, 7*REGBYTES(t0) // Initial s5 + STORE x0, 8*REGBYTES(t0) // Initial s4 + STORE x0, 9*REGBYTES(t0) // Initial s3 + STORE x0, 10*REGBYTES(t0) // Initial s2 + STORE x0, 11*REGBYTES(t0) // Initial s1 + STORE x0, 12*REGBYTES(t0) // Initial s0 + STORE x0, 13*REGBYTES(t0) // Initial t6 + STORE x0, 14*REGBYTES(t0) // Initial t5 + STORE x0, 15*REGBYTES(t0) // Initial t4 + STORE x0, 16*REGBYTES(t0) // Initial t3 + STORE x0, 17*REGBYTES(t0) // Initial t2 + STORE x0, 18*REGBYTES(t0) // Initial t1 + STORE x0, 19*REGBYTES(t0) // Initial t0 + STORE x0, 20*REGBYTES(t0) // Initial a7 + STORE x0, 21*REGBYTES(t0) // Initial a6 + STORE x0, 22*REGBYTES(t0) // Initial a5 + STORE x0, 23*REGBYTES(t0) // Initial a4 + STORE x0, 24*REGBYTES(t0) // Initial a3 + STORE x0, 25*REGBYTES(t0) // Initial a2 + STORE x0, 26*REGBYTES(t0) // Initial a1 + STORE x0, 27*REGBYTES(t0) // Initial a0 + STORE x0, 28*REGBYTES(t0) // Initial ra + STORE a1, 30*REGBYTES(t0) // Initial mepc +#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) + STORE x0, 31*REGBYTES(t0) // Inital ft0 + STORE x0, 32*REGBYTES(t0) // Inital ft1 + STORE x0, 33*REGBYTES(t0) // Inital ft2 + STORE x0, 34*REGBYTES(t0) // Inital ft3 + STORE x0, 35*REGBYTES(t0) // Inital ft4 + STORE x0, 36*REGBYTES(t0) // Inital ft5 + STORE x0, 37*REGBYTES(t0) // Inital ft6 + STORE x0, 38*REGBYTES(t0) // Inital ft7 + STORE x0, 39*REGBYTES(t0) // Inital fs0 + STORE x0, 40*REGBYTES(t0) // Inital fs1 + STORE x0, 41*REGBYTES(t0) // Inital fa0 + STORE x0, 42*REGBYTES(t0) // Inital fa1 + STORE x0, 43*REGBYTES(t0) // Inital fa2 + STORE x0, 44*REGBYTES(t0) // Inital fa3 + STORE x0, 45*REGBYTES(t0) // Inital fa4 + STORE x0, 46*REGBYTES(t0) // Inital fa5 + STORE x0, 47*REGBYTES(t0) // Inital fa6 + STORE x0, 48*REGBYTES(t0) // Inital fa7 + STORE x0, 49*REGBYTES(t0) // Inital fs2 + STORE x0, 50*REGBYTES(t0) // Inital fs3 + STORE x0, 51*REGBYTES(t0) // Inital fs4 + STORE x0, 52*REGBYTES(t0) // Inital fs5 + STORE x0, 53*REGBYTES(t0) // Inital fs6 + STORE x0, 54*REGBYTES(t0) // Inital fs7 + STORE x0, 55*REGBYTES(t0) // Inital fs8 + STORE x0, 56*REGBYTES(t0) // Inital fs9 + STORE x0, 57*REGBYTES(t0) // Inital fs10 + STORE x0, 58*REGBYTES(t0) // Inital fs11 + STORE x0, 59*REGBYTES(t0) // Inital ft8 + STORE x0, 60*REGBYTES(t0) // Inital ft9 + STORE x0, 61*REGBYTES(t0) // Inital ft10 + STORE x0, 62*REGBYTES(t0) // Inital ft11 + csrr a1, fcsr // Read fcsr and use it for initial value for each thread + STORE a1, 63*REGBYTES(t0) // Initial fscr + STORE x0, 64*REGBYTES(t0) // Reserved word (0) +#else + STORE x0, 31*REGBYTES(t0) // Reserved word (0) +#endif + + /* Setup stack pointer. */ + /* thread_ptr -> tx_thread_stack_ptr = t0; */ + + STORE t0, 2*REGBYTES(a0) // Save stack pointer in thread's + addi t1, x0, 1 // Build ready flag + sw t1, TX_THREAD_SMP_LOCK_READY_BIT_OFFSET(a0) // Set ready flag + ret // control block and return +/* } */ diff --git a/port/threadx_smp/src/tx_thread_system_return.S b/port/threadx_smp/src/tx_thread_system_return.S new file mode 100644 index 0000000..5fc5811 --- /dev/null +++ b/port/threadx_smp/src/tx_thread_system_return.S @@ -0,0 +1,198 @@ +/*************************************************************************** + * Copyright (c) 2024 Microsoft Corporation + * + * This program and the accompanying materials are made available under the + * terms of the MIT License which is available at + * https://opensource.org/licenses/MIT. + * + * SPDX-License-Identifier: MIT + **************************************************************************/ + + +/**************************************************************************/ +/**************************************************************************/ +/** */ +/** ThreadX Component */ +/** */ +/** Thread */ +/** */ +/**************************************************************************/ +/**************************************************************************/ + +#include "tx_port.h" + + .section .text +/**************************************************************************/ +/* */ +/* FUNCTION RELEASE */ +/* */ +/* _tx_thread_system_return RISC-V64/GNU */ +/* 6.2.1 */ +/* AUTHOR */ +/* */ +/* Scott Larson, Microsoft Corporation */ +/* */ +/* DESCRIPTION */ +/* */ +/* This function is target processor specific. It is used to transfer */ +/* control from a thread back to the system. Only a minimal context */ +/* is saved since the compiler assumes temp registers are going to get */ +/* slicked by a function call anyway. */ +/* */ +/* INPUT */ +/* */ +/* None */ +/* */ +/* OUTPUT */ +/* */ +/* None */ +/* */ +/* CALLS */ +/* */ +/* _tx_thread_schedule Thread scheduling loop */ +/* */ +/* CALLED BY */ +/* */ +/* ThreadX components */ +/* */ +/* RELEASE HISTORY */ +/* */ +/* DATE NAME DESCRIPTION */ +/* */ +/* 03-08-2023 Scott Larson Initial Version 6.2.1 */ +/* */ +/**************************************************************************/ +/* VOID _tx_thread_system_return(VOID) +{ */ + .global _tx_thread_system_return +_tx_thread_system_return: + + /* Save minimal context on the stack. */ + +#if defined(__riscv_float_abi_single) || defined(__riscv_float_abi_double) + addi sp, sp, -29*REGBYTES // Allocate space on the stack - with floating point enabled +#else + addi sp, sp, -16*REGBYTES // Allocate space on the stack - without floating point enabled +#endif + + /* Store floating point preserved registers. */ +#if defined(__riscv_float_abi_single) + fsw f8, 15*REGBYTES(sp) // Store fs0 + fsw f9, 16*REGBYTES(sp) // Store fs1 + fsw f18, 17*REGBYTES(sp) // Store fs2 + fsw f19, 18*REGBYTES(sp) // Store fs3 + fsw f20, 19*REGBYTES(sp) // Store fs4 + fsw f21, 20*REGBYTES(sp) // Store fs5 + fsw f22, 21*REGBYTES(sp) // Store fs6 + fsw f23, 22*REGBYTES(sp) // Store fs7 + fsw f24, 23*REGBYTES(sp) // Store fs8 + fsw f25, 24*REGBYTES(sp) // Store fs9 + fsw f26, 25*REGBYTES(sp) // Store fs10 + fsw f27, 26*REGBYTES(sp) // Store fs11 + csrr t0, fcsr + STORE t0, 27*REGBYTES(sp) // Store fcsr +#elif defined(__riscv_float_abi_double) + fsd f8, 15*REGBYTES(sp) // Store fs0 + fsd f9, 16*REGBYTES(sp) // Store fs1 + fsd f18, 17*REGBYTES(sp) // Store fs2 + fsd f19, 18*REGBYTES(sp) // Store fs3 + fsd f20, 19*REGBYTES(sp) // Store fs4 + fsd f21, 20*REGBYTES(sp) // Store fs5 + fsd f22, 21*REGBYTES(sp) // Store fs6 + fsd f23, 22*REGBYTES(sp) // Store fs7 + fsd f24, 23*REGBYTES(sp) // Store fs8 + fsd f25, 24*REGBYTES(sp) // Store fs9 + fsd f26, 25*REGBYTES(sp) // Store fs10 + fsd f27, 26*REGBYTES(sp) // Store fs11 + csrr t0, fcsr + STORE t0, 27*REGBYTES(sp) // Store fcsr +#endif + + STORE x0, 0(sp) // Solicited stack type + STORE x1, 13*REGBYTES(sp) // Save RA + STORE x8, 12*REGBYTES(sp) // Save s0 + STORE x9, 11*REGBYTES(sp) // Save s1 + STORE x18, 10*REGBYTES(sp) // Save s2 + STORE x19, 9*REGBYTES(sp) // Save s3 + STORE x20, 8*REGBYTES(sp) // Save s4 + STORE x21, 7*REGBYTES(sp) // Save s5 + STORE x22, 6*REGBYTES(sp) // Save s6 + STORE x23, 5*REGBYTES(sp) // Save s7 + STORE x24, 4*REGBYTES(sp) // Save s8 + STORE x25, 3*REGBYTES(sp) // Save s9 + STORE x26, 2*REGBYTES(sp) // Save s10 + STORE x27, 1*REGBYTES(sp) // Save s11 + csrr t0, mstatus // Pickup mstatus + STORE t0, 14*REGBYTES(sp) // Save mstatus + + + /* Lockout interrupts. - will be enabled in _tx_thread_schedule */ + + csrci mstatus, 0xF + +#ifdef TX_ENABLE_EXECUTION_CHANGE_NOTIFY + + call _tx_execution_thread_exit // Call the thread execution exit function +#endif + + csrr t5, mhartid // Pickup current hart ID + slli t6, t5, 2 // Build per-hart ULONG offset + slli t5, t5, LOG_REGBYTES // Build per-hart pointer offset + + la t0, _tx_thread_current_ptr // Pickup base of current-thread array + add t0, t0, t5 // Select this hart's current-thread slot + LOAD t1, 0(t0) // Pickup current thread pointer + la t2, _tx_thread_system_stack_ptr // Pickup base of system-stack array + + /* Save current stack and switch to system stack. */ + /* _tx_thread_current_ptr -> tx_thread_stack_ptr = SP; + SP = _tx_thread_system_stack_ptr; */ + + STORE sp, 2*REGBYTES(t1) // Save stack pointer + add t2, t2, t5 // Select this hart's system-stack slot + LOAD sp, 0(t2) // Switch to system stack + + /* Determine if the time-slice is active. */ + /* if (_tx_timer_time_slice) + { */ + + la t4, _tx_timer_time_slice // Pickup base of time-slice array + add t4, t4, t6 // Select this hart's time-slice slot + lw t3, 0(t4) // Pickup time slice value + la t2, _tx_thread_schedule // Pickup address of scheduling loop + beqz t3, _tx_thread_dont_save_ts // If no time-slice, don't save it + + /* Save time-slice for the thread and clear the current time-slice. */ + /* _tx_thread_current_ptr -> tx_thread_time_slice = _tx_timer_time_slice; + _tx_timer_time_slice = 0; */ + + sw t3, TX_THREAD_TIME_SLICE_OFFSET(t1) // Save current time-slice for thread + sw x0, 0(t4) // Clear time-slice variable + + /* } */ +_tx_thread_dont_save_ts: + + /* Clear the current thread pointer. */ + /* _tx_thread_current_ptr = TX_NULL; */ + + STORE x0, 0(t0) // Clear current thread pointer + + /* Make the thread runnable again before returning to the scheduler. */ + fence rw, rw // Publish current-thread clear before ready token + addi t3, t1, TX_THREAD_SMP_LOCK_READY_BIT_OFFSET // Pickup lock/ready-bit address + li t4, 1 // Build ready token + amoswap.w.rl x0, t4, (t3) // Restore ready token + + /* Clear protection state. */ + la t3, _tx_thread_preempt_disable // Pickup preempt-disable address + sw x0, 0(t3) // Clear preempt disable flag + + la t3, _tx_thread_smp_protection // Pickup protection structure + sw x0, 8(t3) // Clear protection count + li t4, -1 // Build invalid owner value + sw t4, 4(t3) // Invalidate owning hart + fence rw, rw // Ensure shared accesses complete before unlock + sw x0, 0(t3) // Clear protection in-force flag + jr t2 // Return to thread scheduler + +/* } */