From e68918c2e83521cc08c9d67988e2dd5dd2a86fbd Mon Sep 17 00:00:00 2001 From: Eyck Jentzsch Date: Fri, 9 Jul 2021 07:37:12 +0200 Subject: [PATCH] fix instruction decode --- gen_input/templates/interp/CORENAME.cpp.gtl | 87 +++++++++------------ incl/iss/arch/riscv_hart_m_p.h | 4 +- src/vm/interp/vm_tgc_c.cpp | 87 +++++++++------------ 3 files changed, 78 insertions(+), 100 deletions(-) diff --git a/gen_input/templates/interp/CORENAME.cpp.gtl b/gen_input/templates/interp/CORENAME.cpp.gtl index 4975854..0351533 100644 --- a/gen_input/templates/interp/CORENAME.cpp.gtl +++ b/gen_input/templates/interp/CORENAME.cpp.gtl @@ -85,6 +85,7 @@ protected: inline const char *name(size_t index){return traits::reg_aliases.at(index);} + compile_func decode_inst(code_word_t instr) ; virt_addr_t execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit) override; // some compile time constants @@ -98,46 +99,13 @@ protected: std::array lut_00, lut_01, lut_10; std::array lut_11; - std::array qlut; + struct instruction_pattern { + uint32_t value; + uint32_t mask; + compile_func opc; + }; - std::array lutmasks = {{EXTR_MASK16, EXTR_MASK16, EXTR_MASK16, EXTR_MASK32}}; - - void expand_bit_mask(int pos, uint32_t mask, uint32_t value, uint32_t valid, uint32_t idx, compile_func lut[], - compile_func f) { - if (pos < 0) { - lut[idx] = f; - } else { - auto bitmask = 1UL << pos; - if ((mask & bitmask) == 0) { - expand_bit_mask(pos - 1, mask, value, valid, idx, lut, f); - } else { - if ((valid & bitmask) == 0) { - expand_bit_mask(pos - 1, mask, value, valid, (idx << 1), lut, f); - expand_bit_mask(pos - 1, mask, value, valid, (idx << 1) + 1, lut, f); - } else { - auto new_val = idx << 1; - if ((value & bitmask) != 0) new_val++; - expand_bit_mask(pos - 1, mask, value, valid, new_val, lut, f); - } - } - } - } - - inline uint32_t extract_fields(uint32_t val) { return extract_fields(29, val >> 2, lutmasks[val & 0x3], 0); } - - uint32_t extract_fields(int pos, uint32_t val, uint32_t mask, uint32_t lut_val) { - if (pos >= 0) { - auto bitmask = 1UL << pos; - if ((mask & bitmask) == 0) { - lut_val = extract_fields(pos - 1, val, mask, lut_val); - } else { - auto new_val = lut_val << 1; - if ((val & bitmask) != 0) new_val++; - lut_val = extract_fields(pos - 1, val, mask, new_val); - } - } - return lut_val; - } + std::array, 4> qlut; inline void raise(uint16_t trap_id, uint16_t cause){ auto trap_val = 0x80ULL << 24 | (cause << 16) | trap_id; @@ -296,22 +264,46 @@ template void debug_fn(CODE_WORD insn) { template vm_impl::vm_impl() { this(new ARCH()); } +// according to +// https://stackoverflow.com/questions/8871204/count-number-of-1s-in-binary-representation +#ifdef __GCC__ +constexpr size_t bit_count(uint32_t u) { return __builtin_popcount(u); } +#elif __cplusplus < 201402L +constexpr size_t uCount(uint32_t u) { return u - ((u >> 1) & 033333333333) - ((u >> 2) & 011111111111); } +constexpr size_t bit_count(uint32_t u) { return ((uCount(u) + (uCount(u) >> 3)) & 030707070707) % 63; } +#else +constexpr size_t bit_count(uint32_t u) { + size_t uCount = u - ((u >> 1) & 033333333333) - ((u >> 2) & 011111111111); + return ((uCount + (uCount >> 3)) & 030707070707) % 63; +} +#endif + template vm_impl::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id) : vm_base(core, core_id, cluster_id) { - qlut[0] = lut_00.data(); - qlut[1] = lut_01.data(); - qlut[2] = lut_10.data(); - qlut[3] = lut_11.data(); for (auto instr : instr_descr) { - auto quantrant = instr.value & 0x3; - expand_bit_mask(29, lutmasks[quantrant], instr.value >> 2, instr.mask >> 2, 0, qlut[quantrant], instr.op); + auto quadrant = instr.value & 0x3; + qlut[quadrant].push_back(instruction_pattern{instr.value, instr.mask, instr.op}); + } + for(auto& lut: qlut){ + std::sort(std::begin(lut), std::end(lut), [](instruction_pattern const& a, instruction_pattern const& b){ + return bit_count(a.mask) > bit_count(b.mask); + }); } } inline bool is_count_limit_enabled(finish_cond_e cond){ return (cond & finish_cond_e::COUNT_LIMIT) == finish_cond_e::COUNT_LIMIT; } + +template +typename vm_impl::compile_func vm_impl::decode_inst(code_word_t instr){ + for(auto& e: qlut[instr&0x3]){ + if(!((instr&e.mask) ^ e.value )) return e.opc; + } + return &this_class::illegal_intruction; +} + template typename vm_base::virt_addr_t vm_impl::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){ // we fetch at max 4 byte, alignment is 2 @@ -329,10 +321,7 @@ typename vm_base::virt_addr_t vm_impl::execute_inst(finish_cond_e co } if ((cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF && (insn == 0x0000006f || (insn&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0' - auto lut_val = extract_fields(insn); - auto f = qlut[insn & 0x3][lut_val]; - if (!f) - f = &this_class::illegal_intruction; + auto f = decode_inst(insn); pc = (this->*f)(pc, insn); } return pc; diff --git a/incl/iss/arch/riscv_hart_m_p.h b/incl/iss/arch/riscv_hart_m_p.h index b02f999..ace8d1f 100644 --- a/incl/iss/arch/riscv_hart_m_p.h +++ b/incl/iss/arch/riscv_hart_m_p.h @@ -144,10 +144,10 @@ public: mstatus_t mstatus; - static const reg_t mstatus_reset_val = 0; + static const reg_t mstatus_reset_val = 0x1800; void write_mstatus(T val) { - auto mask = get_mask(); + auto mask = get_mask() &0xff; // MPP is hardcode as 0x3 auto new_val = (mstatus.backing.val & ~mask) | (val & mask); mstatus = new_val; } diff --git a/src/vm/interp/vm_tgc_c.cpp b/src/vm/interp/vm_tgc_c.cpp index 9fb7759..c49f59d 100644 --- a/src/vm/interp/vm_tgc_c.cpp +++ b/src/vm/interp/vm_tgc_c.cpp @@ -85,6 +85,7 @@ protected: inline const char *name(size_t index){return traits::reg_aliases.at(index);} + compile_func decode_inst(code_word_t instr) ; virt_addr_t execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit) override; // some compile time constants @@ -98,46 +99,13 @@ protected: std::array lut_00, lut_01, lut_10; std::array lut_11; - std::array qlut; + struct instruction_pattern { + uint32_t value; + uint32_t mask; + compile_func opc; + }; - std::array lutmasks = {{EXTR_MASK16, EXTR_MASK16, EXTR_MASK16, EXTR_MASK32}}; - - void expand_bit_mask(int pos, uint32_t mask, uint32_t value, uint32_t valid, uint32_t idx, compile_func lut[], - compile_func f) { - if (pos < 0) { - lut[idx] = f; - } else { - auto bitmask = 1UL << pos; - if ((mask & bitmask) == 0) { - expand_bit_mask(pos - 1, mask, value, valid, idx, lut, f); - } else { - if ((valid & bitmask) == 0) { - expand_bit_mask(pos - 1, mask, value, valid, (idx << 1), lut, f); - expand_bit_mask(pos - 1, mask, value, valid, (idx << 1) + 1, lut, f); - } else { - auto new_val = idx << 1; - if ((value & bitmask) != 0) new_val++; - expand_bit_mask(pos - 1, mask, value, valid, new_val, lut, f); - } - } - } - } - - inline uint32_t extract_fields(uint32_t val) { return extract_fields(29, val >> 2, lutmasks[val & 0x3], 0); } - - uint32_t extract_fields(int pos, uint32_t val, uint32_t mask, uint32_t lut_val) { - if (pos >= 0) { - auto bitmask = 1UL << pos; - if ((mask & bitmask) == 0) { - lut_val = extract_fields(pos - 1, val, mask, lut_val); - } else { - auto new_val = lut_val << 1; - if ((val & bitmask) != 0) new_val++; - lut_val = extract_fields(pos - 1, val, mask, new_val); - } - } - return lut_val; - } + std::array, 4> qlut; inline void raise(uint16_t trap_id, uint16_t cause){ auto trap_val = 0x80ULL << 24 | (cause << 16) | trap_id; @@ -4113,22 +4081,46 @@ template void debug_fn(CODE_WORD insn) { template vm_impl::vm_impl() { this(new ARCH()); } +// according to +// https://stackoverflow.com/questions/8871204/count-number-of-1s-in-binary-representation +#ifdef __GCC__ +constexpr size_t bit_count(uint32_t u) { return __builtin_popcount(u); } +#elif __cplusplus < 201402L +constexpr size_t uCount(uint32_t u) { return u - ((u >> 1) & 033333333333) - ((u >> 2) & 011111111111); } +constexpr size_t bit_count(uint32_t u) { return ((uCount(u) + (uCount(u) >> 3)) & 030707070707) % 63; } +#else +constexpr size_t bit_count(uint32_t u) { + size_t uCount = u - ((u >> 1) & 033333333333) - ((u >> 2) & 011111111111); + return ((uCount + (uCount >> 3)) & 030707070707) % 63; +} +#endif + template vm_impl::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id) : vm_base(core, core_id, cluster_id) { - qlut[0] = lut_00.data(); - qlut[1] = lut_01.data(); - qlut[2] = lut_10.data(); - qlut[3] = lut_11.data(); for (auto instr : instr_descr) { - auto quantrant = instr.value & 0x3; - expand_bit_mask(29, lutmasks[quantrant], instr.value >> 2, instr.mask >> 2, 0, qlut[quantrant], instr.op); + auto quadrant = instr.value & 0x3; + qlut[quadrant].push_back(instruction_pattern{instr.value, instr.mask, instr.op}); + } + for(auto& lut: qlut){ + std::sort(std::begin(lut), std::end(lut), [](instruction_pattern const& a, instruction_pattern const& b){ + return bit_count(a.mask) > bit_count(b.mask); + }); } } inline bool is_count_limit_enabled(finish_cond_e cond){ return (cond & finish_cond_e::COUNT_LIMIT) == finish_cond_e::COUNT_LIMIT; } + +template +typename vm_impl::compile_func vm_impl::decode_inst(code_word_t instr){ + for(auto& e: qlut[instr&0x3]){ + if(!((instr&e.mask) ^ e.value )) return e.opc; + } + return &this_class::illegal_intruction; +} + template typename vm_base::virt_addr_t vm_impl::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){ // we fetch at max 4 byte, alignment is 2 @@ -4146,10 +4138,7 @@ typename vm_base::virt_addr_t vm_impl::execute_inst(finish_cond_e co } if ((cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF && (insn == 0x0000006f || (insn&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0' - auto lut_val = extract_fields(insn); - auto f = qlut[insn & 0x3][lut_val]; - if (!f) - f = &this_class::illegal_intruction; + auto f = decode_inst(insn); pc = (this->*f)(pc, insn); } return pc;