From 6e52af168bb7984c28e4ed0208aa38720875dde7 Mon Sep 17 00:00:00 2001 From: Eyck-Alexander Jentzsch Date: Sat, 29 Jul 2023 11:42:46 +0200 Subject: [PATCH] adds faster decoding to tcc and cleans up others --- gen_input/templates/interp/CORENAME.cpp.gtl | 7 +- gen_input/templates/tcc/CORENAME.cpp.gtl | 158 ++++++++-------- src/vm/interp/vm_tgc_c.cpp | 7 +- src/vm/tcc/vm_tgc_c.cpp | 195 ++++++++++---------- 4 files changed, 180 insertions(+), 187 deletions(-) diff --git a/gen_input/templates/interp/CORENAME.cpp.gtl b/gen_input/templates/interp/CORENAME.cpp.gtl index 7a7707e..9585627 100644 --- a/gen_input/templates/interp/CORENAME.cpp.gtl +++ b/gen_input/templates/interp/CORENAME.cpp.gtl @@ -218,7 +218,7 @@ private: }); } } - typename arch::traits::opcode_e decodeInstr(decoding_tree_node* node, code_word_t word){ + typename arch::traits::opcode_e decode_instr(decoding_tree_node* node, code_word_t word){ if(!node->children.size()){ if(node->instrs.size() == 1) return node->instrs[0].op; for(auto instr : node->instrs){ @@ -228,7 +228,7 @@ private: else{ for(auto child : node->children){ if (child->value == (node->submask&word)){ - return decodeInstr(child, word); + return decode_instr(child, word); } } } @@ -260,7 +260,6 @@ constexpr size_t bit_count(uint32_t u) { template vm_impl::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id) : vm_base(core, core_id, cluster_id) { - unsigned id=0; root = new decoding_tree_node(std::numeric_limits::max()); for(auto instr:instr_descr){ root->instrs.push_back(instr); @@ -297,7 +296,7 @@ typename vm_base::virt_addr_t vm_impl::execute_inst(finish_cond_e co } else { if (is_jump_to_self_enabled(cond) && (instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0' - auto inst_id = decodeInstr(root, instr); + auto inst_id = decode_instr(root, instr); // pre execution stuff this->core.reg.last_branch = 0; if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast(inst_id)); diff --git a/gen_input/templates/tcc/CORENAME.cpp.gtl b/gen_input/templates/tcc/CORENAME.cpp.gtl index b7cec56..809ae00 100644 --- a/gen_input/templates/tcc/CORENAME.cpp.gtl +++ b/gen_input/templates/tcc/CORENAME.cpp.gtl @@ -121,57 +121,7 @@ protected: } } - // some compile time constants - // enum { MASK16 = 0b1111110001100011, MASK32 = 0b11111111111100000111000001111111 }; - enum { MASK16 = 0b1111111111111111, MASK32 = 0b11111111111100000111000001111111 }; - enum { EXTR_MASK16 = MASK16 >> 2, EXTR_MASK32 = MASK32 >> 2 }; - enum { LUT_SIZE = 1 << util::bit_count(static_cast(EXTR_MASK32)), LUT_SIZE_C = 1 << util::bit_count(static_cast(EXTR_MASK16)) }; - - std::array lut; - - std::array lut_00, lut_01, lut_10; - std::array lut_11; - - std::array qlut; - - std::array lutmasks = {{EXTR_MASK16, EXTR_MASK16, EXTR_MASK16, EXTR_MASK32}}; - - void expand_bit_mask(int pos, uint32_t mask, uint32_t value, uint32_t valid, uint32_t idx, compile_func lut[], - compile_func f) { - if (pos < 0) { - lut[idx] = f; - } else { - auto bitmask = 1UL << pos; - if ((mask & bitmask) == 0) { - expand_bit_mask(pos - 1, mask, value, valid, idx, lut, f); - } else { - if ((valid & bitmask) == 0) { - expand_bit_mask(pos - 1, mask, value, valid, (idx << 1), lut, f); - expand_bit_mask(pos - 1, mask, value, valid, (idx << 1) + 1, lut, f); - } else { - auto new_val = idx << 1; - if ((value & bitmask) != 0) new_val++; - expand_bit_mask(pos - 1, mask, value, valid, new_val, lut, f); - } - } - } - } - - inline uint32_t extract_fields(uint32_t val) { return extract_fields(29, val >> 2, lutmasks[val & 0x3], 0); } - - uint32_t extract_fields(int pos, uint32_t val, uint32_t mask, uint32_t lut_val) { - if (pos >= 0) { - auto bitmask = 1UL << pos; - if ((mask & bitmask) == 0) { - lut_val = extract_fields(pos - 1, val, mask, lut_val); - } else { - auto new_val = lut_val << 1; - if ((val & bitmask) != 0) new_val++; - lut_val = extract_fields(pos - 1, val, mask, new_val); - } - } - return lut_val; - } + template::type> inline S sext(U from) { auto mask = (1ULL< instrs; + std::vector children; + uint32_t submask = std::numeric_limits::max(); + uint32_t value; + decoding_tree_node(uint32_t value) : value(value){} + }; - const std::array instr_descr = {{ + decoding_tree_node* root {nullptr}; + + const std::array instr_descr = {{ /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %> /* instruction ${instr.instruction.name}, encoding '${instr.encoding}' */ {${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%> @@ -228,11 +187,64 @@ private: vm_impl::gen_trap_check(tu); return BRANCH; } + + //decoding functionality + + void populate_decoding_tree(decoding_tree_node* root){ + //create submask + for(auto instr: root->instrs){ + root->submask &= instr.mask; + } + //put each instr according to submask&encoding into children + for(auto instr: root->instrs){ + bool foundMatch = false; + for(auto child: root->children){ + //use value as identifying trait + if(child->value == (instr.value&root->submask)){ + child->instrs.push_back(instr); + foundMatch = true; + } + } + if(!foundMatch){ + decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask); + child->instrs.push_back(instr); + root->children.push_back(child); + } + } + root->instrs.clear(); + //call populate_decoding_tree for all children + if(root->children.size() >1) + for(auto child: root->children){ + populate_decoding_tree(child); + } + else{ + //sort instrs by value of the mask, this works bc we want to have the least restrictive one last + std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) { + return instr1.mask > instr2.mask; + }); + } + } + compile_func decode_instr(decoding_tree_node* node, code_word_t word){ + if(!node->children.size()){ + if(node->instrs.size() == 1) return node->instrs[0].op; + for(auto instr : node->instrs){ + if((instr.mask&word) == instr.value) return instr.op; + } + } + else{ + for(auto child : node->children){ + if (child->value == (node->submask&word)){ + return decode_instr(child, word); + } + } + } + return nullptr; + } }; -template void debug_fn(CODE_WORD insn) { - volatile CODE_WORD x = insn; - insn = 2 * x; +template void debug_fn(CODE_WORD instr) { + volatile CODE_WORD x = instr; + instr = 2 * x; } template vm_impl::vm_impl() { this(new ARCH()); } @@ -240,14 +252,11 @@ template vm_impl::vm_impl() { this(new ARCH()); } template vm_impl::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id) : vm_base(core, core_id, cluster_id) { - qlut[0] = lut_00.data(); - qlut[1] = lut_01.data(); - qlut[2] = lut_10.data(); - qlut[3] = lut_11.data(); - for (auto instr : instr_descr) { - auto quantrant = instr.value & 0x3; - expand_bit_mask(29, lutmasks[quantrant], instr.value >> 2, instr.mask >> 2, 0, qlut[quantrant], instr.op); + root = new decoding_tree_node(std::numeric_limits::max()); + for(auto instr:instr_descr){ + root->instrs.push_back(instr); } + populate_decoding_tree(root); } template @@ -255,30 +264,19 @@ std::tuple vm_impl::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) { // we fetch at max 4 byte, alignment is 2 enum {TRAP_ID=1<<16}; - code_word_t insn = 0; - // const typename traits::addr_t upper_bits = ~traits::PGMASK; + code_word_t instr = 0; phys_addr_t paddr(pc); - auto *const data = (uint8_t *)&insn; paddr = this->core.v2p(pc); -// if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary -// auto res = this->core.read(paddr, 2, data); -// if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val); -// if ((insn & 0x3) == 0x3) { // this is a 32bit instruction -// res = this->core.read(this->core.v2p(pc + 2), 2, data + 2); -// } -// } else { - auto res = this->core.read(paddr, 4, data); - if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val); -// } - if (insn == 0x0000006f || (insn&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0' + auto res = this->core.read(paddr, 4, reinterpret_cast(&instr)); + if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val); + if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0' // curr pc on stack ++inst_cnt; - auto lut_val = extract_fields(insn); - auto f = qlut[insn & 0x3][lut_val]; + auto f = decode_instr(root, instr); if (f == nullptr) { f = &this_class::illegal_intruction; } - return (this->*f)(pc, insn, tu); + return (this->*f)(pc, instr, tu); } template void vm_impl::gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause) { diff --git a/src/vm/interp/vm_tgc_c.cpp b/src/vm/interp/vm_tgc_c.cpp index d5817ed..55c75d5 100644 --- a/src/vm/interp/vm_tgc_c.cpp +++ b/src/vm/interp/vm_tgc_c.cpp @@ -298,7 +298,7 @@ private: }); } } - typename arch::traits::opcode_e decodeInstr(decoding_tree_node* node, code_word_t word){ + typename arch::traits::opcode_e decode_instr(decoding_tree_node* node, code_word_t word){ if(!node->children.size()){ if(node->instrs.size() == 1) return node->instrs[0].op; for(auto instr : node->instrs){ @@ -308,7 +308,7 @@ private: else{ for(auto child : node->children){ if (child->value == (node->submask&word)){ - return decodeInstr(child, word); + return decode_instr(child, word); } } } @@ -340,7 +340,6 @@ constexpr size_t bit_count(uint32_t u) { template vm_impl::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id) : vm_base(core, core_id, cluster_id) { - unsigned id=0; root = new decoding_tree_node(std::numeric_limits::max()); for(auto instr:instr_descr){ root->instrs.push_back(instr); @@ -377,7 +376,7 @@ typename vm_base::virt_addr_t vm_impl::execute_inst(finish_cond_e co } else { if (is_jump_to_self_enabled(cond) && (instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0' - auto inst_id = decodeInstr(root, instr); + auto inst_id = decode_instr(root, instr); // pre execution stuff this->core.reg.last_branch = 0; if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast(inst_id)); diff --git a/src/vm/tcc/vm_tgc_c.cpp b/src/vm/tcc/vm_tgc_c.cpp index 495d97e..fde3ec3 100644 --- a/src/vm/tcc/vm_tgc_c.cpp +++ b/src/vm/tcc/vm_tgc_c.cpp @@ -121,57 +121,7 @@ protected: } } - // some compile time constants - // enum { MASK16 = 0b1111110001100011, MASK32 = 0b11111111111100000111000001111111 }; - enum { MASK16 = 0b1111111111111111, MASK32 = 0b11111111111100000111000001111111 }; - enum { EXTR_MASK16 = MASK16 >> 2, EXTR_MASK32 = MASK32 >> 2 }; - enum { LUT_SIZE = 1 << util::bit_count(static_cast(EXTR_MASK32)), LUT_SIZE_C = 1 << util::bit_count(static_cast(EXTR_MASK16)) }; - - std::array lut; - - std::array lut_00, lut_01, lut_10; - std::array lut_11; - - std::array qlut; - - std::array lutmasks = {{EXTR_MASK16, EXTR_MASK16, EXTR_MASK16, EXTR_MASK32}}; - - void expand_bit_mask(int pos, uint32_t mask, uint32_t value, uint32_t valid, uint32_t idx, compile_func lut[], - compile_func f) { - if (pos < 0) { - lut[idx] = f; - } else { - auto bitmask = 1UL << pos; - if ((mask & bitmask) == 0) { - expand_bit_mask(pos - 1, mask, value, valid, idx, lut, f); - } else { - if ((valid & bitmask) == 0) { - expand_bit_mask(pos - 1, mask, value, valid, (idx << 1), lut, f); - expand_bit_mask(pos - 1, mask, value, valid, (idx << 1) + 1, lut, f); - } else { - auto new_val = idx << 1; - if ((value & bitmask) != 0) new_val++; - expand_bit_mask(pos - 1, mask, value, valid, new_val, lut, f); - } - } - } - } - - inline uint32_t extract_fields(uint32_t val) { return extract_fields(29, val >> 2, lutmasks[val & 0x3], 0); } - - uint32_t extract_fields(int pos, uint32_t val, uint32_t mask, uint32_t lut_val) { - if (pos >= 0) { - auto bitmask = 1UL << pos; - if ((mask & bitmask) == 0) { - lut_val = extract_fields(pos - 1, val, mask, lut_val); - } else { - auto new_val = lut_val << 1; - if ((val & bitmask) != 0) new_val++; - lut_val = extract_fields(pos - 1, val, mask, new_val); - } - } - return lut_val; - } + template::type> inline S sext(U from) { auto mask = (1ULL< instrs; + std::vector children; + uint32_t submask = std::numeric_limits::max(); + uint32_t value; + decoding_tree_node(uint32_t value) : value(value){} + }; - const std::array instr_descr = {{ + decoding_tree_node* root {nullptr}; + + const std::array instr_descr = {{ /* entries are: size, valid value, valid mask, function ptr */ /* instruction LUI, encoding '0b00000000000000000000000000110111' */ {32, 0b00000000000000000000000000110111, 0b00000000000000000000000001111111, &this_class::__lui}, @@ -494,14 +453,14 @@ private: this->gen_raise_trap(tu, 0, 2); } else{ - auto new_pc = tu.assignment(tu.ext((tu.bitwise_and((tu.add(tu.load(rs1+ traits::X0, 0),tu.constant((int16_t)sext<12>(imm),16))),tu.constant(~ 0x1,8))),32,true),32); + auto new_pc = tu.assignment(tu.ext((tu.bitwise_and((tu.add(tu.load(rs1+ traits::X0, 0),tu.constant((int16_t)sext<12>(imm),16))),tu.constant(~0x1,8))),32,true),32); tu.open_if(tu.srem(new_pc,tu.constant(static_cast(traits:: INSTR_ALIGNMENT),32))); this->gen_raise_trap(tu, 0, 0); tu.open_else(); if(rd!= 0) { tu.store(rd + traits::X0,tu.ext((tu.add(tu.ext(cur_pc_val,32,false),tu.constant( 4,8))),32,true)); } - auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(new_pc,tu.constant(~ 0x1,8)),32); + auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(new_pc,tu.constant(~0x1,8)),32); tu.store(traits::NEXT_PC, PC_val_v); tu.store(traits::LAST_BRANCH, tu.constant(2U, 2)); tu.close_scope(); @@ -1963,7 +1922,7 @@ private: else{ auto xrd = tu.assignment(tu.read_mem(traits::CSR, csr, 32),32); if(zimm!= 0) { - tu.write_mem(traits::CSR, csr, tu.bitwise_and(xrd,tu.constant(~ ((uint32_t)zimm),32))); + tu.write_mem(traits::CSR, csr, tu.bitwise_and(xrd,tu.constant(~((uint32_t)zimm),32))); } if(rd!= 0) { tu.store(rd + traits::X0,xrd); @@ -2024,7 +1983,7 @@ private: this->gen_raise_trap(tu, 0, 2); } else{ - auto res = tu.assignment(tu.mul(tu.ext(tu.load(rs1+ traits::X0, 0),32,false),tu.ext(tu.load(rs2+ traits::X0, 0),32,false)),64); + auto res = tu.assignment(tu.ext((tu.mul(tu.ext(tu.ext(tu.load(rs1+ traits::X0, 0),32,true),64,false),tu.ext(tu.ext(tu.load(rs2+ traits::X0, 0),32,true),64,false))),64,false),64); if(rd!=0) { tu.store(rd + traits::X0,tu.ext(res,32,true)); } @@ -2058,7 +2017,7 @@ private: this->gen_raise_trap(tu, 0, 2); } else{ - auto res = tu.assignment(tu.mul(tu.ext(tu.load(rs1+ traits::X0, 0),32,false),tu.ext(tu.load(rs2+ traits::X0, 0),32,false)),64); + auto res = tu.assignment(tu.ext((tu.mul(tu.ext(tu.ext(tu.load(rs1+ traits::X0, 0),32,true),64,false),tu.ext(tu.ext(tu.load(rs2+ traits::X0, 0),32,true),64,false))),64,false),64); if(rd!=0) { tu.store(rd + traits::X0,tu.ext((tu.lshr(res,tu.constant(static_cast(traits:: XLEN),32))),32,true)); } @@ -2092,7 +2051,7 @@ private: this->gen_raise_trap(tu, 0, 2); } else{ - auto res = tu.assignment(tu.mul(tu.ext(tu.load(rs1+ traits::X0, 0),32,false),tu.load(rs2+ traits::X0, 0)),64); + auto res = tu.assignment(tu.ext((tu.mul(tu.ext(tu.ext(tu.load(rs1+ traits::X0, 0),32,true),64,false),tu.ext(tu.load(rs2+ traits::X0, 0),64,true))),64,false),64); if(rd!=0) { tu.store(rd + traits::X0,tu.ext((tu.lshr(res,tu.constant(static_cast(traits:: XLEN),32))),32,true)); } @@ -2126,7 +2085,7 @@ private: this->gen_raise_trap(tu, 0, 2); } else{ - auto res = tu.assignment(tu.mul(tu.load(rs1+ traits::X0, 0),tu.load(rs2+ traits::X0, 0)),64); + auto res = tu.assignment(tu.ext((tu.mul(tu.ext(tu.load(rs1+ traits::X0, 0),64,true),tu.ext(tu.load(rs2+ traits::X0, 0),64,true))),64,true),64); if(rd!=0) { tu.store(rd + traits::X0,tu.ext((tu.lshr(res,tu.constant(static_cast(traits:: XLEN),32))),32,true)); } @@ -2164,13 +2123,13 @@ private: auto divisor = tu.assignment(tu.ext(tu.load(rs2+ traits::X0, 0),32,false),32); if(rd!= 0){ tu.open_if(tu.icmp(ICmpInst::ICMP_NE,divisor,tu.constant( 0,8))); auto MMIN = tu.assignment(tu.constant(((uint32_t)1)<<(static_cast(traits:: XLEN)-1),32),32); - tu.open_if(tu.logical_and(tu.icmp(ICmpInst::ICMP_EQ,tu.load(rs1+ traits::X0, 0),MMIN),tu.icmp(ICmpInst::ICMP_EQ,divisor,tu.constant(- 1,8)))); + tu.open_if(tu.logical_and(tu.icmp(ICmpInst::ICMP_EQ,tu.load(rs1+ traits::X0, 0),MMIN),tu.icmp(ICmpInst::ICMP_EQ,divisor,tu.constant(-1,8)))); tu.store(rd + traits::X0,MMIN); tu.open_else(); tu.store(rd + traits::X0,tu.ext((tu.sdiv(dividend,divisor)),32,true)); tu.close_scope(); tu.open_else(); - tu.store(rd + traits::X0,tu.constant((uint32_t)- 1,32)); + tu.store(rd + traits::X0,tu.constant((uint32_t)-1,32)); tu.close_scope(); } } @@ -2209,7 +2168,7 @@ private: } tu.open_else(); if(rd!=0) { - tu.store(rd + traits::X0,tu.constant((uint32_t)- 1,32)); + tu.store(rd + traits::X0,tu.constant((uint32_t)-1,32)); } tu.close_scope(); } @@ -2244,7 +2203,7 @@ private: else{ tu.open_if(tu.icmp(ICmpInst::ICMP_NE,tu.load(rs2+ traits::X0, 0),tu.constant( 0,8))); auto MMIN = tu.assignment(tu.constant( 1<<(static_cast(traits:: XLEN)-1),8),32); - tu.open_if(tu.logical_and(tu.icmp(ICmpInst::ICMP_EQ,tu.load(rs1+ traits::X0, 0),MMIN),tu.icmp(ICmpInst::ICMP_EQ,tu.ext(tu.load(rs2+ traits::X0, 0),32,false),tu.constant(- 1,8)))); + tu.open_if(tu.logical_and(tu.icmp(ICmpInst::ICMP_EQ,tu.load(rs1+ traits::X0, 0),MMIN),tu.icmp(ICmpInst::ICMP_EQ,tu.ext(tu.load(rs2+ traits::X0, 0),32,false),tu.constant(-1,8)))); if(rd!=0) { tu.store(rd + traits::X0,tu.constant( 0,8)); } @@ -2353,8 +2312,8 @@ private: pc=pc+ 2; gen_set_pc(tu, pc, traits::NEXT_PC); tu.open_scope(); - auto load_address = tu.assignment(tu.ext((tu.add(tu.load(rs1+ 8+ traits::X0, 0),tu.constant(uimm,8))),32,true),32); - tu.store(rd+ 8 + traits::X0,tu.ext(tu.ext(tu.read_mem(traits::MEM, load_address, 32),32,false),32,true)); + auto offs = tu.assignment(tu.ext((tu.add(tu.load(rs1+ 8+ traits::X0, 0),tu.constant(uimm,8))),32,true),32); + tu.store(rd+ 8 + traits::X0,tu.ext(tu.ext(tu.read_mem(traits::MEM, offs, 32),32,false),32,true)); auto returnValue = std::make_tuple(CONT); tu.close_scope(); vm_base::gen_sync(tu, POST_SYNC,58); @@ -2380,8 +2339,8 @@ private: pc=pc+ 2; gen_set_pc(tu, pc, traits::NEXT_PC); tu.open_scope(); - auto load_address = tu.assignment(tu.ext((tu.add(tu.load(rs1+ 8+ traits::X0, 0),tu.constant(uimm,8))),32,true),32); - tu.write_mem(traits::MEM, load_address, tu.ext(tu.load(rs2+ 8+ traits::X0, 0),32,true)); + auto offs = tu.assignment(tu.ext((tu.add(tu.load(rs1+ 8+ traits::X0, 0),tu.constant(uimm,8))),32,true),32); + tu.write_mem(traits::MEM, offs, tu.ext(tu.load(rs2+ 8+ traits::X0, 0),32,true)); auto returnValue = std::make_tuple(CONT); tu.close_scope(); vm_base::gen_sync(tu, POST_SYNC,59); @@ -2898,8 +2857,7 @@ private: } else{ auto offs = tu.assignment(tu.ext((tu.add(tu.load(2+ traits::X0, 0),tu.constant(uimm,8))),32,true),32); - auto res = tu.assignment(tu.ext(tu.read_mem(traits::MEM, offs, 32),32,false),32); - tu.store(rd + traits::X0,tu.ext(res,32,true)); + tu.store(rd + traits::X0,tu.ext(tu.ext(tu.read_mem(traits::MEM, offs, 32),32,false),32,true)); } auto returnValue = std::make_tuple(CONT); tu.close_scope(); @@ -2957,7 +2915,7 @@ private: gen_set_pc(tu, pc, traits::NEXT_PC); tu.open_scope(); if(rs1&&rs1(traits:: RFS)) { - auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(tu.load(rs1%static_cast(traits:: RFS)+ traits::X0, 0),tu.constant(~ 0x1,8)),32); + auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(tu.load(rs1%static_cast(traits:: RFS)+ traits::X0, 0),tu.constant(~0x1,8)),32); tu.store(traits::NEXT_PC, PC_val_v); tu.store(traits::LAST_BRANCH, tu.constant(2U, 2)); } @@ -3045,7 +3003,7 @@ private: else{ auto new_pc = tu.assignment(tu.load(rs1+ traits::X0, 0),32); tu.store(1 + traits::X0,tu.ext((tu.add(tu.ext(cur_pc_val,32,false),tu.constant( 2,8))),32,true)); - auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(new_pc,tu.constant(~ 0x1,8)),32); + auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(new_pc,tu.constant(~0x1,8)),32); tu.store(traits::NEXT_PC, PC_val_v); tu.store(traits::LAST_BRANCH, tu.constant(2U, 2)); } @@ -3138,11 +3096,64 @@ private: vm_impl::gen_trap_check(tu); return BRANCH; } + + //decoding functionality + + void populate_decoding_tree(decoding_tree_node* root){ + //create submask + for(auto instr: root->instrs){ + root->submask &= instr.mask; + } + //put each instr according to submask&encoding into children + for(auto instr: root->instrs){ + bool foundMatch = false; + for(auto child: root->children){ + //use value as identifying trait + if(child->value == (instr.value&root->submask)){ + child->instrs.push_back(instr); + foundMatch = true; + } + } + if(!foundMatch){ + decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask); + child->instrs.push_back(instr); + root->children.push_back(child); + } + } + root->instrs.clear(); + //call populate_decoding_tree for all children + if(root->children.size() >1) + for(auto child: root->children){ + populate_decoding_tree(child); + } + else{ + //sort instrs by value of the mask, this works bc we want to have the least restrictive one last + std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) { + return instr1.mask > instr2.mask; + }); + } + } + compile_func decode_instr(decoding_tree_node* node, code_word_t word){ + if(!node->children.size()){ + if(node->instrs.size() == 1) return node->instrs[0].op; + for(auto instr : node->instrs){ + if((instr.mask&word) == instr.value) return instr.op; + } + } + else{ + for(auto child : node->children){ + if (child->value == (node->submask&word)){ + return decode_instr(child, word); + } + } + } + return nullptr; + } }; -template void debug_fn(CODE_WORD insn) { - volatile CODE_WORD x = insn; - insn = 2 * x; +template void debug_fn(CODE_WORD instr) { + volatile CODE_WORD x = instr; + instr = 2 * x; } template vm_impl::vm_impl() { this(new ARCH()); } @@ -3150,14 +3161,11 @@ template vm_impl::vm_impl() { this(new ARCH()); } template vm_impl::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id) : vm_base(core, core_id, cluster_id) { - qlut[0] = lut_00.data(); - qlut[1] = lut_01.data(); - qlut[2] = lut_10.data(); - qlut[3] = lut_11.data(); - for (auto instr : instr_descr) { - auto quantrant = instr.value & 0x3; - expand_bit_mask(29, lutmasks[quantrant], instr.value >> 2, instr.mask >> 2, 0, qlut[quantrant], instr.op); + root = new decoding_tree_node(std::numeric_limits::max()); + for(auto instr:instr_descr){ + root->instrs.push_back(instr); } + populate_decoding_tree(root); } template @@ -3165,30 +3173,19 @@ std::tuple vm_impl::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) { // we fetch at max 4 byte, alignment is 2 enum {TRAP_ID=1<<16}; - code_word_t insn = 0; - // const typename traits::addr_t upper_bits = ~traits::PGMASK; + code_word_t instr = 0; phys_addr_t paddr(pc); - auto *const data = (uint8_t *)&insn; paddr = this->core.v2p(pc); -// if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary -// auto res = this->core.read(paddr, 2, data); -// if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val); -// if ((insn & 0x3) == 0x3) { // this is a 32bit instruction -// res = this->core.read(this->core.v2p(pc + 2), 2, data + 2); -// } -// } else { - auto res = this->core.read(paddr, 4, data); - if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val); -// } - if (insn == 0x0000006f || (insn&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0' + auto res = this->core.read(paddr, 4, reinterpret_cast(&instr)); + if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val); + if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0' // curr pc on stack ++inst_cnt; - auto lut_val = extract_fields(insn); - auto f = qlut[insn & 0x3][lut_val]; + auto f = decode_instr(root, instr); if (f == nullptr) { f = &this_class::illegal_intruction; } - return (this->*f)(pc, insn, tu); + return (this->*f)(pc, instr, tu); } template void vm_impl::gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause) {