updates template for faster instruction decoding
This commit is contained in:
parent
c78026b720
commit
bd0d15f3a2
|
@ -159,30 +159,81 @@ private:
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* start opcode definitions
|
* start opcode definitions
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
struct InstructionDesriptor {
|
struct instruction_descriptor {
|
||||||
size_t length;
|
size_t length;
|
||||||
uint32_t value;
|
uint32_t value;
|
||||||
uint32_t mask;
|
uint32_t mask;
|
||||||
typename arch::traits<ARCH>::opcode_e op;
|
typename arch::traits<ARCH>::opcode_e op;
|
||||||
};
|
};
|
||||||
|
struct decoding_tree_node{
|
||||||
|
std::vector<instruction_descriptor> instrs;
|
||||||
|
std::vector<decoding_tree_node*> children;
|
||||||
|
uint32_t submask = std::numeric_limits<uint32_t>::max();
|
||||||
|
uint32_t value;
|
||||||
|
decoding_tree_node(uint32_t value) : value(value){}
|
||||||
|
};
|
||||||
|
|
||||||
const std::array<InstructionDesriptor, ${instructions.size}> instr_descr = {{
|
decoding_tree_node* root {nullptr};
|
||||||
|
const std::array<instruction_descriptor, ${instructions.size}> instr_descr = {{
|
||||||
/* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
|
/* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
|
||||||
{${instr.length}, ${instr.encoding}, ${instr.mask}, arch::traits<ARCH>::opcode_e::${instr.instruction.name}},<%}%>
|
{${instr.length}, ${instr.encoding}, ${instr.mask}, arch::traits<ARCH>::opcode_e::${instr.instruction.name}},<%}%>
|
||||||
}};
|
}};
|
||||||
|
|
||||||
//static constexpr typename traits::addr_t upper_bits = ~traits::PGMASK;
|
|
||||||
iss::status fetch_ins(virt_addr_t pc, uint8_t * data){
|
iss::status fetch_ins(virt_addr_t pc, uint8_t * data){
|
||||||
auto phys_pc = this->core.v2p(pc);
|
auto phys_pc = this->core.v2p(pc);
|
||||||
//if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
|
|
||||||
// if (this->core.read(phys_pc, 2, data) != iss::Ok) return iss::Err;
|
|
||||||
// if ((data[0] & 0x3) == 0x3) // this is a 32bit instruction
|
|
||||||
// if (this->core.read(this->core.v2p(pc + 2), 2, data + 2) != iss::Ok) return iss::Err;
|
|
||||||
//} else {
|
|
||||||
if (this->core.read(phys_pc, 4, data) != iss::Ok) return iss::Err;
|
if (this->core.read(phys_pc, 4, data) != iss::Ok) return iss::Err;
|
||||||
//}
|
|
||||||
return iss::Ok;
|
return iss::Ok;
|
||||||
}
|
}
|
||||||
|
void populate_decoding_tree(decoding_tree_node* root){
|
||||||
|
//create submask
|
||||||
|
for(auto instr: root->instrs){
|
||||||
|
root->submask &= instr.mask;
|
||||||
|
}
|
||||||
|
//put each instr according to submask&encoding into children
|
||||||
|
for(auto instr: root->instrs){
|
||||||
|
bool foundMatch = false;
|
||||||
|
for(auto child: root->children){
|
||||||
|
//use value as identifying trait
|
||||||
|
if(child->value == (instr.value&root->submask)){
|
||||||
|
child->instrs.push_back(instr);
|
||||||
|
foundMatch = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(!foundMatch){
|
||||||
|
decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
|
||||||
|
child->instrs.push_back(instr);
|
||||||
|
root->children.push_back(child);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
root->instrs.clear();
|
||||||
|
//call populate_decoding_tree for all children
|
||||||
|
if(root->children.size() >1)
|
||||||
|
for(auto child: root->children){
|
||||||
|
populate_decoding_tree(child);
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
//sort instrs by value of the mask, this works bc we want to have the least restrictive one last
|
||||||
|
std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
|
||||||
|
return instr1.mask > instr2.mask;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
typename arch::traits<ARCH>::opcode_e decodeInstr(decoding_tree_node* node, code_word_t word){
|
||||||
|
if(!node->children.size()){
|
||||||
|
if(node->instrs.size() == 1) return node->instrs[0].op;
|
||||||
|
for(auto instr : node->instrs){
|
||||||
|
if((instr.mask&word) == instr.value) return instr.op;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else{
|
||||||
|
for(auto child : node->children){
|
||||||
|
if (child->value == (node->submask&word)){
|
||||||
|
return decodeInstr(child, word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
|
template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
|
||||||
|
@ -210,15 +261,11 @@ template <typename ARCH>
|
||||||
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
|
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
|
||||||
: vm_base<ARCH>(core, core_id, cluster_id) {
|
: vm_base<ARCH>(core, core_id, cluster_id) {
|
||||||
unsigned id=0;
|
unsigned id=0;
|
||||||
|
root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
|
||||||
for(auto instr:instr_descr){
|
for(auto instr:instr_descr){
|
||||||
auto quadrant = instr.value & 0x3;
|
root->instrs.push_back(instr);
|
||||||
qlut[quadrant].push_back(instruction_pattern{instr.value, instr.mask, instr.op});
|
|
||||||
}
|
|
||||||
for(auto& lut: qlut){
|
|
||||||
std::sort(std::begin(lut), std::end(lut), [](instruction_pattern const& a, instruction_pattern const& b){
|
|
||||||
return bit_count(a.mask) > bit_count(b.mask);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
populate_decoding_tree(root);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool is_count_limit_enabled(finish_cond_e cond){
|
inline bool is_count_limit_enabled(finish_cond_e cond){
|
||||||
|
@ -229,14 +276,6 @@ inline bool is_jump_to_self_enabled(finish_cond_e cond){
|
||||||
return (cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF;
|
return (cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename ARCH>
|
|
||||||
typename arch::traits<ARCH>::opcode_e vm_impl<ARCH>::decode_inst_id(code_word_t instr){
|
|
||||||
for(auto& e: qlut[instr&0x3]){
|
|
||||||
if(!((instr&e.mask) ^ e.value )) return e.id;
|
|
||||||
}
|
|
||||||
return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename ARCH>
|
template <typename ARCH>
|
||||||
typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){
|
typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){
|
||||||
auto pc=start;
|
auto pc=start;
|
||||||
|
@ -258,7 +297,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
|
||||||
} else {
|
} else {
|
||||||
if (is_jump_to_self_enabled(cond) &&
|
if (is_jump_to_self_enabled(cond) &&
|
||||||
(instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
|
(instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
|
||||||
auto inst_id = decode_inst_id(instr);
|
auto inst_id = decodeInstr(root, instr);
|
||||||
// pre execution stuff
|
// pre execution stuff
|
||||||
this->core.reg.last_branch = 0;
|
this->core.reg.last_branch = 0;
|
||||||
if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast<unsigned>(inst_id));
|
if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast<unsigned>(inst_id));
|
||||||
|
|
Loading…
Reference in New Issue