Compare commits

...

4 Commits

4 changed files with 295 additions and 223 deletions

View File

@ -158,30 +158,81 @@ private:
/**************************************************************************** /****************************************************************************
* start opcode definitions * start opcode definitions
****************************************************************************/ ****************************************************************************/
struct InstructionDesriptor { struct instruction_descriptor {
size_t length; size_t length;
uint32_t value; uint32_t value;
uint32_t mask; uint32_t mask;
typename arch::traits<ARCH>::opcode_e op; typename arch::traits<ARCH>::opcode_e op;
}; };
struct decoding_tree_node{
std::vector<instruction_descriptor> instrs;
std::vector<decoding_tree_node*> children;
uint32_t submask = std::numeric_limits<uint32_t>::max();
uint32_t value;
decoding_tree_node(uint32_t value) : value(value){}
};
const std::array<InstructionDesriptor, ${instructions.size}> instr_descr = {{ decoding_tree_node* root {nullptr};
const std::array<instruction_descriptor, ${instructions.size}> instr_descr = {{
/* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %> /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
{${instr.length}, ${instr.encoding}, ${instr.mask}, arch::traits<ARCH>::opcode_e::${instr.instruction.name}},<%}%> {${instr.length}, ${instr.encoding}, ${instr.mask}, arch::traits<ARCH>::opcode_e::${instr.instruction.name}},<%}%>
}}; }};
//static constexpr typename traits::addr_t upper_bits = ~traits::PGMASK;
iss::status fetch_ins(virt_addr_t pc, uint8_t * data){ iss::status fetch_ins(virt_addr_t pc, uint8_t * data){
auto phys_pc = this->core.v2p(pc); auto phys_pc = this->core.v2p(pc);
//if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
// if (this->core.read(phys_pc, 2, data) != iss::Ok) return iss::Err;
// if ((data[0] & 0x3) == 0x3) // this is a 32bit instruction
// if (this->core.read(this->core.v2p(pc + 2), 2, data + 2) != iss::Ok) return iss::Err;
//} else {
if (this->core.read(phys_pc, 4, data) != iss::Ok) return iss::Err; if (this->core.read(phys_pc, 4, data) != iss::Ok) return iss::Err;
//}
return iss::Ok; return iss::Ok;
} }
void populate_decoding_tree(decoding_tree_node* root){
//create submask
for(auto instr: root->instrs){
root->submask &= instr.mask;
}
//put each instr according to submask&encoding into children
for(auto instr: root->instrs){
bool foundMatch = false;
for(auto child: root->children){
//use value as identifying trait
if(child->value == (instr.value&root->submask)){
child->instrs.push_back(instr);
foundMatch = true;
}
}
if(!foundMatch){
decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
child->instrs.push_back(instr);
root->children.push_back(child);
}
}
root->instrs.clear();
//call populate_decoding_tree for all children
if(root->children.size() >1)
for(auto child: root->children){
populate_decoding_tree(child);
}
else{
//sort instrs by value of the mask, this works bc we want to have the least restrictive one last
std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
return instr1.mask > instr2.mask;
});
}
}
typename arch::traits<ARCH>::opcode_e decode_instr(decoding_tree_node* node, code_word_t word){
if(!node->children.size()){
if(node->instrs.size() == 1) return node->instrs[0].op;
for(auto instr : node->instrs){
if((instr.mask&word) == instr.value) return instr.op;
}
}
else{
for(auto child : node->children){
if (child->value == (node->submask&word)){
return decode_instr(child, word);
}
}
}
return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
}
}; };
template <typename CODE_WORD> void debug_fn(CODE_WORD insn) { template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
@ -208,16 +259,11 @@ constexpr size_t bit_count(uint32_t u) {
template <typename ARCH> template <typename ARCH>
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id) vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
: vm_base<ARCH>(core, core_id, cluster_id) { : vm_base<ARCH>(core, core_id, cluster_id) {
unsigned id=0; root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
for(auto instr:instr_descr){ for(auto instr:instr_descr){
auto quadrant = instr.value & 0x3; root->instrs.push_back(instr);
qlut[quadrant].push_back(instruction_pattern{instr.value, instr.mask, instr.op});
}
for(auto& lut: qlut){
std::sort(std::begin(lut), std::end(lut), [](instruction_pattern const& a, instruction_pattern const& b){
return bit_count(a.mask) > bit_count(b.mask);
});
} }
populate_decoding_tree(root);
} }
inline bool is_count_limit_enabled(finish_cond_e cond){ inline bool is_count_limit_enabled(finish_cond_e cond){
@ -228,14 +274,6 @@ inline bool is_jump_to_self_enabled(finish_cond_e cond){
return (cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF; return (cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF;
} }
template <typename ARCH>
typename arch::traits<ARCH>::opcode_e vm_impl<ARCH>::decode_inst_id(code_word_t instr){
for(auto& e: qlut[instr&0x3]){
if(!((instr&e.mask) ^ e.value )) return e.id;
}
return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
}
template <typename ARCH> template <typename ARCH>
typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){
auto pc=start; auto pc=start;
@ -257,7 +295,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
} else { } else {
if (is_jump_to_self_enabled(cond) && if (is_jump_to_self_enabled(cond) &&
(instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0' (instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
auto inst_id = decode_inst_id(instr); auto inst_id = decode_instr(root, instr);
// pre execution stuff // pre execution stuff
this->core.reg.last_branch = 0; this->core.reg.last_branch = 0;
if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast<unsigned>(inst_id)); if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast<unsigned>(inst_id));

View File

@ -120,57 +120,7 @@ protected:
} }
} }
// some compile time constants
// enum { MASK16 = 0b1111110001100011, MASK32 = 0b11111111111100000111000001111111 };
enum { MASK16 = 0b1111111111111111, MASK32 = 0b11111111111100000111000001111111 };
enum { EXTR_MASK16 = MASK16 >> 2, EXTR_MASK32 = MASK32 >> 2 };
enum { LUT_SIZE = 1 << util::bit_count(static_cast<uint32_t>(EXTR_MASK32)), LUT_SIZE_C = 1 << util::bit_count(static_cast<uint32_t>(EXTR_MASK16)) };
std::array<compile_func, LUT_SIZE> lut;
std::array<compile_func, LUT_SIZE_C> lut_00, lut_01, lut_10;
std::array<compile_func, LUT_SIZE> lut_11;
std::array<compile_func *, 4> qlut;
std::array<const uint32_t, 4> lutmasks = {{EXTR_MASK16, EXTR_MASK16, EXTR_MASK16, EXTR_MASK32}};
void expand_bit_mask(int pos, uint32_t mask, uint32_t value, uint32_t valid, uint32_t idx, compile_func lut[],
compile_func f) {
if (pos < 0) {
lut[idx] = f;
} else {
auto bitmask = 1UL << pos;
if ((mask & bitmask) == 0) {
expand_bit_mask(pos - 1, mask, value, valid, idx, lut, f);
} else {
if ((valid & bitmask) == 0) {
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1), lut, f);
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1) + 1, lut, f);
} else {
auto new_val = idx << 1;
if ((value & bitmask) != 0) new_val++;
expand_bit_mask(pos - 1, mask, value, valid, new_val, lut, f);
}
}
}
}
inline uint32_t extract_fields(uint32_t val) { return extract_fields(29, val >> 2, lutmasks[val & 0x3], 0); }
uint32_t extract_fields(int pos, uint32_t val, uint32_t mask, uint32_t lut_val) {
if (pos >= 0) {
auto bitmask = 1UL << pos;
if ((mask & bitmask) == 0) {
lut_val = extract_fields(pos - 1, val, mask, lut_val);
} else {
auto new_val = lut_val << 1;
if ((val & bitmask) != 0) new_val++;
lut_val = extract_fields(pos - 1, val, mask, new_val);
}
}
return lut_val;
}
template<unsigned W, typename U, typename S = typename std::make_signed<U>::type> template<unsigned W, typename U, typename S = typename std::make_signed<U>::type>
inline S sext(U from) { inline S sext(U from) {
auto mask = (1ULL<<W) - 1; auto mask = (1ULL<<W) - 1;
@ -182,14 +132,23 @@ private:
/**************************************************************************** /****************************************************************************
* start opcode definitions * start opcode definitions
****************************************************************************/ ****************************************************************************/
struct InstructionDesriptor { struct instruction_descriptor {
size_t length; size_t length;
uint32_t value; uint32_t value;
uint32_t mask; uint32_t mask;
compile_func op; compile_func op;
}; };
struct decoding_tree_node{
std::vector<instruction_descriptor> instrs;
std::vector<decoding_tree_node*> children;
uint32_t submask = std::numeric_limits<uint32_t>::max();
uint32_t value;
decoding_tree_node(uint32_t value) : value(value){}
};
const std::array<InstructionDesriptor, ${instructions.size}> instr_descr = {{ decoding_tree_node* root {nullptr};
const std::array<instruction_descriptor, ${instructions.size}> instr_descr = {{
/* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %> /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
/* instruction ${instr.instruction.name}, encoding '${instr.encoding}' */ /* instruction ${instr.instruction.name}, encoding '${instr.encoding}' */
{${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%> {${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%>
@ -227,11 +186,64 @@ private:
vm_impl::gen_trap_check(tu); vm_impl::gen_trap_check(tu);
return BRANCH; return BRANCH;
} }
//decoding functionality
void populate_decoding_tree(decoding_tree_node* root){
//create submask
for(auto instr: root->instrs){
root->submask &= instr.mask;
}
//put each instr according to submask&encoding into children
for(auto instr: root->instrs){
bool foundMatch = false;
for(auto child: root->children){
//use value as identifying trait
if(child->value == (instr.value&root->submask)){
child->instrs.push_back(instr);
foundMatch = true;
}
}
if(!foundMatch){
decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
child->instrs.push_back(instr);
root->children.push_back(child);
}
}
root->instrs.clear();
//call populate_decoding_tree for all children
if(root->children.size() >1)
for(auto child: root->children){
populate_decoding_tree(child);
}
else{
//sort instrs by value of the mask, this works bc we want to have the least restrictive one last
std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
return instr1.mask > instr2.mask;
});
}
}
compile_func decode_instr(decoding_tree_node* node, code_word_t word){
if(!node->children.size()){
if(node->instrs.size() == 1) return node->instrs[0].op;
for(auto instr : node->instrs){
if((instr.mask&word) == instr.value) return instr.op;
}
}
else{
for(auto child : node->children){
if (child->value == (node->submask&word)){
return decode_instr(child, word);
}
}
}
return nullptr;
}
}; };
template <typename CODE_WORD> void debug_fn(CODE_WORD insn) { template <typename CODE_WORD> void debug_fn(CODE_WORD instr) {
volatile CODE_WORD x = insn; volatile CODE_WORD x = instr;
insn = 2 * x; instr = 2 * x;
} }
template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); } template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
@ -239,14 +251,11 @@ template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
template <typename ARCH> template <typename ARCH>
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id) vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
: vm_base<ARCH>(core, core_id, cluster_id) { : vm_base<ARCH>(core, core_id, cluster_id) {
qlut[0] = lut_00.data(); root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
qlut[1] = lut_01.data();
qlut[2] = lut_10.data();
qlut[3] = lut_11.data();
for(auto instr:instr_descr){ for(auto instr:instr_descr){
auto quantrant = instr.value & 0x3; root->instrs.push_back(instr);
expand_bit_mask(29, lutmasks[quantrant], instr.value >> 2, instr.mask >> 2, 0, qlut[quantrant], instr.op);
} }
populate_decoding_tree(root);
} }
template <typename ARCH> template <typename ARCH>
@ -254,30 +263,19 @@ std::tuple<continuation_e>
vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) { vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) {
// we fetch at max 4 byte, alignment is 2 // we fetch at max 4 byte, alignment is 2
enum {TRAP_ID=1<<16}; enum {TRAP_ID=1<<16};
code_word_t insn = 0; code_word_t instr = 0;
// const typename traits::addr_t upper_bits = ~traits::PGMASK;
phys_addr_t paddr(pc); phys_addr_t paddr(pc);
auto *const data = (uint8_t *)&insn;
paddr = this->core.v2p(pc); paddr = this->core.v2p(pc);
// if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary auto res = this->core.read(paddr, 4, reinterpret_cast<uint8_t*>(&instr));
// auto res = this->core.read(paddr, 2, data);
// if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
// if ((insn & 0x3) == 0x3) { // this is a 32bit instruction
// res = this->core.read(this->core.v2p(pc + 2), 2, data + 2);
// }
// } else {
auto res = this->core.read(paddr, 4, data);
if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val); if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
// } if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
if (insn == 0x0000006f || (insn&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
// curr pc on stack // curr pc on stack
++inst_cnt; ++inst_cnt;
auto lut_val = extract_fields(insn); auto f = decode_instr(root, instr);
auto f = qlut[insn & 0x3][lut_val];
if (f == nullptr) { if (f == nullptr) {
f = &this_class::illegal_intruction; f = &this_class::illegal_intruction;
} }
return (this->*f)(pc, insn, tu); return (this->*f)(pc, instr, tu);
} }
template <typename ARCH> void vm_impl<ARCH>::gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause) { template <typename ARCH> void vm_impl<ARCH>::gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause) {

View File

@ -152,14 +152,22 @@ private:
/**************************************************************************** /****************************************************************************
* start opcode definitions * start opcode definitions
****************************************************************************/ ****************************************************************************/
struct InstructionDesriptor { struct instruction_descriptor {
size_t length; size_t length;
uint32_t value; uint32_t value;
uint32_t mask; uint32_t mask;
typename arch::traits<ARCH>::opcode_e op; typename arch::traits<ARCH>::opcode_e op;
}; };
struct decoding_tree_node{
std::vector<instruction_descriptor> instrs;
std::vector<decoding_tree_node*> children;
uint32_t submask = std::numeric_limits<uint32_t>::max();
uint32_t value;
decoding_tree_node(uint32_t value) : value(value){}
};
const std::array<InstructionDesriptor, 87> instr_descr = {{ decoding_tree_node* root {nullptr};
const std::array<instruction_descriptor, 87> instr_descr = {{
/* entries are: size, valid value, valid mask, function ptr */ /* entries are: size, valid value, valid mask, function ptr */
{32, 0b00000000000000000000000000110111, 0b00000000000000000000000001111111, arch::traits<ARCH>::opcode_e::LUI}, {32, 0b00000000000000000000000000110111, 0b00000000000000000000000001111111, arch::traits<ARCH>::opcode_e::LUI},
{32, 0b00000000000000000000000000010111, 0b00000000000000000000000001111111, arch::traits<ARCH>::opcode_e::AUIPC}, {32, 0b00000000000000000000000000010111, 0b00000000000000000000000001111111, arch::traits<ARCH>::opcode_e::AUIPC},
@ -250,18 +258,61 @@ private:
{16, 0b0000000000000000, 0b1111111111111111, arch::traits<ARCH>::opcode_e::DII}, {16, 0b0000000000000000, 0b1111111111111111, arch::traits<ARCH>::opcode_e::DII},
}}; }};
//static constexpr typename traits::addr_t upper_bits = ~traits::PGMASK;
iss::status fetch_ins(virt_addr_t pc, uint8_t * data){ iss::status fetch_ins(virt_addr_t pc, uint8_t * data){
auto phys_pc = this->core.v2p(pc); auto phys_pc = this->core.v2p(pc);
//if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
// if (this->core.read(phys_pc, 2, data) != iss::Ok) return iss::Err;
// if ((data[0] & 0x3) == 0x3) // this is a 32bit instruction
// if (this->core.read(this->core.v2p(pc + 2), 2, data + 2) != iss::Ok) return iss::Err;
//} else {
if (this->core.read(phys_pc, 4, data) != iss::Ok) return iss::Err; if (this->core.read(phys_pc, 4, data) != iss::Ok) return iss::Err;
//}
return iss::Ok; return iss::Ok;
} }
void populate_decoding_tree(decoding_tree_node* root){
//create submask
for(auto instr: root->instrs){
root->submask &= instr.mask;
}
//put each instr according to submask&encoding into children
for(auto instr: root->instrs){
bool foundMatch = false;
for(auto child: root->children){
//use value as identifying trait
if(child->value == (instr.value&root->submask)){
child->instrs.push_back(instr);
foundMatch = true;
}
}
if(!foundMatch){
decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
child->instrs.push_back(instr);
root->children.push_back(child);
}
}
root->instrs.clear();
//call populate_decoding_tree for all children
if(root->children.size() >1)
for(auto child: root->children){
populate_decoding_tree(child);
}
else{
//sort instrs by value of the mask, this works bc we want to have the least restrictive one last
std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
return instr1.mask > instr2.mask;
});
}
}
typename arch::traits<ARCH>::opcode_e decode_instr(decoding_tree_node* node, code_word_t word){
if(!node->children.size()){
if(node->instrs.size() == 1) return node->instrs[0].op;
for(auto instr : node->instrs){
if((instr.mask&word) == instr.value) return instr.op;
}
}
else{
for(auto child : node->children){
if (child->value == (node->submask&word)){
return decode_instr(child, word);
}
}
}
return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
}
}; };
template <typename CODE_WORD> void debug_fn(CODE_WORD insn) { template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
@ -288,16 +339,11 @@ constexpr size_t bit_count(uint32_t u) {
template <typename ARCH> template <typename ARCH>
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id) vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
: vm_base<ARCH>(core, core_id, cluster_id) { : vm_base<ARCH>(core, core_id, cluster_id) {
unsigned id=0; root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
for(auto instr:instr_descr){ for(auto instr:instr_descr){
auto quadrant = instr.value & 0x3; root->instrs.push_back(instr);
qlut[quadrant].push_back(instruction_pattern{instr.value, instr.mask, instr.op});
}
for(auto& lut: qlut){
std::sort(std::begin(lut), std::end(lut), [](instruction_pattern const& a, instruction_pattern const& b){
return bit_count(a.mask) > bit_count(b.mask);
});
} }
populate_decoding_tree(root);
} }
inline bool is_count_limit_enabled(finish_cond_e cond){ inline bool is_count_limit_enabled(finish_cond_e cond){
@ -308,14 +354,6 @@ inline bool is_jump_to_self_enabled(finish_cond_e cond){
return (cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF; return (cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF;
} }
template <typename ARCH>
typename arch::traits<ARCH>::opcode_e vm_impl<ARCH>::decode_inst_id(code_word_t instr){
for(auto& e: qlut[instr&0x3]){
if(!((instr&e.mask) ^ e.value )) return e.id;
}
return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
}
template <typename ARCH> template <typename ARCH>
typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){
auto pc=start; auto pc=start;
@ -337,7 +375,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
} else { } else {
if (is_jump_to_self_enabled(cond) && if (is_jump_to_self_enabled(cond) &&
(instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0' (instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
auto inst_id = decode_inst_id(instr); auto inst_id = decode_instr(root, instr);
// pre execution stuff // pre execution stuff
this->core.reg.last_branch = 0; this->core.reg.last_branch = 0;
if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast<unsigned>(inst_id)); if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast<unsigned>(inst_id));

View File

@ -120,57 +120,7 @@ protected:
} }
} }
// some compile time constants
// enum { MASK16 = 0b1111110001100011, MASK32 = 0b11111111111100000111000001111111 };
enum { MASK16 = 0b1111111111111111, MASK32 = 0b11111111111100000111000001111111 };
enum { EXTR_MASK16 = MASK16 >> 2, EXTR_MASK32 = MASK32 >> 2 };
enum { LUT_SIZE = 1 << util::bit_count(static_cast<uint32_t>(EXTR_MASK32)), LUT_SIZE_C = 1 << util::bit_count(static_cast<uint32_t>(EXTR_MASK16)) };
std::array<compile_func, LUT_SIZE> lut;
std::array<compile_func, LUT_SIZE_C> lut_00, lut_01, lut_10;
std::array<compile_func, LUT_SIZE> lut_11;
std::array<compile_func *, 4> qlut;
std::array<const uint32_t, 4> lutmasks = {{EXTR_MASK16, EXTR_MASK16, EXTR_MASK16, EXTR_MASK32}};
void expand_bit_mask(int pos, uint32_t mask, uint32_t value, uint32_t valid, uint32_t idx, compile_func lut[],
compile_func f) {
if (pos < 0) {
lut[idx] = f;
} else {
auto bitmask = 1UL << pos;
if ((mask & bitmask) == 0) {
expand_bit_mask(pos - 1, mask, value, valid, idx, lut, f);
} else {
if ((valid & bitmask) == 0) {
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1), lut, f);
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1) + 1, lut, f);
} else {
auto new_val = idx << 1;
if ((value & bitmask) != 0) new_val++;
expand_bit_mask(pos - 1, mask, value, valid, new_val, lut, f);
}
}
}
}
inline uint32_t extract_fields(uint32_t val) { return extract_fields(29, val >> 2, lutmasks[val & 0x3], 0); }
uint32_t extract_fields(int pos, uint32_t val, uint32_t mask, uint32_t lut_val) {
if (pos >= 0) {
auto bitmask = 1UL << pos;
if ((mask & bitmask) == 0) {
lut_val = extract_fields(pos - 1, val, mask, lut_val);
} else {
auto new_val = lut_val << 1;
if ((val & bitmask) != 0) new_val++;
lut_val = extract_fields(pos - 1, val, mask, new_val);
}
}
return lut_val;
}
template<unsigned W, typename U, typename S = typename std::make_signed<U>::type> template<unsigned W, typename U, typename S = typename std::make_signed<U>::type>
inline S sext(U from) { inline S sext(U from) {
auto mask = (1ULL<<W) - 1; auto mask = (1ULL<<W) - 1;
@ -182,14 +132,23 @@ private:
/**************************************************************************** /****************************************************************************
* start opcode definitions * start opcode definitions
****************************************************************************/ ****************************************************************************/
struct InstructionDesriptor { struct instruction_descriptor {
size_t length; size_t length;
uint32_t value; uint32_t value;
uint32_t mask; uint32_t mask;
compile_func op; compile_func op;
}; };
struct decoding_tree_node{
std::vector<instruction_descriptor> instrs;
std::vector<decoding_tree_node*> children;
uint32_t submask = std::numeric_limits<uint32_t>::max();
uint32_t value;
decoding_tree_node(uint32_t value) : value(value){}
};
const std::array<InstructionDesriptor, 87> instr_descr = {{ decoding_tree_node* root {nullptr};
const std::array<instruction_descriptor, 87> instr_descr = {{
/* entries are: size, valid value, valid mask, function ptr */ /* entries are: size, valid value, valid mask, function ptr */
/* instruction LUI, encoding '0b00000000000000000000000000110111' */ /* instruction LUI, encoding '0b00000000000000000000000000110111' */
{32, 0b00000000000000000000000000110111, 0b00000000000000000000000001111111, &this_class::__lui}, {32, 0b00000000000000000000000000110111, 0b00000000000000000000000001111111, &this_class::__lui},
@ -3136,11 +3095,64 @@ private:
vm_impl::gen_trap_check(tu); vm_impl::gen_trap_check(tu);
return BRANCH; return BRANCH;
} }
//decoding functionality
void populate_decoding_tree(decoding_tree_node* root){
//create submask
for(auto instr: root->instrs){
root->submask &= instr.mask;
}
//put each instr according to submask&encoding into children
for(auto instr: root->instrs){
bool foundMatch = false;
for(auto child: root->children){
//use value as identifying trait
if(child->value == (instr.value&root->submask)){
child->instrs.push_back(instr);
foundMatch = true;
}
}
if(!foundMatch){
decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
child->instrs.push_back(instr);
root->children.push_back(child);
}
}
root->instrs.clear();
//call populate_decoding_tree for all children
if(root->children.size() >1)
for(auto child: root->children){
populate_decoding_tree(child);
}
else{
//sort instrs by value of the mask, this works bc we want to have the least restrictive one last
std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
return instr1.mask > instr2.mask;
});
}
}
compile_func decode_instr(decoding_tree_node* node, code_word_t word){
if(!node->children.size()){
if(node->instrs.size() == 1) return node->instrs[0].op;
for(auto instr : node->instrs){
if((instr.mask&word) == instr.value) return instr.op;
}
}
else{
for(auto child : node->children){
if (child->value == (node->submask&word)){
return decode_instr(child, word);
}
}
}
return nullptr;
}
}; };
template <typename CODE_WORD> void debug_fn(CODE_WORD insn) { template <typename CODE_WORD> void debug_fn(CODE_WORD instr) {
volatile CODE_WORD x = insn; volatile CODE_WORD x = instr;
insn = 2 * x; instr = 2 * x;
} }
template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); } template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
@ -3148,14 +3160,11 @@ template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
template <typename ARCH> template <typename ARCH>
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id) vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
: vm_base<ARCH>(core, core_id, cluster_id) { : vm_base<ARCH>(core, core_id, cluster_id) {
qlut[0] = lut_00.data(); root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
qlut[1] = lut_01.data();
qlut[2] = lut_10.data();
qlut[3] = lut_11.data();
for(auto instr:instr_descr){ for(auto instr:instr_descr){
auto quantrant = instr.value & 0x3; root->instrs.push_back(instr);
expand_bit_mask(29, lutmasks[quantrant], instr.value >> 2, instr.mask >> 2, 0, qlut[quantrant], instr.op);
} }
populate_decoding_tree(root);
} }
template <typename ARCH> template <typename ARCH>
@ -3163,30 +3172,19 @@ std::tuple<continuation_e>
vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) { vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) {
// we fetch at max 4 byte, alignment is 2 // we fetch at max 4 byte, alignment is 2
enum {TRAP_ID=1<<16}; enum {TRAP_ID=1<<16};
code_word_t insn = 0; code_word_t instr = 0;
// const typename traits::addr_t upper_bits = ~traits::PGMASK;
phys_addr_t paddr(pc); phys_addr_t paddr(pc);
auto *const data = (uint8_t *)&insn;
paddr = this->core.v2p(pc); paddr = this->core.v2p(pc);
// if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary auto res = this->core.read(paddr, 4, reinterpret_cast<uint8_t*>(&instr));
// auto res = this->core.read(paddr, 2, data);
// if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
// if ((insn & 0x3) == 0x3) { // this is a 32bit instruction
// res = this->core.read(this->core.v2p(pc + 2), 2, data + 2);
// }
// } else {
auto res = this->core.read(paddr, 4, data);
if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val); if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
// } if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
if (insn == 0x0000006f || (insn&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
// curr pc on stack // curr pc on stack
++inst_cnt; ++inst_cnt;
auto lut_val = extract_fields(insn); auto f = decode_instr(root, instr);
auto f = qlut[insn & 0x3][lut_val];
if (f == nullptr) { if (f == nullptr) {
f = &this_class::illegal_intruction; f = &this_class::illegal_intruction;
} }
return (this->*f)(pc, insn, tu); return (this->*f)(pc, instr, tu);
} }
template <typename ARCH> void vm_impl<ARCH>::gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause) { template <typename ARCH> void vm_impl<ARCH>::gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause) {