fix instruction decode
This commit is contained in:
parent
473f8a5a17
commit
e68918c2e8
|
@ -85,6 +85,7 @@ protected:
|
|||
|
||||
inline const char *name(size_t index){return traits::reg_aliases.at(index);}
|
||||
|
||||
compile_func decode_inst(code_word_t instr) ;
|
||||
virt_addr_t execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit) override;
|
||||
|
||||
// some compile time constants
|
||||
|
@ -98,46 +99,13 @@ protected:
|
|||
std::array<compile_func, LUT_SIZE_C> lut_00, lut_01, lut_10;
|
||||
std::array<compile_func, LUT_SIZE> lut_11;
|
||||
|
||||
std::array<compile_func *, 4> qlut;
|
||||
struct instruction_pattern {
|
||||
uint32_t value;
|
||||
uint32_t mask;
|
||||
compile_func opc;
|
||||
};
|
||||
|
||||
std::array<const uint32_t, 4> lutmasks = {{EXTR_MASK16, EXTR_MASK16, EXTR_MASK16, EXTR_MASK32}};
|
||||
|
||||
void expand_bit_mask(int pos, uint32_t mask, uint32_t value, uint32_t valid, uint32_t idx, compile_func lut[],
|
||||
compile_func f) {
|
||||
if (pos < 0) {
|
||||
lut[idx] = f;
|
||||
} else {
|
||||
auto bitmask = 1UL << pos;
|
||||
if ((mask & bitmask) == 0) {
|
||||
expand_bit_mask(pos - 1, mask, value, valid, idx, lut, f);
|
||||
} else {
|
||||
if ((valid & bitmask) == 0) {
|
||||
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1), lut, f);
|
||||
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1) + 1, lut, f);
|
||||
} else {
|
||||
auto new_val = idx << 1;
|
||||
if ((value & bitmask) != 0) new_val++;
|
||||
expand_bit_mask(pos - 1, mask, value, valid, new_val, lut, f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline uint32_t extract_fields(uint32_t val) { return extract_fields(29, val >> 2, lutmasks[val & 0x3], 0); }
|
||||
|
||||
uint32_t extract_fields(int pos, uint32_t val, uint32_t mask, uint32_t lut_val) {
|
||||
if (pos >= 0) {
|
||||
auto bitmask = 1UL << pos;
|
||||
if ((mask & bitmask) == 0) {
|
||||
lut_val = extract_fields(pos - 1, val, mask, lut_val);
|
||||
} else {
|
||||
auto new_val = lut_val << 1;
|
||||
if ((val & bitmask) != 0) new_val++;
|
||||
lut_val = extract_fields(pos - 1, val, mask, new_val);
|
||||
}
|
||||
}
|
||||
return lut_val;
|
||||
}
|
||||
std::array<std::vector<instruction_pattern>, 4> qlut;
|
||||
|
||||
inline void raise(uint16_t trap_id, uint16_t cause){
|
||||
auto trap_val = 0x80ULL << 24 | (cause << 16) | trap_id;
|
||||
|
@ -296,22 +264,46 @@ template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
|
|||
|
||||
template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
|
||||
|
||||
// according to
|
||||
// https://stackoverflow.com/questions/8871204/count-number-of-1s-in-binary-representation
|
||||
#ifdef __GCC__
|
||||
constexpr size_t bit_count(uint32_t u) { return __builtin_popcount(u); }
|
||||
#elif __cplusplus < 201402L
|
||||
constexpr size_t uCount(uint32_t u) { return u - ((u >> 1) & 033333333333) - ((u >> 2) & 011111111111); }
|
||||
constexpr size_t bit_count(uint32_t u) { return ((uCount(u) + (uCount(u) >> 3)) & 030707070707) % 63; }
|
||||
#else
|
||||
constexpr size_t bit_count(uint32_t u) {
|
||||
size_t uCount = u - ((u >> 1) & 033333333333) - ((u >> 2) & 011111111111);
|
||||
return ((uCount + (uCount >> 3)) & 030707070707) % 63;
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename ARCH>
|
||||
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
|
||||
: vm_base<ARCH>(core, core_id, cluster_id) {
|
||||
qlut[0] = lut_00.data();
|
||||
qlut[1] = lut_01.data();
|
||||
qlut[2] = lut_10.data();
|
||||
qlut[3] = lut_11.data();
|
||||
for (auto instr : instr_descr) {
|
||||
auto quantrant = instr.value & 0x3;
|
||||
expand_bit_mask(29, lutmasks[quantrant], instr.value >> 2, instr.mask >> 2, 0, qlut[quantrant], instr.op);
|
||||
auto quadrant = instr.value & 0x3;
|
||||
qlut[quadrant].push_back(instruction_pattern{instr.value, instr.mask, instr.op});
|
||||
}
|
||||
for(auto& lut: qlut){
|
||||
std::sort(std::begin(lut), std::end(lut), [](instruction_pattern const& a, instruction_pattern const& b){
|
||||
return bit_count(a.mask) > bit_count(b.mask);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
inline bool is_count_limit_enabled(finish_cond_e cond){
|
||||
return (cond & finish_cond_e::COUNT_LIMIT) == finish_cond_e::COUNT_LIMIT;
|
||||
}
|
||||
|
||||
template <typename ARCH>
|
||||
typename vm_impl<ARCH>::compile_func vm_impl<ARCH>::decode_inst(code_word_t instr){
|
||||
for(auto& e: qlut[instr&0x3]){
|
||||
if(!((instr&e.mask) ^ e.value )) return e.opc;
|
||||
}
|
||||
return &this_class::illegal_intruction;
|
||||
}
|
||||
|
||||
template <typename ARCH>
|
||||
typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){
|
||||
// we fetch at max 4 byte, alignment is 2
|
||||
|
@ -329,10 +321,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
|
|||
}
|
||||
if ((cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF &&
|
||||
(insn == 0x0000006f || (insn&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
|
||||
auto lut_val = extract_fields(insn);
|
||||
auto f = qlut[insn & 0x3][lut_val];
|
||||
if (!f)
|
||||
f = &this_class::illegal_intruction;
|
||||
auto f = decode_inst(insn);
|
||||
pc = (this->*f)(pc, insn);
|
||||
}
|
||||
return pc;
|
||||
|
|
|
@ -144,10 +144,10 @@ public:
|
|||
|
||||
mstatus_t mstatus;
|
||||
|
||||
static const reg_t mstatus_reset_val = 0;
|
||||
static const reg_t mstatus_reset_val = 0x1800;
|
||||
|
||||
void write_mstatus(T val) {
|
||||
auto mask = get_mask();
|
||||
auto mask = get_mask() &0xff; // MPP is hardcode as 0x3
|
||||
auto new_val = (mstatus.backing.val & ~mask) | (val & mask);
|
||||
mstatus = new_val;
|
||||
}
|
||||
|
|
|
@ -85,6 +85,7 @@ protected:
|
|||
|
||||
inline const char *name(size_t index){return traits::reg_aliases.at(index);}
|
||||
|
||||
compile_func decode_inst(code_word_t instr) ;
|
||||
virt_addr_t execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit) override;
|
||||
|
||||
// some compile time constants
|
||||
|
@ -98,46 +99,13 @@ protected:
|
|||
std::array<compile_func, LUT_SIZE_C> lut_00, lut_01, lut_10;
|
||||
std::array<compile_func, LUT_SIZE> lut_11;
|
||||
|
||||
std::array<compile_func *, 4> qlut;
|
||||
struct instruction_pattern {
|
||||
uint32_t value;
|
||||
uint32_t mask;
|
||||
compile_func opc;
|
||||
};
|
||||
|
||||
std::array<const uint32_t, 4> lutmasks = {{EXTR_MASK16, EXTR_MASK16, EXTR_MASK16, EXTR_MASK32}};
|
||||
|
||||
void expand_bit_mask(int pos, uint32_t mask, uint32_t value, uint32_t valid, uint32_t idx, compile_func lut[],
|
||||
compile_func f) {
|
||||
if (pos < 0) {
|
||||
lut[idx] = f;
|
||||
} else {
|
||||
auto bitmask = 1UL << pos;
|
||||
if ((mask & bitmask) == 0) {
|
||||
expand_bit_mask(pos - 1, mask, value, valid, idx, lut, f);
|
||||
} else {
|
||||
if ((valid & bitmask) == 0) {
|
||||
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1), lut, f);
|
||||
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1) + 1, lut, f);
|
||||
} else {
|
||||
auto new_val = idx << 1;
|
||||
if ((value & bitmask) != 0) new_val++;
|
||||
expand_bit_mask(pos - 1, mask, value, valid, new_val, lut, f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline uint32_t extract_fields(uint32_t val) { return extract_fields(29, val >> 2, lutmasks[val & 0x3], 0); }
|
||||
|
||||
uint32_t extract_fields(int pos, uint32_t val, uint32_t mask, uint32_t lut_val) {
|
||||
if (pos >= 0) {
|
||||
auto bitmask = 1UL << pos;
|
||||
if ((mask & bitmask) == 0) {
|
||||
lut_val = extract_fields(pos - 1, val, mask, lut_val);
|
||||
} else {
|
||||
auto new_val = lut_val << 1;
|
||||
if ((val & bitmask) != 0) new_val++;
|
||||
lut_val = extract_fields(pos - 1, val, mask, new_val);
|
||||
}
|
||||
}
|
||||
return lut_val;
|
||||
}
|
||||
std::array<std::vector<instruction_pattern>, 4> qlut;
|
||||
|
||||
inline void raise(uint16_t trap_id, uint16_t cause){
|
||||
auto trap_val = 0x80ULL << 24 | (cause << 16) | trap_id;
|
||||
|
@ -4113,22 +4081,46 @@ template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
|
|||
|
||||
template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
|
||||
|
||||
// according to
|
||||
// https://stackoverflow.com/questions/8871204/count-number-of-1s-in-binary-representation
|
||||
#ifdef __GCC__
|
||||
constexpr size_t bit_count(uint32_t u) { return __builtin_popcount(u); }
|
||||
#elif __cplusplus < 201402L
|
||||
constexpr size_t uCount(uint32_t u) { return u - ((u >> 1) & 033333333333) - ((u >> 2) & 011111111111); }
|
||||
constexpr size_t bit_count(uint32_t u) { return ((uCount(u) + (uCount(u) >> 3)) & 030707070707) % 63; }
|
||||
#else
|
||||
constexpr size_t bit_count(uint32_t u) {
|
||||
size_t uCount = u - ((u >> 1) & 033333333333) - ((u >> 2) & 011111111111);
|
||||
return ((uCount + (uCount >> 3)) & 030707070707) % 63;
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename ARCH>
|
||||
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
|
||||
: vm_base<ARCH>(core, core_id, cluster_id) {
|
||||
qlut[0] = lut_00.data();
|
||||
qlut[1] = lut_01.data();
|
||||
qlut[2] = lut_10.data();
|
||||
qlut[3] = lut_11.data();
|
||||
for (auto instr : instr_descr) {
|
||||
auto quantrant = instr.value & 0x3;
|
||||
expand_bit_mask(29, lutmasks[quantrant], instr.value >> 2, instr.mask >> 2, 0, qlut[quantrant], instr.op);
|
||||
auto quadrant = instr.value & 0x3;
|
||||
qlut[quadrant].push_back(instruction_pattern{instr.value, instr.mask, instr.op});
|
||||
}
|
||||
for(auto& lut: qlut){
|
||||
std::sort(std::begin(lut), std::end(lut), [](instruction_pattern const& a, instruction_pattern const& b){
|
||||
return bit_count(a.mask) > bit_count(b.mask);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
inline bool is_count_limit_enabled(finish_cond_e cond){
|
||||
return (cond & finish_cond_e::COUNT_LIMIT) == finish_cond_e::COUNT_LIMIT;
|
||||
}
|
||||
|
||||
template <typename ARCH>
|
||||
typename vm_impl<ARCH>::compile_func vm_impl<ARCH>::decode_inst(code_word_t instr){
|
||||
for(auto& e: qlut[instr&0x3]){
|
||||
if(!((instr&e.mask) ^ e.value )) return e.opc;
|
||||
}
|
||||
return &this_class::illegal_intruction;
|
||||
}
|
||||
|
||||
template <typename ARCH>
|
||||
typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){
|
||||
// we fetch at max 4 byte, alignment is 2
|
||||
|
@ -4146,10 +4138,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
|
|||
}
|
||||
if ((cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF &&
|
||||
(insn == 0x0000006f || (insn&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
|
||||
auto lut_val = extract_fields(insn);
|
||||
auto f = qlut[insn & 0x3][lut_val];
|
||||
if (!f)
|
||||
f = &this_class::illegal_intruction;
|
||||
auto f = decode_inst(insn);
|
||||
pc = (this->*f)(pc, insn);
|
||||
}
|
||||
return pc;
|
||||
|
|
Loading…
Reference in New Issue