Merge branch 'develop' of
https://git.minres.com/DBT-RISE/DBT-RISE-TGC.git into develop
This commit is contained in:
commit
e68f9c573f
|
@ -1,10 +1,12 @@
|
|||
cmake_minimum_required(VERSION 3.12)
|
||||
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
|
||||
###############################################################################
|
||||
#
|
||||
###############################################################################
|
||||
project(dbt-rise-tgc VERSION 1.0.0)
|
||||
|
||||
include(GNUInstallDirs)
|
||||
include(flink)
|
||||
|
||||
find_package(elfio QUIET)
|
||||
find_package(Boost COMPONENTS coroutine)
|
||||
|
@ -70,7 +72,7 @@ if(TARGET RapidJSON OR TARGET RapidJSON::RapidJSON)
|
|||
endif()
|
||||
|
||||
# Define the library
|
||||
add_library(${PROJECT_NAME} ${LIB_SOURCES})
|
||||
add_library(${PROJECT_NAME} SHARED ${LIB_SOURCES})
|
||||
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE -Wno-shift-count-overflow)
|
||||
|
@ -85,11 +87,9 @@ if(TARGET jsoncpp::jsoncpp)
|
|||
else()
|
||||
target_link_libraries(${PROJECT_NAME} PUBLIC jsoncpp)
|
||||
endif()
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND BUILD_SHARED_LIBS)
|
||||
target_link_libraries(${PROJECT_NAME} PUBLIC -Wl,--whole-archive dbt-rise-core -Wl,--no-whole-archive)
|
||||
else()
|
||||
target_link_libraries(${PROJECT_NAME} PUBLIC dbt-rise-core)
|
||||
endif()
|
||||
|
||||
target_link_libraries(${PROJECT_NAME} PUBLIC dbt-rise-core)
|
||||
|
||||
if(TARGET elfio::elfio)
|
||||
target_link_libraries(${PROJECT_NAME} PUBLIC elfio::elfio)
|
||||
else()
|
||||
|
@ -164,7 +164,7 @@ if(WITH_TCC)
|
|||
target_compile_definitions(${PROJECT_NAME} PRIVATE WITH_TCC)
|
||||
endif()
|
||||
# Links the target exe against the libraries
|
||||
target_link_libraries(${PROJECT_NAME} PUBLIC dbt-rise-tgc)
|
||||
target_force_link_libraries(${PROJECT_NAME} PUBLIC dbt-rise-tgc)
|
||||
if(TARGET Boost::program_options)
|
||||
target_link_libraries(${PROJECT_NAME} PUBLIC Boost::program_options)
|
||||
else()
|
||||
|
|
|
@ -0,0 +1,35 @@
|
|||
# according to https://github.com/horance-liu/flink.cmake/tree/master
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
include(CMakeParseArguments)
|
||||
|
||||
function(target_do_force_link_libraries target visibility lib)
|
||||
if(MSVC)
|
||||
target_link_libraries(${target} ${visibility} "/WHOLEARCHIVE:${lib}")
|
||||
elseif(APPLE)
|
||||
target_link_libraries(${target} ${visibility} -Wl,-force_load ${lib})
|
||||
else()
|
||||
target_link_libraries(${target} ${visibility} -Wl,--whole-archive ${lib} -Wl,--no-whole-archive)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
function(target_force_link_libraries target)
|
||||
cmake_parse_arguments(FLINK
|
||||
""
|
||||
""
|
||||
"PUBLIC;INTERFACE;PRIVATE"
|
||||
${ARGN}
|
||||
)
|
||||
|
||||
foreach(lib IN LISTS FLINK_PUBLIC)
|
||||
target_do_force_link_libraries(${target} PUBLIC ${lib})
|
||||
endforeach()
|
||||
|
||||
foreach(lib IN LISTS FLINK_INTERFACE)
|
||||
target_do_force_link_libraries(${target} INTERFACE ${lib})
|
||||
endforeach()
|
||||
|
||||
foreach(lib IN LISTS FLINK_PRIVATE)
|
||||
target_do_force_link_libraries(${target} PRIVATE ${lib})
|
||||
endforeach()
|
||||
endfunction()
|
|
@ -158,30 +158,81 @@ private:
|
|||
/****************************************************************************
|
||||
* start opcode definitions
|
||||
****************************************************************************/
|
||||
struct InstructionDesriptor {
|
||||
struct instruction_descriptor {
|
||||
size_t length;
|
||||
uint32_t value;
|
||||
uint32_t mask;
|
||||
typename arch::traits<ARCH>::opcode_e op;
|
||||
};
|
||||
struct decoding_tree_node{
|
||||
std::vector<instruction_descriptor> instrs;
|
||||
std::vector<decoding_tree_node*> children;
|
||||
uint32_t submask = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t value;
|
||||
decoding_tree_node(uint32_t value) : value(value){}
|
||||
};
|
||||
|
||||
const std::array<InstructionDesriptor, ${instructions.size}> instr_descr = {{
|
||||
decoding_tree_node* root {nullptr};
|
||||
const std::array<instruction_descriptor, ${instructions.size}> instr_descr = {{
|
||||
/* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
|
||||
{${instr.length}, ${instr.encoding}, ${instr.mask}, arch::traits<ARCH>::opcode_e::${instr.instruction.name}},<%}%>
|
||||
}};
|
||||
|
||||
//static constexpr typename traits::addr_t upper_bits = ~traits::PGMASK;
|
||||
iss::status fetch_ins(virt_addr_t pc, uint8_t * data){
|
||||
auto phys_pc = this->core.v2p(pc);
|
||||
//if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
|
||||
// if (this->core.read(phys_pc, 2, data) != iss::Ok) return iss::Err;
|
||||
// if ((data[0] & 0x3) == 0x3) // this is a 32bit instruction
|
||||
// if (this->core.read(this->core.v2p(pc + 2), 2, data + 2) != iss::Ok) return iss::Err;
|
||||
//} else {
|
||||
if (this->core.read(phys_pc, 4, data) != iss::Ok) return iss::Err;
|
||||
//}
|
||||
return iss::Ok;
|
||||
}
|
||||
void populate_decoding_tree(decoding_tree_node* root){
|
||||
//create submask
|
||||
for(auto instr: root->instrs){
|
||||
root->submask &= instr.mask;
|
||||
}
|
||||
//put each instr according to submask&encoding into children
|
||||
for(auto instr: root->instrs){
|
||||
bool foundMatch = false;
|
||||
for(auto child: root->children){
|
||||
//use value as identifying trait
|
||||
if(child->value == (instr.value&root->submask)){
|
||||
child->instrs.push_back(instr);
|
||||
foundMatch = true;
|
||||
}
|
||||
}
|
||||
if(!foundMatch){
|
||||
decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
|
||||
child->instrs.push_back(instr);
|
||||
root->children.push_back(child);
|
||||
}
|
||||
}
|
||||
root->instrs.clear();
|
||||
//call populate_decoding_tree for all children
|
||||
if(root->children.size() >1)
|
||||
for(auto child: root->children){
|
||||
populate_decoding_tree(child);
|
||||
}
|
||||
else{
|
||||
//sort instrs by value of the mask, this works bc we want to have the least restrictive one last
|
||||
std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
|
||||
return instr1.mask > instr2.mask;
|
||||
});
|
||||
}
|
||||
}
|
||||
typename arch::traits<ARCH>::opcode_e decode_instr(decoding_tree_node* node, code_word_t word){
|
||||
if(!node->children.size()){
|
||||
if(node->instrs.size() == 1) return node->instrs[0].op;
|
||||
for(auto instr : node->instrs){
|
||||
if((instr.mask&word) == instr.value) return instr.op;
|
||||
}
|
||||
}
|
||||
else{
|
||||
for(auto child : node->children){
|
||||
if (child->value == (node->submask&word)){
|
||||
return decode_instr(child, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
|
||||
|
@ -208,16 +259,11 @@ constexpr size_t bit_count(uint32_t u) {
|
|||
template <typename ARCH>
|
||||
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
|
||||
: vm_base<ARCH>(core, core_id, cluster_id) {
|
||||
unsigned id=0;
|
||||
for (auto instr : instr_descr) {
|
||||
auto quadrant = instr.value & 0x3;
|
||||
qlut[quadrant].push_back(instruction_pattern{instr.value, instr.mask, instr.op});
|
||||
}
|
||||
for(auto& lut: qlut){
|
||||
std::sort(std::begin(lut), std::end(lut), [](instruction_pattern const& a, instruction_pattern const& b){
|
||||
return bit_count(a.mask) > bit_count(b.mask);
|
||||
});
|
||||
root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
|
||||
for(auto instr:instr_descr){
|
||||
root->instrs.push_back(instr);
|
||||
}
|
||||
populate_decoding_tree(root);
|
||||
}
|
||||
|
||||
inline bool is_count_limit_enabled(finish_cond_e cond){
|
||||
|
@ -228,14 +274,6 @@ inline bool is_jump_to_self_enabled(finish_cond_e cond){
|
|||
return (cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF;
|
||||
}
|
||||
|
||||
template <typename ARCH>
|
||||
typename arch::traits<ARCH>::opcode_e vm_impl<ARCH>::decode_inst_id(code_word_t instr){
|
||||
for(auto& e: qlut[instr&0x3]){
|
||||
if(!((instr&e.mask) ^ e.value )) return e.id;
|
||||
}
|
||||
return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
|
||||
}
|
||||
|
||||
template <typename ARCH>
|
||||
typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){
|
||||
auto pc=start;
|
||||
|
@ -257,7 +295,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
|
|||
} else {
|
||||
if (is_jump_to_self_enabled(cond) &&
|
||||
(instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
|
||||
auto inst_id = decode_inst_id(instr);
|
||||
auto inst_id = decode_instr(root, instr);
|
||||
// pre execution stuff
|
||||
this->core.reg.last_branch = 0;
|
||||
if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast<unsigned>(inst_id));
|
||||
|
|
|
@ -120,57 +120,7 @@ protected:
|
|||
}
|
||||
}
|
||||
|
||||
// some compile time constants
|
||||
// enum { MASK16 = 0b1111110001100011, MASK32 = 0b11111111111100000111000001111111 };
|
||||
enum { MASK16 = 0b1111111111111111, MASK32 = 0b11111111111100000111000001111111 };
|
||||
enum { EXTR_MASK16 = MASK16 >> 2, EXTR_MASK32 = MASK32 >> 2 };
|
||||
enum { LUT_SIZE = 1 << util::bit_count(static_cast<uint32_t>(EXTR_MASK32)), LUT_SIZE_C = 1 << util::bit_count(static_cast<uint32_t>(EXTR_MASK16)) };
|
||||
|
||||
std::array<compile_func, LUT_SIZE> lut;
|
||||
|
||||
std::array<compile_func, LUT_SIZE_C> lut_00, lut_01, lut_10;
|
||||
std::array<compile_func, LUT_SIZE> lut_11;
|
||||
|
||||
std::array<compile_func *, 4> qlut;
|
||||
|
||||
std::array<const uint32_t, 4> lutmasks = {{EXTR_MASK16, EXTR_MASK16, EXTR_MASK16, EXTR_MASK32}};
|
||||
|
||||
void expand_bit_mask(int pos, uint32_t mask, uint32_t value, uint32_t valid, uint32_t idx, compile_func lut[],
|
||||
compile_func f) {
|
||||
if (pos < 0) {
|
||||
lut[idx] = f;
|
||||
} else {
|
||||
auto bitmask = 1UL << pos;
|
||||
if ((mask & bitmask) == 0) {
|
||||
expand_bit_mask(pos - 1, mask, value, valid, idx, lut, f);
|
||||
} else {
|
||||
if ((valid & bitmask) == 0) {
|
||||
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1), lut, f);
|
||||
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1) + 1, lut, f);
|
||||
} else {
|
||||
auto new_val = idx << 1;
|
||||
if ((value & bitmask) != 0) new_val++;
|
||||
expand_bit_mask(pos - 1, mask, value, valid, new_val, lut, f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline uint32_t extract_fields(uint32_t val) { return extract_fields(29, val >> 2, lutmasks[val & 0x3], 0); }
|
||||
|
||||
uint32_t extract_fields(int pos, uint32_t val, uint32_t mask, uint32_t lut_val) {
|
||||
if (pos >= 0) {
|
||||
auto bitmask = 1UL << pos;
|
||||
if ((mask & bitmask) == 0) {
|
||||
lut_val = extract_fields(pos - 1, val, mask, lut_val);
|
||||
} else {
|
||||
auto new_val = lut_val << 1;
|
||||
if ((val & bitmask) != 0) new_val++;
|
||||
lut_val = extract_fields(pos - 1, val, mask, new_val);
|
||||
}
|
||||
}
|
||||
return lut_val;
|
||||
}
|
||||
|
||||
template<unsigned W, typename U, typename S = typename std::make_signed<U>::type>
|
||||
inline S sext(U from) {
|
||||
auto mask = (1ULL<<W) - 1;
|
||||
|
@ -182,14 +132,23 @@ private:
|
|||
/****************************************************************************
|
||||
* start opcode definitions
|
||||
****************************************************************************/
|
||||
struct InstructionDesriptor {
|
||||
struct instruction_descriptor {
|
||||
size_t length;
|
||||
uint32_t value;
|
||||
uint32_t mask;
|
||||
compile_func op;
|
||||
};
|
||||
struct decoding_tree_node{
|
||||
std::vector<instruction_descriptor> instrs;
|
||||
std::vector<decoding_tree_node*> children;
|
||||
uint32_t submask = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t value;
|
||||
decoding_tree_node(uint32_t value) : value(value){}
|
||||
};
|
||||
|
||||
const std::array<InstructionDesriptor, ${instructions.size}> instr_descr = {{
|
||||
decoding_tree_node* root {nullptr};
|
||||
|
||||
const std::array<instruction_descriptor, ${instructions.size}> instr_descr = {{
|
||||
/* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
|
||||
/* instruction ${instr.instruction.name}, encoding '${instr.encoding}' */
|
||||
{${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%>
|
||||
|
@ -227,11 +186,64 @@ private:
|
|||
vm_impl::gen_trap_check(tu);
|
||||
return BRANCH;
|
||||
}
|
||||
|
||||
//decoding functionality
|
||||
|
||||
void populate_decoding_tree(decoding_tree_node* root){
|
||||
//create submask
|
||||
for(auto instr: root->instrs){
|
||||
root->submask &= instr.mask;
|
||||
}
|
||||
//put each instr according to submask&encoding into children
|
||||
for(auto instr: root->instrs){
|
||||
bool foundMatch = false;
|
||||
for(auto child: root->children){
|
||||
//use value as identifying trait
|
||||
if(child->value == (instr.value&root->submask)){
|
||||
child->instrs.push_back(instr);
|
||||
foundMatch = true;
|
||||
}
|
||||
}
|
||||
if(!foundMatch){
|
||||
decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
|
||||
child->instrs.push_back(instr);
|
||||
root->children.push_back(child);
|
||||
}
|
||||
}
|
||||
root->instrs.clear();
|
||||
//call populate_decoding_tree for all children
|
||||
if(root->children.size() >1)
|
||||
for(auto child: root->children){
|
||||
populate_decoding_tree(child);
|
||||
}
|
||||
else{
|
||||
//sort instrs by value of the mask, this works bc we want to have the least restrictive one last
|
||||
std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
|
||||
return instr1.mask > instr2.mask;
|
||||
});
|
||||
}
|
||||
}
|
||||
compile_func decode_instr(decoding_tree_node* node, code_word_t word){
|
||||
if(!node->children.size()){
|
||||
if(node->instrs.size() == 1) return node->instrs[0].op;
|
||||
for(auto instr : node->instrs){
|
||||
if((instr.mask&word) == instr.value) return instr.op;
|
||||
}
|
||||
}
|
||||
else{
|
||||
for(auto child : node->children){
|
||||
if (child->value == (node->submask&word)){
|
||||
return decode_instr(child, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
|
||||
volatile CODE_WORD x = insn;
|
||||
insn = 2 * x;
|
||||
template <typename CODE_WORD> void debug_fn(CODE_WORD instr) {
|
||||
volatile CODE_WORD x = instr;
|
||||
instr = 2 * x;
|
||||
}
|
||||
|
||||
template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
|
||||
|
@ -239,14 +251,11 @@ template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
|
|||
template <typename ARCH>
|
||||
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
|
||||
: vm_base<ARCH>(core, core_id, cluster_id) {
|
||||
qlut[0] = lut_00.data();
|
||||
qlut[1] = lut_01.data();
|
||||
qlut[2] = lut_10.data();
|
||||
qlut[3] = lut_11.data();
|
||||
for (auto instr : instr_descr) {
|
||||
auto quantrant = instr.value & 0x3;
|
||||
expand_bit_mask(29, lutmasks[quantrant], instr.value >> 2, instr.mask >> 2, 0, qlut[quantrant], instr.op);
|
||||
root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
|
||||
for(auto instr:instr_descr){
|
||||
root->instrs.push_back(instr);
|
||||
}
|
||||
populate_decoding_tree(root);
|
||||
}
|
||||
|
||||
template <typename ARCH>
|
||||
|
@ -254,30 +263,19 @@ std::tuple<continuation_e>
|
|||
vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) {
|
||||
// we fetch at max 4 byte, alignment is 2
|
||||
enum {TRAP_ID=1<<16};
|
||||
code_word_t insn = 0;
|
||||
// const typename traits::addr_t upper_bits = ~traits::PGMASK;
|
||||
code_word_t instr = 0;
|
||||
phys_addr_t paddr(pc);
|
||||
auto *const data = (uint8_t *)&insn;
|
||||
paddr = this->core.v2p(pc);
|
||||
// if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
|
||||
// auto res = this->core.read(paddr, 2, data);
|
||||
// if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
|
||||
// if ((insn & 0x3) == 0x3) { // this is a 32bit instruction
|
||||
// res = this->core.read(this->core.v2p(pc + 2), 2, data + 2);
|
||||
// }
|
||||
// } else {
|
||||
auto res = this->core.read(paddr, 4, data);
|
||||
if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
|
||||
// }
|
||||
if (insn == 0x0000006f || (insn&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
|
||||
auto res = this->core.read(paddr, 4, reinterpret_cast<uint8_t*>(&instr));
|
||||
if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
|
||||
if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
|
||||
// curr pc on stack
|
||||
++inst_cnt;
|
||||
auto lut_val = extract_fields(insn);
|
||||
auto f = qlut[insn & 0x3][lut_val];
|
||||
auto f = decode_instr(root, instr);
|
||||
if (f == nullptr) {
|
||||
f = &this_class::illegal_intruction;
|
||||
}
|
||||
return (this->*f)(pc, insn, tu);
|
||||
return (this->*f)(pc, instr, tu);
|
||||
}
|
||||
|
||||
template <typename ARCH> void vm_impl<ARCH>::gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause) {
|
||||
|
|
|
@ -152,14 +152,22 @@ private:
|
|||
/****************************************************************************
|
||||
* start opcode definitions
|
||||
****************************************************************************/
|
||||
struct InstructionDesriptor {
|
||||
struct instruction_descriptor {
|
||||
size_t length;
|
||||
uint32_t value;
|
||||
uint32_t mask;
|
||||
typename arch::traits<ARCH>::opcode_e op;
|
||||
};
|
||||
struct decoding_tree_node{
|
||||
std::vector<instruction_descriptor> instrs;
|
||||
std::vector<decoding_tree_node*> children;
|
||||
uint32_t submask = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t value;
|
||||
decoding_tree_node(uint32_t value) : value(value){}
|
||||
};
|
||||
|
||||
const std::array<InstructionDesriptor, 87> instr_descr = {{
|
||||
decoding_tree_node* root {nullptr};
|
||||
const std::array<instruction_descriptor, 87> instr_descr = {{
|
||||
/* entries are: size, valid value, valid mask, function ptr */
|
||||
{32, 0b00000000000000000000000000110111, 0b00000000000000000000000001111111, arch::traits<ARCH>::opcode_e::LUI},
|
||||
{32, 0b00000000000000000000000000010111, 0b00000000000000000000000001111111, arch::traits<ARCH>::opcode_e::AUIPC},
|
||||
|
@ -250,7 +258,6 @@ private:
|
|||
{16, 0b0000000000000000, 0b1111111111111111, arch::traits<ARCH>::opcode_e::DII},
|
||||
}};
|
||||
|
||||
//static constexpr typename traits::addr_t upper_bits = ~traits::PGMASK;
|
||||
iss::status fetch_ins(virt_addr_t pc, uint8_t * data){
|
||||
auto phys_pc = this->core.v2p(pc);
|
||||
//TODO: re-add page handling
|
||||
|
@ -263,6 +270,56 @@ private:
|
|||
//}
|
||||
return iss::Ok;
|
||||
}
|
||||
void populate_decoding_tree(decoding_tree_node* root){
|
||||
//create submask
|
||||
for(auto instr: root->instrs){
|
||||
root->submask &= instr.mask;
|
||||
}
|
||||
//put each instr according to submask&encoding into children
|
||||
for(auto instr: root->instrs){
|
||||
bool foundMatch = false;
|
||||
for(auto child: root->children){
|
||||
//use value as identifying trait
|
||||
if(child->value == (instr.value&root->submask)){
|
||||
child->instrs.push_back(instr);
|
||||
foundMatch = true;
|
||||
}
|
||||
}
|
||||
if(!foundMatch){
|
||||
decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
|
||||
child->instrs.push_back(instr);
|
||||
root->children.push_back(child);
|
||||
}
|
||||
}
|
||||
root->instrs.clear();
|
||||
//call populate_decoding_tree for all children
|
||||
if(root->children.size() >1)
|
||||
for(auto child: root->children){
|
||||
populate_decoding_tree(child);
|
||||
}
|
||||
else{
|
||||
//sort instrs by value of the mask, this works bc we want to have the least restrictive one last
|
||||
std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
|
||||
return instr1.mask > instr2.mask;
|
||||
});
|
||||
}
|
||||
}
|
||||
typename arch::traits<ARCH>::opcode_e decode_instr(decoding_tree_node* node, code_word_t word){
|
||||
if(!node->children.size()){
|
||||
if(node->instrs.size() == 1) return node->instrs[0].op;
|
||||
for(auto instr : node->instrs){
|
||||
if((instr.mask&word) == instr.value) return instr.op;
|
||||
}
|
||||
}
|
||||
else{
|
||||
for(auto child : node->children){
|
||||
if (child->value == (node->submask&word)){
|
||||
return decode_instr(child, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
|
||||
|
@ -289,16 +346,11 @@ constexpr size_t bit_count(uint32_t u) {
|
|||
template <typename ARCH>
|
||||
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
|
||||
: vm_base<ARCH>(core, core_id, cluster_id) {
|
||||
unsigned id=0;
|
||||
for (auto instr : instr_descr) {
|
||||
auto quadrant = instr.value & 0x3;
|
||||
qlut[quadrant].push_back(instruction_pattern{instr.value, instr.mask, instr.op});
|
||||
}
|
||||
for(auto& lut: qlut){
|
||||
std::sort(std::begin(lut), std::end(lut), [](instruction_pattern const& a, instruction_pattern const& b){
|
||||
return bit_count(a.mask) > bit_count(b.mask);
|
||||
});
|
||||
root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
|
||||
for(auto instr:instr_descr){
|
||||
root->instrs.push_back(instr);
|
||||
}
|
||||
populate_decoding_tree(root);
|
||||
}
|
||||
|
||||
inline bool is_count_limit_enabled(finish_cond_e cond){
|
||||
|
@ -309,14 +361,6 @@ inline bool is_jump_to_self_enabled(finish_cond_e cond){
|
|||
return (cond & finish_cond_e::JUMP_TO_SELF) == finish_cond_e::JUMP_TO_SELF;
|
||||
}
|
||||
|
||||
template <typename ARCH>
|
||||
typename arch::traits<ARCH>::opcode_e vm_impl<ARCH>::decode_inst_id(code_word_t instr){
|
||||
for(auto& e: qlut[instr&0x3]){
|
||||
if(!((instr&e.mask) ^ e.value )) return e.id;
|
||||
}
|
||||
return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
|
||||
}
|
||||
|
||||
template <typename ARCH>
|
||||
typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){
|
||||
auto pc=start;
|
||||
|
@ -338,7 +382,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
|
|||
} else {
|
||||
if (is_jump_to_self_enabled(cond) &&
|
||||
(instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
|
||||
auto inst_id = decode_inst_id(instr);
|
||||
auto inst_id = decode_instr(root, instr);
|
||||
// pre execution stuff
|
||||
this->core.reg.last_branch = 0;
|
||||
if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast<unsigned>(inst_id));
|
||||
|
|
|
@ -120,57 +120,7 @@ protected:
|
|||
}
|
||||
}
|
||||
|
||||
// some compile time constants
|
||||
// enum { MASK16 = 0b1111110001100011, MASK32 = 0b11111111111100000111000001111111 };
|
||||
enum { MASK16 = 0b1111111111111111, MASK32 = 0b11111111111100000111000001111111 };
|
||||
enum { EXTR_MASK16 = MASK16 >> 2, EXTR_MASK32 = MASK32 >> 2 };
|
||||
enum { LUT_SIZE = 1 << util::bit_count(static_cast<uint32_t>(EXTR_MASK32)), LUT_SIZE_C = 1 << util::bit_count(static_cast<uint32_t>(EXTR_MASK16)) };
|
||||
|
||||
std::array<compile_func, LUT_SIZE> lut;
|
||||
|
||||
std::array<compile_func, LUT_SIZE_C> lut_00, lut_01, lut_10;
|
||||
std::array<compile_func, LUT_SIZE> lut_11;
|
||||
|
||||
std::array<compile_func *, 4> qlut;
|
||||
|
||||
std::array<const uint32_t, 4> lutmasks = {{EXTR_MASK16, EXTR_MASK16, EXTR_MASK16, EXTR_MASK32}};
|
||||
|
||||
void expand_bit_mask(int pos, uint32_t mask, uint32_t value, uint32_t valid, uint32_t idx, compile_func lut[],
|
||||
compile_func f) {
|
||||
if (pos < 0) {
|
||||
lut[idx] = f;
|
||||
} else {
|
||||
auto bitmask = 1UL << pos;
|
||||
if ((mask & bitmask) == 0) {
|
||||
expand_bit_mask(pos - 1, mask, value, valid, idx, lut, f);
|
||||
} else {
|
||||
if ((valid & bitmask) == 0) {
|
||||
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1), lut, f);
|
||||
expand_bit_mask(pos - 1, mask, value, valid, (idx << 1) + 1, lut, f);
|
||||
} else {
|
||||
auto new_val = idx << 1;
|
||||
if ((value & bitmask) != 0) new_val++;
|
||||
expand_bit_mask(pos - 1, mask, value, valid, new_val, lut, f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline uint32_t extract_fields(uint32_t val) { return extract_fields(29, val >> 2, lutmasks[val & 0x3], 0); }
|
||||
|
||||
uint32_t extract_fields(int pos, uint32_t val, uint32_t mask, uint32_t lut_val) {
|
||||
if (pos >= 0) {
|
||||
auto bitmask = 1UL << pos;
|
||||
if ((mask & bitmask) == 0) {
|
||||
lut_val = extract_fields(pos - 1, val, mask, lut_val);
|
||||
} else {
|
||||
auto new_val = lut_val << 1;
|
||||
if ((val & bitmask) != 0) new_val++;
|
||||
lut_val = extract_fields(pos - 1, val, mask, new_val);
|
||||
}
|
||||
}
|
||||
return lut_val;
|
||||
}
|
||||
|
||||
template<unsigned W, typename U, typename S = typename std::make_signed<U>::type>
|
||||
inline S sext(U from) {
|
||||
auto mask = (1ULL<<W) - 1;
|
||||
|
@ -182,14 +132,23 @@ private:
|
|||
/****************************************************************************
|
||||
* start opcode definitions
|
||||
****************************************************************************/
|
||||
struct InstructionDesriptor {
|
||||
struct instruction_descriptor {
|
||||
size_t length;
|
||||
uint32_t value;
|
||||
uint32_t mask;
|
||||
compile_func op;
|
||||
};
|
||||
struct decoding_tree_node{
|
||||
std::vector<instruction_descriptor> instrs;
|
||||
std::vector<decoding_tree_node*> children;
|
||||
uint32_t submask = std::numeric_limits<uint32_t>::max();
|
||||
uint32_t value;
|
||||
decoding_tree_node(uint32_t value) : value(value){}
|
||||
};
|
||||
|
||||
const std::array<InstructionDesriptor, 87> instr_descr = {{
|
||||
decoding_tree_node* root {nullptr};
|
||||
|
||||
const std::array<instruction_descriptor, 87> instr_descr = {{
|
||||
/* entries are: size, valid value, valid mask, function ptr */
|
||||
/* instruction LUI, encoding '0b00000000000000000000000000110111' */
|
||||
{32, 0b00000000000000000000000000110111, 0b00000000000000000000000001111111, &this_class::__lui},
|
||||
|
@ -493,14 +452,14 @@ private:
|
|||
this->gen_raise_trap(tu, 0, 2);
|
||||
}
|
||||
else{
|
||||
auto new_pc = tu.assignment(tu.ext((tu.bitwise_and((tu.add(tu.load(rs1+ traits::X0, 0),tu.constant((int16_t)sext<12>(imm),16))),tu.constant(~ 0x1,8))),32,true),32);
|
||||
auto new_pc = tu.assignment(tu.ext((tu.bitwise_and((tu.add(tu.load(rs1+ traits::X0, 0),tu.constant((int16_t)sext<12>(imm),16))),tu.constant(~0x1,8))),32,true),32);
|
||||
tu.open_if(tu.srem(new_pc,tu.constant(static_cast<uint32_t>(traits:: INSTR_ALIGNMENT),32)));
|
||||
this->gen_raise_trap(tu, 0, 0);
|
||||
tu.open_else();
|
||||
if(rd!= 0) {
|
||||
tu.store(rd + traits::X0,tu.ext((tu.add(tu.ext(cur_pc_val,32,false),tu.constant( 4,8))),32,true));
|
||||
}
|
||||
auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(new_pc,tu.constant(~ 0x1,8)),32);
|
||||
auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(new_pc,tu.constant(~0x1,8)),32);
|
||||
tu.store(traits::NEXT_PC, PC_val_v);
|
||||
tu.store(traits::LAST_BRANCH, tu.constant(2U, 2));
|
||||
tu.close_scope();
|
||||
|
@ -1962,7 +1921,7 @@ private:
|
|||
else{
|
||||
auto xrd = tu.assignment(tu.read_mem(traits::CSR, csr, 32),32);
|
||||
if(zimm!= 0) {
|
||||
tu.write_mem(traits::CSR, csr, tu.bitwise_and(xrd,tu.constant(~ ((uint32_t)zimm),32)));
|
||||
tu.write_mem(traits::CSR, csr, tu.bitwise_and(xrd,tu.constant(~((uint32_t)zimm),32)));
|
||||
}
|
||||
if(rd!= 0) {
|
||||
tu.store(rd + traits::X0,xrd);
|
||||
|
@ -2163,13 +2122,13 @@ private:
|
|||
auto divisor = tu.assignment(tu.ext(tu.load(rs2+ traits::X0, 0),32,false),32);
|
||||
if(rd!= 0){ tu.open_if(tu.icmp(ICmpInst::ICMP_NE,divisor,tu.constant( 0,8)));
|
||||
auto MMIN = tu.assignment(tu.constant(((uint32_t)1)<<(static_cast<uint32_t>(traits:: XLEN)-1),32),32);
|
||||
tu.open_if(tu.logical_and(tu.icmp(ICmpInst::ICMP_EQ,tu.load(rs1+ traits::X0, 0),MMIN),tu.icmp(ICmpInst::ICMP_EQ,divisor,tu.constant(- 1,8))));
|
||||
tu.open_if(tu.logical_and(tu.icmp(ICmpInst::ICMP_EQ,tu.load(rs1+ traits::X0, 0),MMIN),tu.icmp(ICmpInst::ICMP_EQ,divisor,tu.constant(-1,8))));
|
||||
tu.store(rd + traits::X0,MMIN);
|
||||
tu.open_else();
|
||||
tu.store(rd + traits::X0,tu.ext((tu.sdiv(dividend,divisor)),32,true));
|
||||
tu.close_scope();
|
||||
tu.open_else();
|
||||
tu.store(rd + traits::X0,tu.constant((uint32_t)- 1,32));
|
||||
tu.store(rd + traits::X0,tu.constant((uint32_t)-1,32));
|
||||
tu.close_scope();
|
||||
}
|
||||
}
|
||||
|
@ -2208,7 +2167,7 @@ private:
|
|||
}
|
||||
tu.open_else();
|
||||
if(rd!=0) {
|
||||
tu.store(rd + traits::X0,tu.constant((uint32_t)- 1,32));
|
||||
tu.store(rd + traits::X0,tu.constant((uint32_t)-1,32));
|
||||
}
|
||||
tu.close_scope();
|
||||
}
|
||||
|
@ -2243,7 +2202,7 @@ private:
|
|||
else{
|
||||
tu.open_if(tu.icmp(ICmpInst::ICMP_NE,tu.load(rs2+ traits::X0, 0),tu.constant( 0,8)));
|
||||
auto MMIN = tu.assignment(tu.constant( 1<<(static_cast<uint32_t>(traits:: XLEN)-1),8),32);
|
||||
tu.open_if(tu.logical_and(tu.icmp(ICmpInst::ICMP_EQ,tu.load(rs1+ traits::X0, 0),MMIN),tu.icmp(ICmpInst::ICMP_EQ,tu.ext(tu.load(rs2+ traits::X0, 0),32,false),tu.constant(- 1,8))));
|
||||
tu.open_if(tu.logical_and(tu.icmp(ICmpInst::ICMP_EQ,tu.load(rs1+ traits::X0, 0),MMIN),tu.icmp(ICmpInst::ICMP_EQ,tu.ext(tu.load(rs2+ traits::X0, 0),32,false),tu.constant(-1,8))));
|
||||
if(rd!=0) {
|
||||
tu.store(rd + traits::X0,tu.constant( 0,8));
|
||||
}
|
||||
|
@ -2955,7 +2914,7 @@ private:
|
|||
gen_set_pc(tu, pc, traits::NEXT_PC);
|
||||
tu.open_scope();
|
||||
if(rs1&&rs1<static_cast<uint32_t>(traits:: RFS)) {
|
||||
auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(tu.load(rs1%static_cast<uint32_t>(traits:: RFS)+ traits::X0, 0),tu.constant(~ 0x1,8)),32);
|
||||
auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(tu.load(rs1%static_cast<uint32_t>(traits:: RFS)+ traits::X0, 0),tu.constant(~0x1,8)),32);
|
||||
tu.store(traits::NEXT_PC, PC_val_v);
|
||||
tu.store(traits::LAST_BRANCH, tu.constant(2U, 2));
|
||||
}
|
||||
|
@ -3043,7 +3002,7 @@ private:
|
|||
else{
|
||||
auto new_pc = tu.assignment(tu.load(rs1+ traits::X0, 0),32);
|
||||
tu.store(1 + traits::X0,tu.ext((tu.add(tu.ext(cur_pc_val,32,false),tu.constant( 2,8))),32,true));
|
||||
auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(new_pc,tu.constant(~ 0x1,8)),32);
|
||||
auto PC_val_v = tu.assignment("PC_val", tu.bitwise_and(new_pc,tu.constant(~0x1,8)),32);
|
||||
tu.store(traits::NEXT_PC, PC_val_v);
|
||||
tu.store(traits::LAST_BRANCH, tu.constant(2U, 2));
|
||||
}
|
||||
|
@ -3136,11 +3095,64 @@ private:
|
|||
vm_impl::gen_trap_check(tu);
|
||||
return BRANCH;
|
||||
}
|
||||
|
||||
//decoding functionality
|
||||
|
||||
void populate_decoding_tree(decoding_tree_node* root){
|
||||
//create submask
|
||||
for(auto instr: root->instrs){
|
||||
root->submask &= instr.mask;
|
||||
}
|
||||
//put each instr according to submask&encoding into children
|
||||
for(auto instr: root->instrs){
|
||||
bool foundMatch = false;
|
||||
for(auto child: root->children){
|
||||
//use value as identifying trait
|
||||
if(child->value == (instr.value&root->submask)){
|
||||
child->instrs.push_back(instr);
|
||||
foundMatch = true;
|
||||
}
|
||||
}
|
||||
if(!foundMatch){
|
||||
decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
|
||||
child->instrs.push_back(instr);
|
||||
root->children.push_back(child);
|
||||
}
|
||||
}
|
||||
root->instrs.clear();
|
||||
//call populate_decoding_tree for all children
|
||||
if(root->children.size() >1)
|
||||
for(auto child: root->children){
|
||||
populate_decoding_tree(child);
|
||||
}
|
||||
else{
|
||||
//sort instrs by value of the mask, this works bc we want to have the least restrictive one last
|
||||
std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
|
||||
return instr1.mask > instr2.mask;
|
||||
});
|
||||
}
|
||||
}
|
||||
compile_func decode_instr(decoding_tree_node* node, code_word_t word){
|
||||
if(!node->children.size()){
|
||||
if(node->instrs.size() == 1) return node->instrs[0].op;
|
||||
for(auto instr : node->instrs){
|
||||
if((instr.mask&word) == instr.value) return instr.op;
|
||||
}
|
||||
}
|
||||
else{
|
||||
for(auto child : node->children){
|
||||
if (child->value == (node->submask&word)){
|
||||
return decode_instr(child, word);
|
||||
}
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
|
||||
volatile CODE_WORD x = insn;
|
||||
insn = 2 * x;
|
||||
template <typename CODE_WORD> void debug_fn(CODE_WORD instr) {
|
||||
volatile CODE_WORD x = instr;
|
||||
instr = 2 * x;
|
||||
}
|
||||
|
||||
template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
|
||||
|
@ -3148,14 +3160,11 @@ template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
|
|||
template <typename ARCH>
|
||||
vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
|
||||
: vm_base<ARCH>(core, core_id, cluster_id) {
|
||||
qlut[0] = lut_00.data();
|
||||
qlut[1] = lut_01.data();
|
||||
qlut[2] = lut_10.data();
|
||||
qlut[3] = lut_11.data();
|
||||
for (auto instr : instr_descr) {
|
||||
auto quantrant = instr.value & 0x3;
|
||||
expand_bit_mask(29, lutmasks[quantrant], instr.value >> 2, instr.mask >> 2, 0, qlut[quantrant], instr.op);
|
||||
root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
|
||||
for(auto instr:instr_descr){
|
||||
root->instrs.push_back(instr);
|
||||
}
|
||||
populate_decoding_tree(root);
|
||||
}
|
||||
|
||||
template <typename ARCH>
|
||||
|
@ -3163,10 +3172,8 @@ std::tuple<continuation_e>
|
|||
vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) {
|
||||
// we fetch at max 4 byte, alignment is 2
|
||||
enum {TRAP_ID=1<<16};
|
||||
code_word_t insn = 0;
|
||||
// const typename traits::addr_t upper_bits = ~traits::PGMASK;
|
||||
code_word_t instr = 0;
|
||||
phys_addr_t paddr(pc);
|
||||
auto *const data = (uint8_t *)&insn;
|
||||
paddr = this->core.v2p(pc);
|
||||
//TODO: re-add page handling
|
||||
// if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
|
||||
|
@ -3176,18 +3183,17 @@ vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt,
|
|||
// res = this->core.read(this->core.v2p(pc + 2), 2, data + 2);
|
||||
// }
|
||||
// } else {
|
||||
auto res = this->core.read(paddr, 4, data);
|
||||
auto res = this->core.read(paddr, 4, reinterpret_cast<uint8_t*>(&instr));
|
||||
if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
|
||||
// }
|
||||
if (insn == 0x0000006f || (insn&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
|
||||
if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
|
||||
// curr pc on stack
|
||||
++inst_cnt;
|
||||
auto lut_val = extract_fields(insn);
|
||||
auto f = qlut[insn & 0x3][lut_val];
|
||||
auto f = decode_instr(root, instr);
|
||||
if (f == nullptr) {
|
||||
f = &this_class::illegal_intruction;
|
||||
}
|
||||
return (this->*f)(pc, insn, tu);
|
||||
return (this->*f)(pc, instr, tu);
|
||||
}
|
||||
|
||||
template <typename ARCH> void vm_impl<ARCH>::gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause) {
|
||||
|
|
Loading…
Reference in New Issue