fixes htif behavior and instrumentation interface

changes the io_buf
factors clic & pmp into separate units
2025-03-14 19:43:20 +01:00 · 2025-03-14 12:14:20 +01:00 · 2025-03-13 12:13:41 +01:00 · 2025-03-12 09:26:51 +01:00 · 2025-03-11 08:31:25 +01:00 · 2025-03-10 16:00:26 +01:00
524 changed files with 59633 additions and 55250 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,10 @@ add_subdirectory(softfloat)
 set(LIB_SOURCES
    src/iss/plugin/instruction_count.cpp
    src/iss/arch/tgc5c.cpp
    src/iss/mmio/memory_if.cpp
    src/vm/interp/vm_tgc5c.cpp
    src/vm/fp_functions.cpp
    src/iss/debugger/csr_names.cpp
    src/iss/semihosting/semihosting.cpp
 )
@@ -108,16 +110,6 @@ if(TARGET yaml-cpp::yaml-cpp)
    target_link_libraries(${PROJECT_NAME} PUBLIC yaml-cpp::yaml-cpp)
 endif()
 if(WITH_LLVM)
    find_package(LLVM)
    target_compile_definitions(${PROJECT_NAME} PUBLIC ${LLVM_DEFINITIONS})
    target_include_directories(${PROJECT_NAME} PUBLIC ${LLVM_INCLUDE_DIRS})
    if(BUILD_SHARED_LIBS)
        target_link_libraries(${PROJECT_NAME} PUBLIC ${LLVM_LIBRARIES})
    endif()
 endif()
 set_target_properties(${PROJECT_NAME} PROPERTIES
    VERSION ${PROJECT_VERSION}
    FRAMEWORK FALSE
@@ -261,3 +253,9 @@ if(TARGET scc-sysc)
        INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} # headers
    )
 endif()
 project(elfio-test)
 find_package(Boost COMPONENTS program_options thread REQUIRED)
 add_executable(${PROJECT_NAME} src/elfio.cpp)
 target_link_libraries(${PROJECT_NAME} PUBLIC elfio::elfio)
--- a/gen_input/templates/CORENAME.cpp.gtl
+++ b/gen_input/templates/CORENAME.cpp.gtl
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017 - 2020 MINRES Technologies GmbH
+ * Copyright (C) 2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
--- a/gen_input/templates/CORENAME.h.gtl
+++ b/gen_input/templates/CORENAME.h.gtl
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017 - 2021 MINRES Technologies GmbH
+ * Copyright (C) 2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -131,8 +131,6 @@ struct ${coreDef.name.toLowerCase()}: public arch_if {
    uint8_t* get_regs_base_ptr() override;
    inline uint64_t get_icount() { return reg.icount; }
    inline bool should_stop() { return interrupt_sim; }
    inline uint64_t stop_code() { return interrupt_sim; }
@@ -141,8 +139,6 @@ struct ${coreDef.name.toLowerCase()}: public arch_if {
    virtual iss::sync_type needed_sync() const { return iss::NO_SYNC; }
    inline uint32_t get_last_branch() { return reg.last_branch; }
 #pragma pack(push, 1)
    struct ${coreDef.name}_regs {<%
--- a/gen_input/templates/CORENAME_sysc.cpp.gtl
+++ b/gen_input/templates/CORENAME_sysc.cpp.gtl
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2023 MINRES Technologies GmbH
+ * Copyright (C) 2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -45,17 +45,17 @@ namespace interp {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
@@ -66,17 +66,17 @@ namespace llvm {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
@@ -88,17 +88,17 @@ namespace tcc {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
@@ -110,17 +110,17 @@ namespace asmjit {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
--- a/gen_input/templates/asmjit/CORENAME.cpp.gtl
+++ b/gen_input/templates/asmjit/CORENAME.cpp.gtl
@@ -37,7 +37,10 @@
 #include <iss/asmjit/vm_base.h>
 #include <asmjit/asmjit.h>
 #include <util/logging.h>
-
+#include <iss/instruction_decoder.h>
 <%def fcsr = registers.find {it.name=='FCSR'}
 if(fcsr != null) {%>
 #include <vm/fp_functions.h><%}%>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
 #endif
@@ -80,28 +83,32 @@ public:
 protected:
    using super::get_ptr_for;
 using super::get_reg;
    using super::get_reg_for;
    using super::get_reg_for_Gp;
    using super::load_reg_from_mem;
    using super::load_reg_from_mem_Gp;
    using super::write_reg_to_mem;
    using super::gen_ext;
    using super::gen_read_mem;
    using super::gen_write_mem;
    using super::gen_wait;
    using super::gen_leave;
-    using super::gen_operation;
+    using super::gen_sync;
    using this_class = vm_impl<ARCH>;
    using compile_func = continuation_e (this_class::*)(virt_addr_t&, code_word_t, jit_holder&);
-    continuation_e gen_single_inst_behavior(virt_addr_t&, unsigned int &, jit_holder&) override;
+    continuation_e gen_single_inst_behavior(virt_addr_t&, jit_holder&) override;
    enum globals_e {TVAL = 0, GLOBALS_SIZE};
    void gen_block_prologue(jit_holder& jh) override;
    void gen_block_epilogue(jit_holder& jh) override;
    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
-
+<%if(fcsr != null) {%>
    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}   
 <%}%>
    void gen_instr_prologue(jit_holder& jh);
    void gen_instr_epilogue(jit_holder& jh);
    inline void gen_raise(jit_holder& jh, uint16_t trap_id, uint16_t cause);
    template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type> void gen_set_tval(jit_holder& jh, T new_tval) ;
    void gen_set_tval(jit_holder& jh, x86_reg_t _new_tval) ;
    template<unsigned W, typename U, typename S = typename std::make_signed<U>::type>
    inline S sext(U from) {
@@ -109,25 +116,19 @@ using super::get_reg;
        auto sign_mask = 1ULL<<(W-1);
        return (from & mask) | ((from & sign_mask) ? ~mask : 0);
    }
 <%functions.each{ it.eachLine { %>
    ${it}<%}%>
 <%}%>
 private:
    /****************************************************************************
     * start opcode definitions
     ****************************************************************************/
    struct instruction_descriptor {
-        size_t length;
+        uint32_t length;
        uint32_t value;
        uint32_t mask;
        compile_func op;
    };
    // Node of the opcode decoding tree. `instrs` holds the descriptors assigned to this node,
    // `children` the sub-nodes, `submask` the mask bits shared by the node's descriptors and
    // `value` the key under which the parent selects this node.
    struct decoding_tree_node{
        // only the key is supplied by the creator; submask keeps its all-ones default
        decoding_tree_node(uint32_t value) : value{value} {}
        std::vector<instruction_descriptor> instrs;
        std::vector<decoding_tree_node*> children;
        uint32_t submask{std::numeric_limits<uint32_t>::max()};
        uint32_t value;
    };
    decoding_tree_node* root {nullptr};
    const std::array<instruction_descriptor, ${instructions.size()}> instr_descr = {{
         /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
@@ -135,6 +136,9 @@ private:
        {${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%>
    }};
    //needs to be declared after instr_descr
    decoder instr_decoder;
    /* instruction definitions */<%instructions.eachWithIndex{instr, idx -> %>
    /* instruction ${idx}: ${instr.name} */
    continuation_e __${generator.functionName(instr.name)}(virt_addr_t& pc, code_word_t instr, jit_holder& jh){
@@ -147,7 +151,7 @@ private:
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
            jh.disass_collection.push_back(mnemonic_ptr);
-            jh.cc.invoke(&call_print_disass, &print_disass, FuncSignatureT<void, void *, uint64_t, char *>());
+            jh.cc.invoke(&call_print_disass, &print_disass, FuncSignature::build<void, void *, uint64_t, char *>());
            call_print_disass->setArg(0, jh.arch_if_ptr);
            call_print_disass->setArg(1, pc.val);
            call_print_disass->setArg(2, mnemonic_ptr);
@@ -155,87 +159,49 @@ private:
        }
        x86::Compiler& cc = jh.cc;
        cc.comment(fmt::format("${instr.name}_{:#x}:",pc.val).c_str());
-        this->gen_sync(jh, PRE_SYNC, ${idx});
+        gen_sync(jh, PRE_SYNC, ${idx});
-        cc.mov(jh.pc, pc.val);
+        mov(cc, jh.pc, pc.val);
        gen_set_tval(jh, instr);
        pc = pc+${instr.length/8};
-        cc.mov(jh.next_pc, pc.val);
+        mov(cc, jh.next_pc, pc.val);
        gen_instr_prologue(jh);
        cc.comment("//behavior:");
        /*generate behavior*/
        <%instr.behavior.eachLine{%>${it}
        <%}%>
        gen_sync(jh, POST_SYNC, ${idx});
        gen_instr_epilogue(jh);
        this->gen_sync(jh, POST_SYNC, ${idx});
    	return returnValue;        
    }
    <%}%>
    /****************************************************************************
     * end opcode definitions
     ****************************************************************************/
-    continuation_e illegal_intruction(virt_addr_t &pc, code_word_t instr, jit_holder& jh ) {
+    continuation_e illegal_instruction(virt_addr_t &pc, code_word_t instr, jit_holder& jh ) {
        x86::Compiler& cc = jh.cc;
-        cc.comment(fmt::format("illegal_intruction{:#x}:",pc.val).c_str());
+        if(this->disass_enabled){          
-        this->gen_sync(jh, PRE_SYNC, instr_descr.size());
+            auto mnemonic = std::string("illegal_instruction");
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
            jh.disass_collection.push_back(mnemonic_ptr);
            jh.cc.invoke(&call_print_disass, &print_disass, FuncSignature::build<void, void *, uint64_t, char *>());
            call_print_disass->setArg(0, jh.arch_if_ptr);
            call_print_disass->setArg(1, pc.val);
            call_print_disass->setArg(2, mnemonic_ptr);
        }
        cc.comment(fmt::format("illegal_instruction{:#x}:",pc.val).c_str());
        gen_sync(jh, PRE_SYNC, instr_descr.size());
        mov(cc, jh.pc, pc.val);
        gen_set_tval(jh, instr);
        pc = pc + ((instr & 3) == 3 ? 4 : 2);
        mov(cc, jh.next_pc, pc.val);
        gen_instr_prologue(jh);
        cc.comment("//behavior:");
        gen_raise(jh, 0, 2);
        gen_sync(jh, POST_SYNC, instr_descr.size());
        gen_instr_epilogue(jh);
-        this->gen_sync(jh, POST_SYNC, instr_descr.size());
+        return ILLEGAL_INSTR;
        return BRANCH;
    }
    //decoding functionality
    //decoding functionality
    // Distributes the descriptors stored in root->instrs over child nodes and recurses:
    //  1. root->submask is narrowed to the bits set in the mask of every descriptor of the node,
    //  2. each descriptor is moved into the child keyed by (value & submask), creating it on demand,
    //  3. with more than one child every child is partitioned again; with exactly one child its
    //     descriptors are sorted by descending mask value.
    // A node that holds no descriptors is left without children.
    // NOTE(review): the child nodes are allocated with a raw `new`; no matching delete is visible
    // in this part of the file -- confirm who owns the tree.
    void populate_decoding_tree(decoding_tree_node* root){
        //create submask
        for(const auto& instr: root->instrs){
            root->submask &= instr.mask;
        }
        //put each instr according to submask&encoding into children
        for(const auto& instr: root->instrs){
            const uint32_t key = instr.value&root->submask;
            bool foundMatch = false;
            for(auto child: root->children){
                //use value as identifying trait
                if(child->value == key){
                    child->instrs.push_back(instr);
                    foundMatch = true;
                    //children created below carry distinct keys, so no further child can match
                    break;
                }
            }
            if(!foundMatch){
                decoding_tree_node* child = new decoding_tree_node(key);
                child->instrs.push_back(instr);
                root->children.push_back(child);
            }
        }
        root->instrs.clear();
        //without descriptors no child was created; indexing children[0] below would be undefined
        if(root->children.empty())
            return;
        //call populate_decoding_tree for all children
        if(root->children.size() >1)
            for(auto child: root->children){
                populate_decoding_tree(child);
            }
        else{
            //sort instrs by value of the mask, this works bc we want to have the least restrictive one last
            auto& leaf_instrs = root->children[0]->instrs;
            std::sort(leaf_instrs.begin(), leaf_instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
                return instr1.mask > instr2.mask;
            });
        }
    }
    // Walks the decoding tree for `word` and returns the compile function of the matching
    // descriptor, or nullptr when nothing matches.
    compile_func decode_instr(decoding_tree_node* node, code_word_t word){
        if(node->children.empty()){
            // leaf: a lone candidate is returned without a further mask test, otherwise the
            // first descriptor whose masked bits equal its value wins
            const auto& candidates = node->instrs;
            if(candidates.size() == 1)
                return candidates[0].op;
            for(const auto& candidate : candidates){
                if((candidate.mask&word) == candidate.value)
                    return candidate.op;
            }
            return nullptr;
        }
        // inner node: descend into the first child keyed by the word's submask bits
        const auto key = node->submask&word;
        for(auto* next : node->children){
            if(next->value == key)
                return decode_instr(next, word);
        }
        return nullptr;
    }
 };
@@ -243,16 +209,19 @@ template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
 template <typename ARCH>
 vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
-: vm_base<ARCH>(core, core_id, cluster_id) {
+: vm_base<ARCH>(core, core_id, cluster_id)
-    root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
+, instr_decoder([this]() {
-    for(auto instr: instr_descr){
+        std::vector<generic_instruction_descriptor> g_instr_descr;
-        root->instrs.push_back(instr);
+        g_instr_descr.reserve(instr_descr.size());
-    }
+        for (uint32_t i = 0; i < instr_descr.size(); ++i) {
-    populate_decoding_tree(root);
+            generic_instruction_descriptor new_instr_descr {instr_descr[i].value, instr_descr[i].mask, i};
-}
+            g_instr_descr.push_back(new_instr_descr);
        }
        return std::move(g_instr_descr);
    }()) {}
 template <typename ARCH>
-continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, jit_holder& jh) {
+continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, jit_holder& jh) {
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
    phys_addr_t paddr(pc);
@@ -261,13 +230,15 @@ continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned
        paddr = this->core.virt2phys(pc);
    auto res = this->core.read(paddr, 4, data);
    if (res != iss::Ok)
-        throw trap_access(TRAP_ID, pc.val);
+        return ILLEGAL_FETCH;
    if (instr == 0x0000006f || (instr&0xffff)==0xa001)
-        throw simulation_stopped(0); // 'J 0' or 'C.J 0'
+        return JUMP_TO_SELF;
-    ++inst_cnt;
+    uint32_t inst_index = instr_decoder.decode_instr(instr);
-    auto f = decode_instr(root, instr);
+    compile_func f = nullptr;
    if(inst_index < instr_descr.size())
        f = instr_descr[inst_index].op;
    if (f == nullptr) 
-        f = &this_class::illegal_intruction;
+        f = &this_class::illegal_instruction;
    return (this->*f)(pc, instr, jh);
 }
 template <typename ARCH>
@@ -275,11 +246,10 @@ void vm_impl<ARCH>::gen_instr_prologue(jit_holder& jh) {
    auto& cc = jh.cc;
    cc.comment("//gen_instr_prologue");
    cc.inc(get_ptr_for(jh, traits::ICOUNT));
-    x86::Gp current_trap_state = get_reg_for(jh, traits::TRAP_STATE);
+    x86_reg_t current_trap_state = get_reg_for(cc, traits::TRAP_STATE);
-    cc.mov(current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));
+    mov(cc, current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));
-    cc.mov(get_ptr_for(jh, traits::PENDING_TRAP), current_trap_state);
+    mov(cc, get_ptr_for(jh, traits::PENDING_TRAP), current_trap_state);
 }
 template <typename ARCH>
@@ -287,16 +257,19 @@ void vm_impl<ARCH>::gen_instr_epilogue(jit_holder& jh) {
    auto& cc = jh.cc;
    cc.comment("//gen_instr_epilogue");
-    x86::Gp current_trap_state = get_reg_for(jh, traits::TRAP_STATE);
+    x86_reg_t current_trap_state = get_reg_for(cc, traits::TRAP_STATE);
-    cc.mov(current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));
+    mov(cc, current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));
-    cc.cmp(current_trap_state, 0);
+    cmp(cc, current_trap_state, 0);
    cc.jne(jh.trap_entry);
    cc.inc(get_ptr_for(jh, traits::ICOUNT));
    cc.inc(get_ptr_for(jh, traits::CYCLE));
 }
 template <typename ARCH>
 void vm_impl<ARCH>::gen_block_prologue(jit_holder& jh){
-
+    jh.pc = load_reg_from_mem_Gp(jh, traits::PC);
-    jh.pc = load_reg_from_mem(jh, traits::PC);
+    jh.next_pc = load_reg_from_mem_Gp(jh, traits::NEXT_PC);
-    jh.next_pc = load_reg_from_mem(jh, traits::NEXT_PC);
+    jh.globals.resize(GLOBALS_SIZE);
    jh.globals[TVAL] = get_reg_Gp(jh.cc, 64, false);
 }
 template <typename ARCH>
 void vm_impl<ARCH>::gen_block_epilogue(jit_holder& jh){
@@ -306,39 +279,52 @@ void vm_impl<ARCH>::gen_block_epilogue(jit_holder& jh){
    cc.bind(jh.trap_entry);
    this->write_back(jh);
    this->gen_sync(jh, POST_SYNC, -1);
-    x86::Gp current_trap_state = get_reg_for(jh, traits::TRAP_STATE);
+    x86::Gp current_trap_state = get_reg_for_Gp(cc, traits::TRAP_STATE);
-    cc.mov(current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));
+    mov(cc, current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));
-    x86::Gp current_pc = get_reg_for(jh, traits::PC);
+    x86::Gp current_pc = get_reg_for_Gp(cc, traits::PC);
-    cc.mov(current_pc, get_ptr_for(jh, traits::PC));
+    mov(cc, current_pc, get_ptr_for(jh, traits::PC));
    x86::Gp instr = cc.newInt32("instr");
    cc.mov(instr, 0); // FIXME:this is not correct
    cc.comment("//enter trap call;");
    InvokeNode* call_enter_trap;
-    cc.invoke(&call_enter_trap, &enter_trap, FuncSignatureT<uint64_t, void*, uint64_t, uint64_t, uint64_t>());
+    cc.invoke(&call_enter_trap, &enter_trap, FuncSignature::build<uint64_t, void*, uint64_t, uint64_t, uint64_t>());
    call_enter_trap->setArg(0, jh.arch_if_ptr);
    call_enter_trap->setArg(1, current_trap_state);
    call_enter_trap->setArg(2, current_pc);
-    call_enter_trap->setArg(3, instr);
+    call_enter_trap->setArg(3, jh.globals[TVAL]);
-    x86::Gp current_next_pc = get_reg_for(jh, traits::NEXT_PC);
+    x86_reg_t current_next_pc = get_reg_for(cc, traits::NEXT_PC);
-    cc.mov(current_next_pc, get_ptr_for(jh, traits::NEXT_PC));
+    mov(cc, current_next_pc, get_ptr_for(jh, traits::NEXT_PC));
-    cc.mov(jh.next_pc, current_next_pc);
+    mov(cc, jh.next_pc, current_next_pc);
-    cc.mov(get_ptr_for(jh, traits::LAST_BRANCH), std::numeric_limits<uint32_t>::max());
+    mov(cc, get_ptr_for(jh, traits::LAST_BRANCH), static_cast<int>(UNKNOWN_JUMP));
    cc.ret(jh.next_pc);
 }
 template <typename ARCH>
 inline void vm_impl<ARCH>::gen_raise(jit_holder& jh, uint16_t trap_id, uint16_t cause) {
    auto& cc = jh.cc;
    cc.comment("//gen_raise");
-    auto tmp1 = get_reg_for(jh, traits::TRAP_STATE);
+    auto tmp1 = get_reg_for(cc, traits::TRAP_STATE);
-    cc.mov(tmp1, 0x80ULL << 24 | (cause << 16) | trap_id);
+    mov(cc, tmp1, 0x80ULL << 24 | (cause << 16) | trap_id);
-    cc.mov(get_ptr_for(jh, traits::TRAP_STATE), tmp1);
+    mov(cc, get_ptr_for(jh, traits::TRAP_STATE), tmp1);
-    cc.mov(jh.next_pc, std::numeric_limits<uint32_t>::max());
+    cc.jmp(jh.trap_entry);
 }
 // Emits a move of an integral value (known while generating code) into the TVAL entry of
 // jh.globals.
 template <typename ARCH>
 template <typename T, typename>
 void vm_impl<ARCH>::gen_set_tval(jit_holder& jh, T new_tval) {
    auto& tval_slot = jh.globals[TVAL];
    mov(jh.cc, tval_slot, new_tval);
 }
 // Emits a move of a register operand into the TVAL entry of jh.globals.
 // Only the x86::Gp alternative of the variant is handled; any other alternative throws
 // std::runtime_error. Operands whose size() is below 8 are first passed through
 // gen_ext_Gp(..., 64, false) -- presumably a zero-extension to 64 bits; confirm against
 // gen_ext_Gp.
 template <typename ARCH>
 void vm_impl<ARCH>::gen_set_tval(jit_holder& jh, x86_reg_t _new_tval) {
    if(!nonstd::holds_alternative<x86::Gp>(_new_tval))
        throw std::runtime_error("Variant not supported in gen_set_tval");
    x86::Gp tval_src = nonstd::get<x86::Gp>(_new_tval);
    if(tval_src.size() < 8)
        tval_src = gen_ext_Gp(jh.cc, tval_src, 64, false);
    mov(jh.cc, jh.globals[TVAL], tval_src);
 }
 } // namespace tgc5c
--- a/gen_input/templates/interp/CORENAME.cpp.gtl
+++ b/gen_input/templates/interp/CORENAME.cpp.gtl
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 20217-2024 MINRES Technologies GmbH
+ * Copyright (C) 2017-2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@ def nativeTypeSize(int size){
 }
 %>
 // clang-format off
 #include <cstdint>
 #include <iss/arch/${coreDef.name.toLowerCase()}.h>
 #include <iss/debugger/gdb_session.h>
 #include <iss/debugger/server.h>
@@ -47,6 +48,8 @@ def nativeTypeSize(int size){
 #include <exception>
 #include <vector>
 #include <sstream>
 #include <iss/instruction_decoder.h>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
@@ -97,7 +100,12 @@ protected:
    using compile_ret_t = virt_addr_t;
    using compile_func = compile_ret_t (this_class::*)(virt_addr_t &pc, code_word_t instr);
-    inline const char *name(size_t index){return index<traits::reg_aliases.size()?traits::reg_aliases[index]:"illegal";}
+    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
 <%
 def fcsr = registers.find {it.name=='FCSR'}
 if(fcsr != null) {%>
    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}     
 <%}%>
    virt_addr_t execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit) override;
@@ -106,7 +114,6 @@ protected:
    // Records a pending trap: writes 0x80 << 24 | cause << 16 | trap_id into core.reg.trap_state
    // and sets NEXT_PC to the all-ones value of the address type.
    //   trap_id  low 16 bits of the trap state
    //   cause    placed in bits 16 and up of the trap state
    inline void raise(uint16_t trap_id, uint16_t cause){
        // widen cause before shifting: a uint16_t promotes to signed int, so shifting a value
        // >= 0x8000 left by 16 would overflow and sign-extend into the upper half of trap_val
        auto trap_val =  0x80ULL << 24 | (static_cast<uint64_t>(cause) << 16) | trap_id;
        this->core.reg.trap_state = trap_val;
        this->template get_reg<uint${addrDataWidth}_t>(traits::NEXT_PC) = std::numeric_limits<uint${addrDataWidth}_t>::max();
    }
    inline void leave(unsigned lvl){
@@ -117,7 +124,12 @@ protected:
        this->core.wait_until(type);
    }
    inline void set_tval(uint64_t new_tval){
        tval = new_tval;
    }
    uint64_t fetch_count{0};
    uint64_t tval{0};
    using yield_t = boost::coroutines2::coroutine<void>::push_type;
    using coro_t = boost::coroutines2::coroutine<void>::pull_type;
@@ -148,25 +160,20 @@ private:
     * start opcode definitions
     ****************************************************************************/
    struct instruction_descriptor {
-        size_t length;
+        uint32_t length;
        uint32_t value;
        uint32_t mask;
        typename arch::traits<ARCH>::opcode_e op;
    };
    // Node of the opcode decoding tree. `instrs` holds the descriptors assigned to this node,
    // `children` the sub-nodes, `submask` the mask bits shared by the node's descriptors and
    // `value` the key under which the parent selects this node.
    struct decoding_tree_node{
        // only the key is supplied by the creator; submask keeps its all-ones default
        decoding_tree_node(uint32_t value) : value{value} {}
        std::vector<instruction_descriptor> instrs;
        std::vector<decoding_tree_node*> children;
        uint32_t submask{std::numeric_limits<uint32_t>::max()};
        uint32_t value;
    };
    decoding_tree_node* root {nullptr};
    const std::array<instruction_descriptor, ${instructions.size()}> instr_descr = {{
         /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
        {${instr.length}, ${instr.encoding}, ${instr.mask}, arch::traits<ARCH>::opcode_e::${instr.instruction.name}},<%}%>
    }};
    //needs to be declared after instr_descr
    decoder instr_decoder;
    iss::status fetch_ins(virt_addr_t pc, uint8_t * data){
        if(this->core.has_mmu()) {
            auto phys_pc = this->core.virt2phys(pc);
@@ -186,67 +193,12 @@ private:
        }
        return iss::Ok;
    }
    // Distributes the descriptors stored in root->instrs over child nodes and recurses:
    //  1. root->submask is narrowed to the bits set in the mask of every descriptor of the node,
    //  2. each descriptor is moved into the child keyed by (value & submask), creating it on demand,
    //  3. with more than one child every child is partitioned again; with exactly one child its
    //     descriptors are sorted by descending mask value.
    // A node that holds no descriptors is left without children.
    // NOTE(review): the child nodes are allocated with a raw `new`; no matching delete is visible
    // in this part of the file -- confirm who owns the tree.
    void populate_decoding_tree(decoding_tree_node* root){
        //create submask
        for(const auto& instr: root->instrs){
            root->submask &= instr.mask;
        }
        //put each instr according to submask&encoding into children
        for(const auto& instr: root->instrs){
            const uint32_t key = instr.value&root->submask;
            bool foundMatch = false;
            for(auto child: root->children){
                //use value as identifying trait
                if(child->value == key){
                    child->instrs.push_back(instr);
                    foundMatch = true;
                    //children created below carry distinct keys, so no further child can match
                    break;
                }
            }
            if(!foundMatch){
                decoding_tree_node* child = new decoding_tree_node(key);
                child->instrs.push_back(instr);
                root->children.push_back(child);
            }
        }
        root->instrs.clear();
        //without descriptors no child was created; indexing children[0] below would be undefined
        if(root->children.empty())
            return;
        //call populate_decoding_tree for all children
        if(root->children.size() >1)
            for(auto child: root->children){
                populate_decoding_tree(child);
            }
        else{
            //sort instrs by value of the mask, this works bc we want to have the least restrictive one last
            auto& leaf_instrs = root->children[0]->instrs;
            std::sort(leaf_instrs.begin(), leaf_instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
                return instr1.mask > instr2.mask;
            });
        }
    }
    // Walks the decoding tree for `word` and returns the opcode of the matching descriptor,
    // or opcode_e::MAX_OPCODE when nothing matches.
    typename arch::traits<ARCH>::opcode_e  decode_instr(decoding_tree_node* node, code_word_t word){
        if(node->children.empty()){
            // leaf: a lone candidate is returned without a further mask test, otherwise the
            // first descriptor whose masked bits equal its value wins
            const auto& candidates = node->instrs;
            if(candidates.size() == 1)
                return candidates[0].op;
            for(const auto& candidate : candidates){
                if((candidate.mask&word) == candidate.value)
                    return candidate.op;
            }
            return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
        }
        // inner node: descend into the first child keyed by the word's submask bits
        const auto key = node->submask&word;
        for(auto* next : node->children){
            if(next->value == key)
                return decode_instr(next, word);
        }
        return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
    }
 };
 // Copies the instruction word into a volatile local and doubles it; the result is only
 // written back to the by-value parameter, so callers observe nothing.
 // NOTE(review): presumably a breakpoint anchor for debugging -- confirm against its callers.
 template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
    volatile CODE_WORD observed = insn;
    const auto doubled = 2 * observed;
    insn = doubled;
 }
 // Default constructor.
 // NOTE(review): `this(new ARCH())` is a call expression on the `this` pointer, not a
 // constructor delegation, and the other constructor defined in this file takes `ARCH &core`
 // rather than a pointer. The heap-allocated ARCH also has no visible owner. Presumably this
 // was meant to delegate, e.g. `: vm_impl(*new ARCH())` -- confirm the intent and the
 // ownership of the core before relying on this overload.
 template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
 // according to
 // https://stackoverflow.com/questions/8871204/count-number-of-1s-in-binary-representation
 #ifdef __GCC__
@@ -263,13 +215,16 @@ constexpr size_t bit_count(uint32_t u) {
 // Constructs the VM for the given core and builds the decoder input:
 // one generic descriptor {value, mask, index} per entry of instr_descr,
 // where index is the entry's position in instr_descr.
 // The lambda reads instr_descr, so that member has to be initialised first.
 template <typename ARCH>
 vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
-: vm_base<ARCH>(core, core_id, cluster_id) {
+: vm_base<ARCH>(core, core_id, cluster_id)
-    root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
+, instr_decoder([this]() {
-    for(auto instr:instr_descr){
+        std::vector<generic_instruction_descriptor> g_instr_descr;
-        root->instrs.push_back(instr);
+        g_instr_descr.reserve(instr_descr.size());
-    }
+        for (uint32_t i = 0; i < instr_descr.size(); ++i) {
-    populate_decoding_tree(root);
+            generic_instruction_descriptor new_instr_descr {instr_descr[i].value, instr_descr[i].mask, i};
-}
+            g_instr_descr.push_back(new_instr_descr);
        }
        // return the local by name: std::move here would only disable copy elision
        return g_instr_descr;
    }()) {}
 inline bool is_icount_limit_enabled(finish_cond_e cond){
    return (cond & finish_cond_e::ICOUNT_LIMIT) == finish_cond_e::ICOUNT_LIMIT;
@@ -299,16 +254,24 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
    while(!this->core.should_stop() &&
            !(is_icount_limit_enabled(cond) && icount >= count_limit) &&
            !(is_fcount_limit_enabled(cond) && fetch_count >= count_limit)){
-        fetch_count++;
+        if(this->debugging_enabled())
            this->tgt_adapter->check_continue(*PC);
        pc.val=*PC;
        if(fetch_ins(pc, data)!=iss::Ok){
-            this->do_sync(POST_SYNC, std::numeric_limits<unsigned>::max());
+            if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, std::numeric_limits<unsigned>::max());
-            pc.val = super::core.enter_trap(std::numeric_limits<uint64_t>::max(), pc.val, 0);
+            process_spawn_blocks();
            if(this->sync_exec && POST_SYNC) this->do_sync(PRE_SYNC, std::numeric_limits<unsigned>::max());
            pc.val = super::core.enter_trap(arch::traits<ARCH>::RV_CAUSE_FETCH_ACCESS<<16, pc.val, 0);
        } else {
            if (is_jump_to_self_enabled(cond) &&
                    (instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
-            auto inst_id = decode_instr(root, instr);
+            uint32_t inst_index = instr_decoder.decode_instr(instr);
            opcode_e inst_id = arch::traits<ARCH>::opcode_e::MAX_OPCODE;;
            if(inst_index <instr_descr.size())
                inst_id = instr_descr[inst_index].op;
            // pre execution stuff
-             this->core.reg.last_branch = 0;
+            this->core.reg.last_branch = 0;
            if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast<unsigned>(inst_id));
            try{
                switch(inst_id){<%instructions.eachWithIndex{instr, idx -> %>
@@ -317,6 +280,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    <%}%>if(this->disass_enabled){
                        /* generate console output when executing the command */<%instr.disass.eachLine{%>
                        ${it}<%}%>
                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers<%instr.usedVariables.each{ k,v->
                    if(v.isArray) {%>
@@ -341,16 +305,18 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
            //    this->core.reg.trap_state =  this->core.reg.pending_trap;
            // trap check
            if(trap_state!=0){
-                super::core.enter_trap(trap_state, pc.val, instr);
+                //In case of Instruction address misaligned (cause = 0 and trapid = 0) need the targeted addr (in tval)
                auto mcause = (trap_state>>16) & 0xff; 
                super::core.enter_trap(trap_state, pc.val, mcause ? instr:tval);
            } else {
                icount++;
                instret++;
            }
-            cycle++;
+            *PC = *NEXT_PC;
            pc.val=*NEXT_PC;
            this->core.reg.PC = this->core.reg.NEXT_PC;
            this->core.reg.trap_state =  this->core.reg.pending_trap;
        }
        fetch_count++;
        cycle++;
    }
    return pc;
 }
--- a/gen_input/templates/llvm/CORENAME.cpp.gtl
+++ b/gen_input/templates/llvm/CORENAME.cpp.gtl
@@ -36,7 +36,10 @@
 #include <iss/iss.h>
 #include <iss/llvm/vm_base.h>
 #include <util/logging.h>
-
+#include <iss/instruction_decoder.h>
 <%def fcsr = registers.find {it.name=='FCSR'}
 if(fcsr != null) {%>
 #include <vm/fp_functions.h><%}%>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
 #endif
@@ -82,7 +85,9 @@ protected:
    using vm_base<ARCH>::get_reg_ptr;
    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
-
+<%if(fcsr != null) {%>
    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}   
 <%}%>
    template <typename T> inline ConstantInt *size(T type) {
        return ConstantInt::get(getContext(), APInt(32, type->getType()->getScalarSizeInBits()));
    }
@@ -96,13 +101,16 @@ protected:
        return super::gen_cond_assign(cond, this->gen_ext(trueVal, size), this->gen_ext(falseVal, size));
    }
-    std::tuple<continuation_e, BasicBlock *> gen_single_inst_behavior(virt_addr_t &, unsigned int &, BasicBlock *) override;
+    std::tuple<continuation_e, BasicBlock *> gen_single_inst_behavior(virt_addr_t &, BasicBlock *) override;
    void gen_leave_behavior(BasicBlock *leave_blk) override;
    void gen_raise_trap(uint16_t trap_id, uint16_t cause);
    void gen_leave_trap(unsigned lvl);
    void gen_wait(unsigned type);
    void set_tval(uint64_t new_tval);
    void set_tval(Value* new_tval);
    void gen_trap_behavior(BasicBlock *) override;
    void gen_instr_prologue();
    void gen_instr_epilogue(BasicBlock *bb);
    inline Value *gen_reg_load(unsigned i, unsigned level = 0) {
@@ -127,26 +135,19 @@ protected:
        auto sign_mask = 1ULL<<(W-1);
        return (from & mask) | ((from & sign_mask) ? ~mask : 0);
    }
-
+<%functions.each{ it.eachLine { %>
    ${it}<%}%>
 <%}%>
 private:
    /****************************************************************************
     * start opcode definitions
     ****************************************************************************/
    struct instruction_descriptor {
-        size_t length;
+        uint32_t length;
        uint32_t value;
        uint32_t mask;
        compile_func op;
    };
    // Node of the instruction decoding tree built by populate_decoding_tree.
    struct decoding_tree_node{
        // descriptors held by this node; cleared once they were distributed to children
        std::vector<instruction_descriptor> instrs;
        // child nodes, allocated with new by populate_decoding_tree
        std::vector<decoding_tree_node*> children;
        // starts with all bits set and is narrowed by AND-ing the masks of the node's descriptors
        uint32_t submask = std::numeric_limits<uint32_t>::max();
        // key of this node: descriptor value masked with the parent's submask
        uint32_t value;
        decoding_tree_node(uint32_t value) : value(value){}
    };
    decoding_tree_node* root {nullptr};
    const std::array<instruction_descriptor, ${instructions.size()}> instr_descr = {{
         /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
@@ -154,6 +155,9 @@ private:
        {${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%>
    }};
    //needs to be declared after instr_descr
    decoder instr_decoder;
    /* instruction definitions */<%instructions.eachWithIndex{instr, idx -> %>
    /* instruction ${idx}: ${instr.name} */
    std::tuple<continuation_e, BasicBlock*> __${generator.functionName(instr.name)}(virt_addr_t& pc, code_word_t instr, BasicBlock* bb){
@@ -162,18 +166,27 @@ private:
        <%}%>if(this->disass_enabled){
            /* generate console output when executing the command */<%instr.disass.eachLine{%>
            ${it}<%}%>
            std::vector<Value*> args {
                this->core_ptr,
                this->gen_const(64, pc.val),
                this->builder.CreateGlobalStringPtr(mnemonic),
            };
            this->builder.CreateCall(this->mod->getFunction("print_disass"), args);
        }
        bb->setName(fmt::format("${instr.name}_0x{:X}",pc.val));
        this->gen_sync(PRE_SYNC,${idx});
-        auto cur_pc_val = this->gen_const(32,pc.val);
+        
        this->gen_set_pc(pc, traits::PC);
        this->set_tval(instr);
        pc=pc+ ${instr.length/8};
        this->gen_set_pc(pc, traits::NEXT_PC);
        this->gen_instr_prologue();
        /*generate behavior*/
        <%instr.behavior.eachLine{%>${it}
        <%}%>
        this->gen_sync(POST_SYNC, ${idx});
        this->gen_instr_epilogue(bb);
    	this->gen_sync(POST_SYNC, ${idx});
        this->builder.CreateBr(bb);
    	return returnValue;        
    }
@@ -181,8 +194,17 @@ private:
    /****************************************************************************
     * end opcode definitions
     ****************************************************************************/
-    std::tuple<continuation_e, BasicBlock *> illegal_intruction(virt_addr_t &pc, code_word_t instr, BasicBlock *bb) {
+    std::tuple<continuation_e, BasicBlock *> illegal_instruction(virt_addr_t &pc, code_word_t instr, BasicBlock *bb) {
-		this->gen_sync(iss::PRE_SYNC, instr_descr.size());
+        if(this->disass_enabled){
            auto mnemonic = std::string("illegal_instruction");
            std::vector<Value*> args {
                this->core_ptr,
                this->gen_const(64, pc.val),
                this->builder.CreateGlobalStringPtr(mnemonic),
            };
            this->builder.CreateCall(this->mod->getFunction("print_disass"), args);
        }
        this->gen_sync(iss::PRE_SYNC, instr_descr.size());
        this->builder.CreateStore(this->builder.CreateLoad(this->get_typeptr(traits::NEXT_PC), get_reg_ptr(traits::NEXT_PC), true),
                                   get_reg_ptr(traits::PC), true);
        this->builder.CreateStore(
@@ -190,62 +212,13 @@ private:
                                     this->gen_const(64U, 1)),
            get_reg_ptr(traits::ICOUNT), true);
        pc = pc + ((instr & 3) == 3 ? 4 : 2);
        this->set_tval(instr);
        this->gen_raise_trap(0, 2);     // illegal instruction trap
 		this->gen_sync(iss::POST_SYNC, instr_descr.size());
-        this->gen_instr_epilogue(this->leave_blk);
+        bb = this->leave_blk;
-        return std::make_tuple(BRANCH, nullptr);
+        this->gen_instr_epilogue(bb);
-    }    
+        this->builder.CreateBr(bb);
-    //decoding functionality
+        return std::make_tuple(ILLEGAL_INSTR, nullptr);
    /**
     * Distributes the descriptors stored in `root->instrs` over child nodes.
     *
     * The node's submask becomes the AND of all descriptor masks; descriptors
     * sharing the same `value & submask` end up in the same child. With more
     * than one child the function recurses into each of them. With exactly one
     * child that child's descriptors are sorted by descending mask so the
     * least restrictive one comes last. A node without descriptors is left
     * without children instead of indexing an empty child list.
     *
     * @param root node whose descriptors are to be partitioned; `instrs` is empty afterwards
     */
    void populate_decoding_tree(decoding_tree_node* root){
        //create submask
        for(const auto& instr: root->instrs){
            root->submask &= instr.mask;
        }
        //put each instr according to submask&encoding into children
        for(const auto& instr: root->instrs){
            const uint32_t key = instr.value&root->submask;
            decoding_tree_node* bucket = nullptr;
            for(auto* child: root->children){
                //use value as identifying trait, children created here have distinct values
                if(child->value == key){
                    bucket = child;
                    break;
                }
            }
            if(bucket == nullptr){
                // NOTE(review): nodes are allocated with new, no matching delete is visible here
                bucket = new decoding_tree_node(key);
                root->children.push_back(bucket);
            }
            bucket->instrs.push_back(instr);
        }
        root->instrs.clear();
        //call populate_decoding_tree for all children
        if(root->children.size() > 1){
            for(auto* child: root->children){
                populate_decoding_tree(child);
            }
        }
        else if(!root->children.empty()){
            //sort instrs by value of the mask, this works bc we want to have the least restrictive one last
            auto& leaf_instrs = root->children.front()->instrs;
            std::sort(leaf_instrs.begin(), leaf_instrs.end(), [](const instruction_descriptor& lhs, const instruction_descriptor& rhs) {
                return lhs.mask > rhs.mask;
            });
        }
    }
    /**
     * Looks up the compile function for an instruction word in the decoding tree.
     *
     * Inner nodes are traversed by following the first child whose `value`
     * equals `word` masked with the node's `submask`. At a node without
     * children a single stored descriptor is returned as is; otherwise the
     * first descriptor whose mask/value pair matches `word` is returned.
     *
     * @param node tree node to start the lookup at
     * @param word instruction word to decode
     * @return the `op` of the selected descriptor, nullptr if nothing matches
     */
    compile_func decode_instr(decoding_tree_node* node, code_word_t word){
        // descend as long as the current node has children to dispatch on
        while(!node->children.empty()){
            decoding_tree_node* next = nullptr;
            for(auto* candidate : node->children){
                if(candidate->value == (node->submask&word)){
                    next = candidate;
                    break;
                }
            }
            if(next == nullptr)
                return nullptr;
            node = next;
        }
        // childless node: a lone descriptor is taken without a further mask check
        if(node->instrs.size() == 1)
            return node->instrs.front().op;
        for(const auto& descr : node->instrs){
            if((descr.mask&word) == descr.value)
                return descr.op;
        }
        return nullptr;
    }    
 };
@@ -258,17 +231,20 @@ template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
 // Constructs the VM for the given core and builds the decoder input:
 // one generic descriptor {value, mask, index} per entry of instr_descr,
 // where index is the entry's position in instr_descr.
 // The lambda reads instr_descr, so that member has to be initialised first.
 template <typename ARCH>
 vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
-: vm_base<ARCH>(core, core_id, cluster_id) {
+: vm_base<ARCH>(core, core_id, cluster_id)
-    root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
+, instr_decoder([this]() {
-    for(auto instr:instr_descr){
+        std::vector<generic_instruction_descriptor> g_instr_descr;
-        root->instrs.push_back(instr);
+        g_instr_descr.reserve(instr_descr.size());
-    }
+        for (uint32_t i = 0; i < instr_descr.size(); ++i) {
-    populate_decoding_tree(root);
+            generic_instruction_descriptor new_instr_descr {instr_descr[i].value, instr_descr[i].mask, i};
-}
+            g_instr_descr.push_back(new_instr_descr);
        }
        // return the local by name: std::move here would only disable copy elision
        return g_instr_descr;
    }()) {}
 template <typename ARCH>
 std::tuple<continuation_e, BasicBlock *>
-vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, BasicBlock *this_block) {
+vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, BasicBlock *this_block) {
    // we fetch at max 4 byte, alignment is 2
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
@@ -277,23 +253,19 @@ vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt,
    auto *const data = (uint8_t *)&instr;
    if(this->core.has_mmu())
        paddr = this->core.virt2phys(pc);
-    //TODO: re-add page handling
+    auto res = this->core.read(paddr, 4, data);
-//    if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
+    if (res != iss::Ok) 
-//        auto res = this->core.read(paddr, 2, data);
+        return std::make_tuple(ILLEGAL_FETCH, nullptr);
-//        if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
+    if (instr == 0x0000006f || (instr&0xffff)==0xa001){
-//        if ((instr & 0x3) == 0x3) { // this is a 32bit instruction
+        this->builder.CreateBr(this->leave_blk);
-//            res = this->core.read(this->core.v2p(pc + 2), 2, data + 2);
+        return std::make_tuple(JUMP_TO_SELF, nullptr);
-//        }
+        }
-//    } else {
+    uint32_t inst_index = instr_decoder.decode_instr(instr);
-        auto res = this->core.read(paddr, 4, data);
+    compile_func f = nullptr;
-        if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
+    if(inst_index < instr_descr.size())
-//    }
+        f = instr_descr[inst_index].op;
    if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
    // curr pc on stack
    ++inst_cnt;
    auto f = decode_instr(root, instr);
    if (f == nullptr) {
-        f = &this_class::illegal_intruction;
+        f = &this_class::illegal_instruction;
    }
    return (this->*f)(pc, instr, this_block);
 }
@@ -308,16 +280,14 @@ template <typename ARCH>
 void vm_impl<ARCH>::gen_raise_trap(uint16_t trap_id, uint16_t cause) {
    auto *TRAP_val = this->gen_const(32, 0x80 << 24 | (cause << 16) | trap_id);
    this->builder.CreateStore(TRAP_val, get_reg_ptr(traits::TRAP_STATE), true);
-    this->builder.CreateStore(this->gen_const(32U, std::numeric_limits<uint32_t>::max()), get_reg_ptr(traits::LAST_BRANCH), false);
+    this->builder.CreateBr(this->trap_blk);
 }
 template <typename ARCH>
 void vm_impl<ARCH>::gen_leave_trap(unsigned lvl) {
    std::vector<Value *> args{ this->core_ptr, ConstantInt::get(getContext(), APInt(64, lvl)) };
    this->builder.CreateCall(this->mod->getFunction("leave_trap"), args);
-    auto *PC_val = this->gen_read_mem(traits::CSR, (lvl << 8) + 0x41, traits::XLEN / 8);
+    this->builder.CreateStore(this->gen_const(32U, static_cast<int>(UNKNOWN_JUMP)), get_reg_ptr(traits::LAST_BRANCH), false);
    this->builder.CreateStore(PC_val, get_reg_ptr(traits::NEXT_PC), false);
    this->builder.CreateStore(this->gen_const(32U, std::numeric_limits<uint32_t>::max()), get_reg_ptr(traits::LAST_BRANCH), false);
 }
 template <typename ARCH>
@@ -326,19 +296,37 @@ void vm_impl<ARCH>::gen_wait(unsigned type) {
    this->builder.CreateCall(this->mod->getFunction("wait"), args);
 }
 // Wraps the given trap value in a 64 bit constant (via gen_const) and passes
 // the result on to set_tval again, which stores it.
 template <typename ARCH>
 inline void vm_impl<ARCH>::set_tval(uint64_t tval) {
    this->set_tval(this->gen_const(64, tval));
 }
 // Extends the given value to 64 bit (gen_ext called with `false` as last
 // argument) and emits a store of the result to this->tval.
 template <typename ARCH>
 inline void vm_impl<ARCH>::set_tval(Value* new_tval) {
    auto widened = this->gen_ext(new_tval, 64, false);
    this->builder.CreateStore(widened, this->tval);
 }
 template <typename ARCH> 
 void vm_impl<ARCH>::gen_trap_behavior(BasicBlock *trap_blk) {
    this->builder.SetInsertPoint(trap_blk);
    this->gen_sync(POST_SYNC, -1); //TODO get right InstrId
    auto *trap_state_val = this->builder.CreateLoad(this->get_typeptr(traits::TRAP_STATE), get_reg_ptr(traits::TRAP_STATE), true);
-    this->builder.CreateStore(this->gen_const(32U, std::numeric_limits<uint32_t>::max()),
+    auto *cur_pc_val = this->builder.CreateLoad(this->get_typeptr(traits::PC), get_reg_ptr(traits::PC), true);
-                              get_reg_ptr(traits::LAST_BRANCH), false);
+    std::vector<Value *> args{this->core_ptr,
-    std::vector<Value *> args{this->core_ptr, this->adj_to64(trap_state_val),
+                                this->adj_to64(trap_state_val),
-                              this->adj_to64(this->builder.CreateLoad(this->get_typeptr(traits::PC), get_reg_ptr(traits::PC), false))};
+                                this->adj_to64(cur_pc_val),
                              this->adj_to64(this->builder.CreateLoad(this->get_type(64),this->tval))};
    this->builder.CreateCall(this->mod->getFunction("enter_trap"), args);
    this->builder.CreateStore(this->gen_const(32U, static_cast<int>(UNKNOWN_JUMP)), get_reg_ptr(traits::LAST_BRANCH), false);
    auto *trap_addr_val = this->builder.CreateLoad(this->get_typeptr(traits::NEXT_PC), get_reg_ptr(traits::NEXT_PC), false);
    this->builder.CreateRet(trap_addr_val);
 }
 // Emits the per-instruction prologue: loads the PENDING_TRAP register and
 // stores the loaded value into the TRAP_STATE register.
 template <typename ARCH>
 void vm_impl<ARCH>::gen_instr_prologue() {
    using core_traits = arch::traits<ARCH>;
    auto pending_type = this->get_typeptr(core_traits::PENDING_TRAP);
    auto pending_ptr = get_reg_ptr(core_traits::PENDING_TRAP);
    auto* pending = this->builder.CreateLoad(pending_type, pending_ptr);
    // the target pointer is fetched only after the load, as before
    auto state_ptr = get_reg_ptr(core_traits::TRAP_STATE);
    this->builder.CreateStore(pending, state_ptr, false);
 }
 template <typename ARCH>
 void vm_impl<ARCH>::gen_instr_epilogue(BasicBlock *bb) {
@@ -349,6 +337,14 @@ void vm_impl<ARCH>::gen_instr_epilogue(BasicBlock *bb) {
                              ConstantInt::get(getContext(), APInt(v->getType()->getIntegerBitWidth(), 0))),
                          target_bb, this->trap_blk, 1);
    this->builder.SetInsertPoint(target_bb);
    // update icount
    auto* icount_val = this->builder.CreateAdd(
        this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::ICOUNT), get_reg_ptr(arch::traits<ARCH>::ICOUNT)), this->gen_const(64U, 1));
    this->builder.CreateStore(icount_val, get_reg_ptr(arch::traits<ARCH>::ICOUNT), false);
    //increment cyclecount
    auto* cycle_val = this->builder.CreateAdd(
        this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::CYCLE), get_reg_ptr(arch::traits<ARCH>::CYCLE)), this->gen_const(64U, 1));
    this->builder.CreateStore(cycle_val, get_reg_ptr(arch::traits<ARCH>::CYCLE), false);
 }
 } // namespace ${coreDef.name.toLowerCase()}
--- a/gen_input/templates/tcc/CORENAME.cpp.gtl
+++ b/gen_input/templates/tcc/CORENAME.cpp.gtl
@@ -37,7 +37,10 @@
 #include <iss/tcc/vm_base.h>
 #include <util/logging.h>
 #include <sstream>
-
+#include <iss/instruction_decoder.h>
 <%def fcsr = registers.find {it.name=='FCSR'}
 if(fcsr != null) {%>
 #include <vm/fp_functions.h><%}%>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
 #endif
@@ -80,16 +83,21 @@ protected:
    using vm_base<ARCH>::get_reg_ptr;
    using this_class = vm_impl<ARCH>;
-    using compile_ret_t = std::tuple<continuation_e>;
+    using compile_ret_t = continuation_e;
    using compile_func = compile_ret_t (this_class::*)(virt_addr_t &pc, code_word_t instr, tu_builder&);
    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
 <%
 if(fcsr != null) {%>
    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}   
 <%}%>
    void add_prologue(tu_builder& tu) override;
    void setup_module(std::string m) override {
        super::setup_module(m);
    }
-    compile_ret_t gen_single_inst_behavior(virt_addr_t &, unsigned int &, tu_builder&) override;
+    compile_ret_t gen_single_inst_behavior(virt_addr_t &, tu_builder&) override;
    void gen_trap_behavior(tu_builder& tu) override;
@@ -97,7 +105,9 @@ protected:
    void gen_leave_trap(tu_builder& tu, unsigned lvl);
-    void gen_wait(tu_builder& tu, unsigned type);
+    inline void gen_set_tval(tu_builder& tu, uint64_t new_tval);
    inline void gen_set_tval(tu_builder& tu, value new_tval);
    inline void gen_trap_check(tu_builder& tu) {
        tu("if(*trap_state!=0) goto trap_entry;");
@@ -128,25 +138,19 @@ protected:
        return (from & mask) | ((from & sign_mask) ? ~mask : 0);
    }
 <%functions.each{ it.eachLine { %>
    ${it}<%}%>
 <%}%>
 private:
    /****************************************************************************
     * start opcode definitions
     ****************************************************************************/
    struct instruction_descriptor {
-        size_t length;
+        uint32_t length;
        uint32_t value;
        uint32_t mask;
        compile_func op;
    };
    // Node of the instruction decoding tree built by populate_decoding_tree.
    struct decoding_tree_node{
        // descriptors held by this node; cleared once they were distributed to children
        std::vector<instruction_descriptor> instrs;
        // child nodes, allocated with new by populate_decoding_tree
        std::vector<decoding_tree_node*> children;
        // starts with all bits set and is narrowed by AND-ing the masks of the node's descriptors
        uint32_t submask = std::numeric_limits<uint32_t>::max();
        // key of this node: descriptor value masked with the parent's submask
        uint32_t value;
        decoding_tree_node(uint32_t value) : value(value){}
    };
    decoding_tree_node* root {nullptr};
    const std::array<instruction_descriptor, ${instructions.size()}> instr_descr = {{
         /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
@@ -154,6 +158,9 @@ private:
        {${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%>
    }};
    //needs to be declared after instr_descr
    decoder instr_decoder;
    /* instruction definitions */<%instructions.eachWithIndex{instr, idx -> %>
    /* instruction ${idx}: ${instr.name} */
    compile_ret_t __${generator.functionName(instr.name)}(virt_addr_t& pc, code_word_t instr, tu_builder& tu){
@@ -164,82 +171,37 @@ private:
        <%}%>if(this->disass_enabled){
            /* generate console output when executing the command */<%instr.disass.eachLine{%>
            ${it}<%}%>
            tu("print_disass(core_ptr, {:#x}, \"{}\");", pc.val, mnemonic);
        }
        auto cur_pc_val = tu.constant(pc.val, traits::reg_bit_widths[traits::PC]);
        pc=pc+ ${instr.length/8};
        gen_set_pc(tu, pc, traits::NEXT_PC);
        tu("(*cycle)++;");
        tu.open_scope();
        this->gen_set_tval(tu, instr);
        <%instr.behavior.eachLine{%>${it}
        <%}%>
        tu.close_scope();
        gen_trap_check(tu);        
        vm_base<ARCH>::gen_sync(tu, POST_SYNC,${idx});
        gen_trap_check(tu);        
        return returnValue;
    }
    <%}%>
    /****************************************************************************
     * end opcode definitions
     ****************************************************************************/
-    compile_ret_t illegal_intruction(virt_addr_t &pc, code_word_t instr, tu_builder& tu) {
+    compile_ret_t illegal_instruction(virt_addr_t &pc, code_word_t instr, tu_builder& tu) {
        vm_impl::gen_sync(tu, iss::PRE_SYNC, instr_descr.size());
        if(this->disass_enabled){
            /* generate console output when executing the command */
            tu("print_disass(core_ptr, {:#x}, \"{}\");", pc.val, std::string("illegal_instruction"));
        }
        pc = pc + ((instr & 3) == 3 ? 4 : 2);
-        gen_raise_trap(tu, 0, 2);     // illegal instruction trap
+        gen_raise_trap(tu, 0, static_cast<int32_t>(traits:: RV_CAUSE_ILLEGAL_INSTRUCTION));
        this->gen_set_tval(tu, instr);
        vm_impl::gen_sync(tu, iss::POST_SYNC, instr_descr.size());
        vm_impl::gen_trap_check(tu);
-        return BRANCH;
+        return ILLEGAL_INSTR;
    }
    //decoding functionality
    /**
     * Distributes the descriptors stored in `root->instrs` over child nodes.
     *
     * The node's submask becomes the AND of all descriptor masks; descriptors
     * sharing the same `value & submask` end up in the same child. With more
     * than one child the function recurses into each of them. With exactly one
     * child that child's descriptors are sorted by descending mask so the
     * least restrictive one comes last. A node without descriptors is left
     * without children instead of indexing an empty child list.
     *
     * @param root node whose descriptors are to be partitioned; `instrs` is empty afterwards
     */
    void populate_decoding_tree(decoding_tree_node* root){
        //create submask
        for(const auto& instr: root->instrs){
            root->submask &= instr.mask;
        }
        //put each instr according to submask&encoding into children
        for(const auto& instr: root->instrs){
            const uint32_t key = instr.value&root->submask;
            decoding_tree_node* bucket = nullptr;
            for(auto* child: root->children){
                //use value as identifying trait, children created here have distinct values
                if(child->value == key){
                    bucket = child;
                    break;
                }
            }
            if(bucket == nullptr){
                // NOTE(review): nodes are allocated with new, no matching delete is visible here
                bucket = new decoding_tree_node(key);
                root->children.push_back(bucket);
            }
            bucket->instrs.push_back(instr);
        }
        root->instrs.clear();
        //call populate_decoding_tree for all children
        if(root->children.size() > 1){
            for(auto* child: root->children){
                populate_decoding_tree(child);
            }
        }
        else if(!root->children.empty()){
            //sort instrs by value of the mask, this works bc we want to have the least restrictive one last
            auto& leaf_instrs = root->children.front()->instrs;
            std::sort(leaf_instrs.begin(), leaf_instrs.end(), [](const instruction_descriptor& lhs, const instruction_descriptor& rhs) {
                return lhs.mask > rhs.mask;
            });
        }
    }
    /**
     * Looks up the compile function for an instruction word in the decoding tree.
     *
     * Inner nodes are traversed by following the first child whose `value`
     * equals `word` masked with the node's `submask`. At a node without
     * children a single stored descriptor is returned as is; otherwise the
     * first descriptor whose mask/value pair matches `word` is returned.
     *
     * @param node tree node to start the lookup at
     * @param word instruction word to decode
     * @return the `op` of the selected descriptor, nullptr if nothing matches
     */
    compile_func decode_instr(decoding_tree_node* node, code_word_t word){
        // descend as long as the current node has children to dispatch on
        while(!node->children.empty()){
            decoding_tree_node* next = nullptr;
            for(auto* candidate : node->children){
                if(candidate->value == (node->submask&word)){
                    next = candidate;
                    break;
                }
            }
            if(next == nullptr)
                return nullptr;
            node = next;
        }
        // childless node: a lone descriptor is taken without a further mask check
        if(node->instrs.size() == 1)
            return node->instrs.front().op;
        for(const auto& descr : node->instrs){
            if((descr.mask&word) == descr.value)
                return descr.op;
        }
        return nullptr;
    }
 };
@@ -252,65 +214,100 @@ template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
 // Constructs the VM for the given core and builds the decoder input:
 // one generic descriptor {value, mask, index} per entry of instr_descr,
 // where index is the entry's position in instr_descr.
 // The lambda reads instr_descr, so that member has to be initialised first.
 template <typename ARCH>
 vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
-: vm_base<ARCH>(core, core_id, cluster_id) {
+: vm_base<ARCH>(core, core_id, cluster_id)
-    root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
+, instr_decoder([this]() {
-    for(auto instr:instr_descr){
+        std::vector<generic_instruction_descriptor> g_instr_descr;
-        root->instrs.push_back(instr);
+        g_instr_descr.reserve(instr_descr.size());
-    }
+        for (uint32_t i = 0; i < instr_descr.size(); ++i) {
-    populate_decoding_tree(root);
+            generic_instruction_descriptor new_instr_descr {instr_descr[i].value, instr_descr[i].mask, i};
-}
+            g_instr_descr.push_back(new_instr_descr);
        }
        // return the local by name: std::move here would only disable copy elision
        return g_instr_descr;
    }()) {}
 template <typename ARCH>
-std::tuple<continuation_e>
+continuation_e
-vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) {
+vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, tu_builder& tu) {
    // we fetch at max 4 byte, alignment is 2
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
    phys_addr_t paddr(pc);
    if(this->core.has_mmu())
        paddr = this->core.virt2phys(pc);
-    //TODO: re-add page handling
+    auto res = this->core.read(paddr, 4, reinterpret_cast<uint8_t*>(&instr));
-//    if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
+    if (res != iss::Ok)
-//        auto res = this->core.read(paddr, 2, data);
+        return ILLEGAL_FETCH;
-//        if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
+    if (instr == 0x0000006f || (instr&0xffff)==0xa001) 
-//        if ((insn & 0x3) == 0x3) { // this is a 32bit instruction
+        return JUMP_TO_SELF;
-//            res = this->core.read(this->core.v2p(pc + 2), 2, data + 2);
+    uint32_t inst_index = instr_decoder.decode_instr(instr);
-//        }
+    compile_func f = nullptr;
-//    } else {
+    if(inst_index < instr_descr.size())
-        auto res = this->core.read(paddr, 4, reinterpret_cast<uint8_t*>(&instr));
+        f = instr_descr[inst_index].op;
        if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
 //    }
    if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
    // curr pc on stack
    ++inst_cnt;
    auto f = decode_instr(root, instr);
    if (f == nullptr) {
-        f = &this_class::illegal_intruction;
+        f = &this_class::illegal_instruction;
    }
    return (this->*f)(pc, instr, tu);
 }
 // Emits the code raising a trap: writes the packed trap word to *trap_state
 // and stores an all-ones 32 bit constant into NEXT_PC.
 // The trap word is 0x80 in bits 31..24, `cause` in bits 23..16 and above,
 // and `trap_id` in the low bits.
 template <typename ARCH> void vm_impl<ARCH>::gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause) {
    // build the word as unsigned: shifting 0x80 (or a large cause) as int would
    // run into the sign bit and yield a negative value for formatting
    const uint32_t trap_word = (uint32_t{0x80} << 24) | (static_cast<uint32_t>(cause) << 16) | trap_id;
    tu("  *trap_state = {:#x};", trap_word);
    tu.store(traits::NEXT_PC, tu.constant(std::numeric_limits<uint32_t>::max(), 32));
 }
 template <typename ARCH> void vm_impl<ARCH>::gen_leave_trap(tu_builder& tu, unsigned lvl) {
    tu("leave_trap(core_ptr, {});", lvl);
    tu.store(traits::NEXT_PC, tu.read_mem(traits::CSR, (lvl << 8) + 0x41, traits::XLEN));
-    tu.store(traits::LAST_BRANCH, tu.constant(std::numeric_limits<uint32_t>::max(), 32));
+    tu.store(traits::LAST_BRANCH, tu.constant(static_cast<int>(UNKNOWN_JUMP), 32));
 }
-template <typename ARCH> void vm_impl<ARCH>::gen_wait(tu_builder& tu, unsigned type) {
+template <typename ARCH> void vm_impl<ARCH>::gen_set_tval(tu_builder& tu, uint64_t new_tval) {
    tu(fmt::format("tval = {};", new_tval));
 }
 // Appends the statement "tval = <expr>;" to the translation unit, where
 // <expr> is the textual form (`str`) of the given value.
 template <typename ARCH> void vm_impl<ARCH>::gen_set_tval(tu_builder& tu, value new_tval) {
    const auto assignment = fmt::format("tval = {};", new_tval.str);
    tu(assignment);
 }
 template <typename ARCH> void vm_impl<ARCH>::gen_trap_behavior(tu_builder& tu) {
    tu("trap_entry:");
    this->gen_sync(tu, POST_SYNC, -1);    
-    tu("enter_trap(core_ptr, *trap_state, *pc, 0);");
+    tu("enter_trap(core_ptr, *trap_state, *pc, tval);");
-    tu.store(traits::LAST_BRANCH, tu.constant(std::numeric_limits<uint32_t>::max(),32));
+    tu.store(traits::LAST_BRANCH, tu.constant(static_cast<int>(UNKNOWN_JUMP),32));
    tu("return *next_pc;");
 }
 // Assembles the prologue text of a translation unit and hands it to
 // tu.add_prologue(): register pointers for trap_state, pending_trap and cycle,
 // plus - when the core description has an FCSR register - function pointer
 // declarations for the floating point helper functions.
 template <typename ARCH> void vm_impl<ARCH>::add_prologue(tu_builder& tu){
    std::ostringstream os;
    // what add_reg_ptr returns is streamed verbatim into the prologue text
    os << tu.add_reg_ptr("trap_state", arch::traits<ARCH>::TRAP_STATE, this->regs_base_ptr);
    os << tu.add_reg_ptr("pending_trap", arch::traits<ARCH>::PENDING_TRAP, this->regs_base_ptr);
    os << tu.add_reg_ptr("cycle", arch::traits<ARCH>::CYCLE, this->regs_base_ptr);
 <%if(fcsr != null) {%>
    // each line declares a function pointer initialised with the host address
    // of the helper, written out as an integer (uintptr_t)
    // NOTE(review): "\\n" is presumably the template-escaped form of "\n" - confirm
    os << "uint32_t (*fget_flags)()=" << (uintptr_t)&fget_flags << ";\\n";
    os << "uint32_t (*fadd_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fadd_s << ";\\n";
    os << "uint32_t (*fsub_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fsub_s << ";\\n";
    os << "uint32_t (*fmul_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fmul_s << ";\\n";
    os << "uint32_t (*fdiv_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fdiv_s << ";\\n";
    os << "uint32_t (*fsqrt_s)(uint32_t v1, uint8_t mode)=" << (uintptr_t)&fsqrt_s << ";\\n";
    os << "uint32_t (*fcmp_s)(uint32_t v1, uint32_t v2, uint32_t op)=" << (uintptr_t)&fcmp_s << ";\\n";
    os << "uint32_t (*fcvt_s)(uint32_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_s << ";\\n";
    os << "uint32_t (*fmadd_s)(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t op, uint8_t mode)=" << (uintptr_t)&fmadd_s << ";\\n";
    os << "uint32_t (*fsel_s)(uint32_t v1, uint32_t v2, uint32_t op)=" << (uintptr_t)&fsel_s << ";\\n";
    os << "uint32_t (*fclass_s)( uint32_t v1 )=" << (uintptr_t)&fclass_s << ";\\n";
    os << "uint32_t (*fconv_d2f)(uint64_t v1, uint8_t mode)=" << (uintptr_t)&fconv_d2f << ";\\n";
    os << "uint64_t (*fconv_f2d)(uint32_t v1, uint8_t mode)=" << (uintptr_t)&fconv_f2d << ";\\n";
    os << "uint64_t (*fadd_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fadd_d << ";\\n";
    os << "uint64_t (*fsub_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fsub_d << ";\\n";
    os << "uint64_t (*fmul_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fmul_d << ";\\n";
    os << "uint64_t (*fdiv_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fdiv_d << ";\\n";
    os << "uint64_t (*fsqrt_d)(uint64_t v1, uint8_t mode)=" << (uintptr_t)&fsqrt_d << ";\\n";
    os << "uint64_t (*fcmp_d)(uint64_t v1, uint64_t v2, uint32_t op)=" << (uintptr_t)&fcmp_d << ";\\n";
    os << "uint64_t (*fcvt_d)(uint64_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_d << ";\\n";
    os << "uint64_t (*fmadd_d)(uint64_t v1, uint64_t v2, uint64_t v3, uint32_t op, uint8_t mode)=" << (uintptr_t)&fmadd_d << ";\\n";
    os << "uint64_t (*fsel_d)(uint64_t v1, uint64_t v2, uint32_t op)=" << (uintptr_t)&fsel_d << ";\\n";
    os << "uint64_t (*fclass_d)(uint64_t v1  )=" << (uintptr_t)&fclass_d << ";\\n";
    os << "uint64_t (*fcvt_32_64)(uint32_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_32_64 << ";\\n";
    os << "uint32_t (*fcvt_64_32)(uint64_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_64_32 << ";\\n";
    os << "uint32_t (*unbox_s)(uint64_t v)=" << (uintptr_t)&unbox_s << ";\\n";
    <%}%>
    tu.add_prologue(os.str());
 }
 } // namespace ${coreDef.name.toLowerCase()}
--- a/softfloat/.gitignore
+++ b/softfloat/.gitignore
@@ -0,0 +1,2 @@
 build/*/*.o
 build/*/*.a
--- a/softfloat/README.md
+++ b/softfloat/README.md
@@ -0,0 +1,24 @@
 Package Overview for Berkeley SoftFloat Release 3e
 ==================================================
 John R. Hauser<br>
 2018 January 20
 Berkeley SoftFloat is a software implementation of binary floating-point
 that conforms to the IEEE Standard for Floating-Point Arithmetic.  SoftFloat
 is distributed in the form of C source code.  Building the SoftFloat sources
 generates a library file (typically `softfloat.a` or `libsoftfloat.a`)
 containing the floating-point subroutines.
 The SoftFloat package is documented in the following files in the `doc`
 subdirectory:
 * [SoftFloat.html](http://www.jhauser.us/arithmetic/SoftFloat-3/doc/SoftFloat.html) Documentation for using the SoftFloat functions.
 * [SoftFloat-source.html](http://www.jhauser.us/arithmetic/SoftFloat-3/doc/SoftFloat-source.html) Documentation for building SoftFloat.
 * [SoftFloat-history.html](http://www.jhauser.us/arithmetic/SoftFloat-3/doc/SoftFloat-history.html) History of the major changes to SoftFloat.
 Other files in the package comprise the source code for SoftFloat.
--- a/softfloat/build/Linux-386-GCC/platform.h
+++ b/softfloat/build/Linux-386-GCC/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,6 +47,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #include "opts-GCC.h"
--- a/softfloat/build/Linux-386-SSE2-GCC/platform.h
+++ b/softfloat/build/Linux-386-SSE2-GCC/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,6 +47,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #include "opts-GCC.h"
--- a/softfloat/build/Linux-ARM-VFPv2-GCC/platform.h
+++ b/softfloat/build/Linux-ARM-VFPv2-GCC/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,6 +47,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #include "opts-GCC.h"
--- a/softfloat/build/Linux-RISCV64-GCC/Makefile
+++ b/softfloat/build/Linux-RISCV64-GCC/Makefile
@@ -0,0 +1,399 @@
 #=============================================================================
 #
 # This Makefile is part of the SoftFloat IEEE Floating-Point Arithmetic
 # Package, Release 3e, by John R. Hauser.
 #
 # Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
 # University of California.  All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
 #
 #  1. Redistributions of source code must retain the above copyright notice,
 #     this list of conditions, and the following disclaimer.
 #
 #  2. Redistributions in binary form must reproduce the above copyright
 #     notice, this list of conditions, and the following disclaimer in the
 #     documentation and/or other materials provided with the distribution.
 #
 #  3. Neither the name of the University nor the names of its contributors
 #     may be used to endorse or promote products derived from this software
 #     without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 # DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #
 #=============================================================================
 # Location of the SoftFloat sources and name of the target-specific
 # specialization subdirectory below it.  Both use `?=' and can therefore be
 # overridden from the command line or the environment.
 SOURCE_DIR ?= ../../source
 SPECIALIZE_TYPE ?= RISCV
 # Values handed to the compiler as -march= and -mabi=.
 MARCH ?= rv64gcv_zfh_zfhmin
 MABI ?= lp64d
 # Prefix of the cross toolchain.  It is applied to both the compiler and the
 # archiver so that the objects and the archive index are produced by the same
 # binutils; a host `ar' that does not know the RISC-V object format would
 # create an archive without a usable symbol index.  Override to use another
 # toolchain, e.g. `make CROSS_COMPILE=riscv64-linux-gnu-'.
 CROSS_COMPILE ?= riscv64-unknown-linux-gnu-
 # Preprocessor options forwarded to every compilation.
 SOFTFLOAT_OPTS ?= \
  -DSOFTFLOAT_ROUND_ODD -DINLINE_LEVEL=5 -DSOFTFLOAT_FAST_DIV32TO16 \
  -DSOFTFLOAT_FAST_DIV64TO32
 DELETE = rm -f
 # Header search path: this build directory (platform.h), the specialization
 # directory and the common include directory.
 C_INCLUDES = -I. -I$(SOURCE_DIR)/$(SPECIALIZE_TYPE) -I$(SOURCE_DIR)/include
 # Command prefix that compiles one C file into the rule's target ($@); the
 # source file name is appended by the rule that uses it.
 COMPILE_C = \
  $(CROSS_COMPILE)gcc -c -march=$(MARCH) -mabi=$(MABI) -Werror-implicit-function-declaration -DSOFTFLOAT_FAST_INT64 \
    $(SOFTFLOAT_OPTS) $(C_INCLUDES) -O2 -o $@
 # Command prefix that creates the archive named by the rule's target ($@) and
 # writes its symbol index (`s'); the member list is appended by the rule.
 MAKELIB = $(CROSS_COMPILE)ar crs $@
 # File name suffixes for object files and static libraries.
 OBJ = .o
 LIB = .a
 OTHER_HEADERS = $(SOURCE_DIR)/include/opts-GCC.h
 # Default goal: build the static library.
 .PHONY: all
 all: softfloat$(LIB)
 # Objects of the primitive helper routines.  They are compiled from
 # $(SOURCE_DIR)/<name>.c by the static pattern rule shared with OBJS_OTHERS.
 # The last entry carries no line continuation, so the list ends here no matter
 # what line follows it.
 OBJS_PRIMITIVES = \
  s_eq128$(OBJ) \
  s_le128$(OBJ) \
  s_lt128$(OBJ) \
  s_shortShiftLeft128$(OBJ) \
  s_shortShiftRight128$(OBJ) \
  s_shortShiftRightJam64$(OBJ) \
  s_shortShiftRightJam64Extra$(OBJ) \
  s_shortShiftRightJam128$(OBJ) \
  s_shortShiftRightJam128Extra$(OBJ) \
  s_shiftRightJam32$(OBJ) \
  s_shiftRightJam64$(OBJ) \
  s_shiftRightJam64Extra$(OBJ) \
  s_shiftRightJam128$(OBJ) \
  s_shiftRightJam128Extra$(OBJ) \
  s_shiftRightJam256M$(OBJ) \
  s_countLeadingZeros8$(OBJ) \
  s_countLeadingZeros16$(OBJ) \
  s_countLeadingZeros32$(OBJ) \
  s_countLeadingZeros64$(OBJ) \
  s_add128$(OBJ) \
  s_add256M$(OBJ) \
  s_sub128$(OBJ) \
  s_sub256M$(OBJ) \
  s_mul64ByShifted32To128$(OBJ) \
  s_mul64To128$(OBJ) \
  s_mul128By32$(OBJ) \
  s_mul128To256M$(OBJ) \
  s_approxRecip_1Ks$(OBJ) \
  s_approxRecip32_1$(OBJ) \
  s_approxRecipSqrt_1Ks$(OBJ) \
  s_approxRecipSqrt32_1$(OBJ)
 # Objects of the target-specific routines.  They are compiled from
 # $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/<name>.c by their own static pattern rule.
 # The last entry carries no line continuation, so the list ends here no matter
 # what line follows it.
 OBJS_SPECIALIZE = \
  softfloat_raiseFlags$(OBJ) \
  s_f16UIToCommonNaN$(OBJ) \
  s_commonNaNToF16UI$(OBJ) \
  s_propagateNaNF16UI$(OBJ) \
  s_bf16UIToCommonNaN$(OBJ) \
  s_commonNaNToBF16UI$(OBJ) \
  s_f32UIToCommonNaN$(OBJ) \
  s_commonNaNToF32UI$(OBJ) \
  s_propagateNaNF32UI$(OBJ) \
  s_f64UIToCommonNaN$(OBJ) \
  s_commonNaNToF64UI$(OBJ) \
  s_propagateNaNF64UI$(OBJ) \
  extF80M_isSignalingNaN$(OBJ) \
  s_extF80UIToCommonNaN$(OBJ) \
  s_commonNaNToExtF80UI$(OBJ) \
  s_propagateNaNExtF80UI$(OBJ) \
  f128M_isSignalingNaN$(OBJ) \
  s_f128UIToCommonNaN$(OBJ) \
  s_commonNaNToF128UI$(OBJ) \
  s_propagateNaNF128UI$(OBJ)
 # Objects of all remaining routines (rounding/packing helpers, conversions,
 # arithmetic and comparisons).  They are compiled from $(SOURCE_DIR)/<name>.c
 # by the static pattern rule shared with OBJS_PRIMITIVES.  The last entry
 # carries no line continuation, so the list ends here no matter what line
 # follows it.
 OBJS_OTHERS = \
  s_roundToUI32$(OBJ) \
  s_roundToUI64$(OBJ) \
  s_roundToI32$(OBJ) \
  s_roundToI64$(OBJ) \
  s_normSubnormalBF16Sig$(OBJ) \
  s_roundPackToBF16$(OBJ) \
  s_normSubnormalF16Sig$(OBJ) \
  s_roundPackToF16$(OBJ) \
  s_normRoundPackToF16$(OBJ) \
  s_addMagsF16$(OBJ) \
  s_subMagsF16$(OBJ) \
  s_mulAddF16$(OBJ) \
  s_normSubnormalF32Sig$(OBJ) \
  s_roundPackToF32$(OBJ) \
  s_normRoundPackToF32$(OBJ) \
  s_addMagsF32$(OBJ) \
  s_subMagsF32$(OBJ) \
  s_mulAddF32$(OBJ) \
  s_normSubnormalF64Sig$(OBJ) \
  s_roundPackToF64$(OBJ) \
  s_normRoundPackToF64$(OBJ) \
  s_addMagsF64$(OBJ) \
  s_subMagsF64$(OBJ) \
  s_mulAddF64$(OBJ) \
  s_normSubnormalExtF80Sig$(OBJ) \
  s_roundPackToExtF80$(OBJ) \
  s_normRoundPackToExtF80$(OBJ) \
  s_addMagsExtF80$(OBJ) \
  s_subMagsExtF80$(OBJ) \
  s_normSubnormalF128Sig$(OBJ) \
  s_roundPackToF128$(OBJ) \
  s_normRoundPackToF128$(OBJ) \
  s_addMagsF128$(OBJ) \
  s_subMagsF128$(OBJ) \
  s_mulAddF128$(OBJ) \
  softfloat_state$(OBJ) \
  ui32_to_f16$(OBJ) \
  ui32_to_f32$(OBJ) \
  ui32_to_f64$(OBJ) \
  ui32_to_extF80$(OBJ) \
  ui32_to_extF80M$(OBJ) \
  ui32_to_f128$(OBJ) \
  ui32_to_f128M$(OBJ) \
  ui64_to_f16$(OBJ) \
  ui64_to_f32$(OBJ) \
  ui64_to_f64$(OBJ) \
  ui64_to_extF80$(OBJ) \
  ui64_to_extF80M$(OBJ) \
  ui64_to_f128$(OBJ) \
  ui64_to_f128M$(OBJ) \
  i32_to_f16$(OBJ) \
  i32_to_f32$(OBJ) \
  i32_to_f64$(OBJ) \
  i32_to_extF80$(OBJ) \
  i32_to_extF80M$(OBJ) \
  i32_to_f128$(OBJ) \
  i32_to_f128M$(OBJ) \
  i64_to_f16$(OBJ) \
  i64_to_f32$(OBJ) \
  i64_to_f64$(OBJ) \
  i64_to_extF80$(OBJ) \
  i64_to_extF80M$(OBJ) \
  i64_to_f128$(OBJ) \
  i64_to_f128M$(OBJ) \
  bf16_isSignalingNaN$(OBJ) \
  bf16_to_f32$(OBJ) \
  f16_to_ui32$(OBJ) \
  f16_to_ui64$(OBJ) \
  f16_to_i32$(OBJ) \
  f16_to_i64$(OBJ) \
  f16_to_ui32_r_minMag$(OBJ) \
  f16_to_ui64_r_minMag$(OBJ) \
  f16_to_i32_r_minMag$(OBJ) \
  f16_to_i64_r_minMag$(OBJ) \
  f16_to_f32$(OBJ) \
  f16_to_f64$(OBJ) \
  f16_to_extF80$(OBJ) \
  f16_to_extF80M$(OBJ) \
  f16_to_f128$(OBJ) \
  f16_to_f128M$(OBJ) \
  f16_roundToInt$(OBJ) \
  f16_add$(OBJ) \
  f16_sub$(OBJ) \
  f16_mul$(OBJ) \
  f16_mulAdd$(OBJ) \
  f16_div$(OBJ) \
  f16_rem$(OBJ) \
  f16_sqrt$(OBJ) \
  f16_eq$(OBJ) \
  f16_le$(OBJ) \
  f16_lt$(OBJ) \
  f16_eq_signaling$(OBJ) \
  f16_le_quiet$(OBJ) \
  f16_lt_quiet$(OBJ) \
  f16_isSignalingNaN$(OBJ) \
  f32_to_ui32$(OBJ) \
  f32_to_ui64$(OBJ) \
  f32_to_i32$(OBJ) \
  f32_to_i64$(OBJ) \
  f32_to_ui32_r_minMag$(OBJ) \
  f32_to_ui64_r_minMag$(OBJ) \
  f32_to_i32_r_minMag$(OBJ) \
  f32_to_i64_r_minMag$(OBJ) \
  f32_to_bf16$(OBJ) \
  f32_to_f16$(OBJ) \
  f32_to_f64$(OBJ) \
  f32_to_extF80$(OBJ) \
  f32_to_extF80M$(OBJ) \
  f32_to_f128$(OBJ) \
  f32_to_f128M$(OBJ) \
  f32_roundToInt$(OBJ) \
  f32_add$(OBJ) \
  f32_sub$(OBJ) \
  f32_mul$(OBJ) \
  f32_mulAdd$(OBJ) \
  f32_div$(OBJ) \
  f32_rem$(OBJ) \
  f32_sqrt$(OBJ) \
  f32_eq$(OBJ) \
  f32_le$(OBJ) \
  f32_lt$(OBJ) \
  f32_eq_signaling$(OBJ) \
  f32_le_quiet$(OBJ) \
  f32_lt_quiet$(OBJ) \
  f32_isSignalingNaN$(OBJ) \
  f64_to_ui32$(OBJ) \
  f64_to_ui64$(OBJ) \
  f64_to_i32$(OBJ) \
  f64_to_i64$(OBJ) \
  f64_to_ui32_r_minMag$(OBJ) \
  f64_to_ui64_r_minMag$(OBJ) \
  f64_to_i32_r_minMag$(OBJ) \
  f64_to_i64_r_minMag$(OBJ) \
  f64_to_f16$(OBJ) \
  f64_to_f32$(OBJ) \
  f64_to_extF80$(OBJ) \
  f64_to_extF80M$(OBJ) \
  f64_to_f128$(OBJ) \
  f64_to_f128M$(OBJ) \
  f64_roundToInt$(OBJ) \
  f64_add$(OBJ) \
  f64_sub$(OBJ) \
  f64_mul$(OBJ) \
  f64_mulAdd$(OBJ) \
  f64_div$(OBJ) \
  f64_rem$(OBJ) \
  f64_sqrt$(OBJ) \
  f64_eq$(OBJ) \
  f64_le$(OBJ) \
  f64_lt$(OBJ) \
  f64_eq_signaling$(OBJ) \
  f64_le_quiet$(OBJ) \
  f64_lt_quiet$(OBJ) \
  f64_isSignalingNaN$(OBJ) \
  extF80_to_ui32$(OBJ) \
  extF80_to_ui64$(OBJ) \
  extF80_to_i32$(OBJ) \
  extF80_to_i64$(OBJ) \
  extF80_to_ui32_r_minMag$(OBJ) \
  extF80_to_ui64_r_minMag$(OBJ) \
  extF80_to_i32_r_minMag$(OBJ) \
  extF80_to_i64_r_minMag$(OBJ) \
  extF80_to_f16$(OBJ) \
  extF80_to_f32$(OBJ) \
  extF80_to_f64$(OBJ) \
  extF80_to_f128$(OBJ) \
  extF80_roundToInt$(OBJ) \
  extF80_add$(OBJ) \
  extF80_sub$(OBJ) \
  extF80_mul$(OBJ) \
  extF80_div$(OBJ) \
  extF80_rem$(OBJ) \
  extF80_sqrt$(OBJ) \
  extF80_eq$(OBJ) \
  extF80_le$(OBJ) \
  extF80_lt$(OBJ) \
  extF80_eq_signaling$(OBJ) \
  extF80_le_quiet$(OBJ) \
  extF80_lt_quiet$(OBJ) \
  extF80_isSignalingNaN$(OBJ) \
  extF80M_to_ui32$(OBJ) \
  extF80M_to_ui64$(OBJ) \
  extF80M_to_i32$(OBJ) \
  extF80M_to_i64$(OBJ) \
  extF80M_to_ui32_r_minMag$(OBJ) \
  extF80M_to_ui64_r_minMag$(OBJ) \
  extF80M_to_i32_r_minMag$(OBJ) \
  extF80M_to_i64_r_minMag$(OBJ) \
  extF80M_to_f16$(OBJ) \
  extF80M_to_f32$(OBJ) \
  extF80M_to_f64$(OBJ) \
  extF80M_to_f128M$(OBJ) \
  extF80M_roundToInt$(OBJ) \
  extF80M_add$(OBJ) \
  extF80M_sub$(OBJ) \
  extF80M_mul$(OBJ) \
  extF80M_div$(OBJ) \
  extF80M_rem$(OBJ) \
  extF80M_sqrt$(OBJ) \
  extF80M_eq$(OBJ) \
  extF80M_le$(OBJ) \
  extF80M_lt$(OBJ) \
  extF80M_eq_signaling$(OBJ) \
  extF80M_le_quiet$(OBJ) \
  extF80M_lt_quiet$(OBJ) \
  f128_to_ui32$(OBJ) \
  f128_to_ui64$(OBJ) \
  f128_to_i32$(OBJ) \
  f128_to_i64$(OBJ) \
  f128_to_ui32_r_minMag$(OBJ) \
  f128_to_ui64_r_minMag$(OBJ) \
  f128_to_i32_r_minMag$(OBJ) \
  f128_to_i64_r_minMag$(OBJ) \
  f128_to_f16$(OBJ) \
  f128_to_f32$(OBJ) \
  f128_to_extF80$(OBJ) \
  f128_to_f64$(OBJ) \
  f128_roundToInt$(OBJ) \
  f128_add$(OBJ) \
  f128_sub$(OBJ) \
  f128_mul$(OBJ) \
  f128_mulAdd$(OBJ) \
  f128_div$(OBJ) \
  f128_rem$(OBJ) \
  f128_sqrt$(OBJ) \
  f128_eq$(OBJ) \
  f128_le$(OBJ) \
  f128_lt$(OBJ) \
  f128_eq_signaling$(OBJ) \
  f128_le_quiet$(OBJ) \
  f128_lt_quiet$(OBJ) \
  f128_isSignalingNaN$(OBJ) \
  f128M_to_ui32$(OBJ) \
  f128M_to_ui64$(OBJ) \
  f128M_to_i32$(OBJ) \
  f128M_to_i64$(OBJ) \
  f128M_to_ui32_r_minMag$(OBJ) \
  f128M_to_ui64_r_minMag$(OBJ) \
  f128M_to_i32_r_minMag$(OBJ) \
  f128M_to_i64_r_minMag$(OBJ) \
  f128M_to_f16$(OBJ) \
  f128M_to_f32$(OBJ) \
  f128M_to_extF80M$(OBJ) \
  f128M_to_f64$(OBJ) \
  f128M_roundToInt$(OBJ) \
  f128M_add$(OBJ) \
  f128M_sub$(OBJ) \
  f128M_mul$(OBJ) \
  f128M_mulAdd$(OBJ) \
  f128M_div$(OBJ) \
  f128M_rem$(OBJ) \
  f128M_sqrt$(OBJ) \
  f128M_eq$(OBJ) \
  f128M_le$(OBJ) \
  f128M_lt$(OBJ) \
  f128M_eq_signaling$(OBJ) \
  f128M_le_quiet$(OBJ) \
  f128M_lt_quiet$(OBJ)
 # Every object that goes into the library.
 OBJS_ALL = $(OBJS_PRIMITIVES) $(OBJS_SPECIALIZE) $(OBJS_OTHERS)
 # Header prerequisites shared by all objects: a change to any of these
 # headers causes every object to be rebuilt.
 $(OBJS_ALL): \
  $(OTHER_HEADERS) platform.h $(SOURCE_DIR)/include/primitiveTypes.h \
  $(SOURCE_DIR)/include/primitives.h
 # Additional header prerequisites for everything except the primitives.
 $(OBJS_SPECIALIZE) $(OBJS_OTHERS): \
  $(SOURCE_DIR)/include/softfloat_types.h $(SOURCE_DIR)/include/internals.h \
  $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/specialize.h \
  $(SOURCE_DIR)/include/softfloat.h
 # Static pattern rule: each primitive/other object <name>$(OBJ) is compiled
 # from $(SOURCE_DIR)/<name>.c ($* expands to the matched stem <name>).
 $(OBJS_PRIMITIVES) $(OBJS_OTHERS): %$(OBJ): $(SOURCE_DIR)/%.c
 	$(COMPILE_C) $(SOURCE_DIR)/$*.c
 # Static pattern rule: each specialization object <name>$(OBJ) is compiled
 # from $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/<name>.c.
 $(OBJS_SPECIALIZE): %$(OBJ): $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/%.c
 	$(COMPILE_C) $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/$*.c
 # Builds the static library.  Any existing archive is deleted first so the
 # new one is created from scratch out of all prerequisites ($^).
 softfloat$(LIB): $(OBJS_ALL)
 	$(DELETE) $@
 	$(MAKELIB) $^
 # Removes all objects and the library.
 .PHONY: clean
 clean:
 	$(DELETE) $(OBJS_ALL) softfloat$(LIB)
--- a/softfloat/build/Linux-RISCV64-GCC/platform.h
+++ b/softfloat/build/Linux-RISCV64-GCC/platform.h
@@ -0,0 +1,54 @@
 /*============================================================================
 This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
 University of California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 /*----------------------------------------------------------------------------
 | Defines LITTLEENDIAN for this build.
 | NOTE(review): presumably tells the SoftFloat sources that the target stores
 | multi-word values least-significant word first -- confirm against
 | SoftFloat-source.html.
 *----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1
 /*----------------------------------------------------------------------------
 | INLINE is the specifier used for functions meant to be inlined:  plain
 | `inline' when the compiler defines __GNUC_STDC_INLINE__, and
 | `extern inline' otherwise.
 *----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
 #define INLINE extern inline
 #endif
 /*----------------------------------------------------------------------------
 | Feature switches defined before "opts-GCC.h" is included.
 | NOTE(review): presumably they make opts-GCC.h use the compiler's
 | count-leading-zeros builtins and its 128-bit integer type -- confirm against
 | opts-GCC.h.
 *----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #define SOFTFLOAT_INTRINSIC_INT128 1
 #include "opts-GCC.h"
--- a/softfloat/build/Linux-x86_64-GCC/Makefile
+++ b/softfloat/build/Linux-x86_64-GCC/Makefile
@@ -94,6 +94,8 @@ OBJS_SPECIALIZE = \
  s_f16UIToCommonNaN$(OBJ) \
  s_commonNaNToF16UI$(OBJ) \
  s_propagateNaNF16UI$(OBJ) \
  s_bf16UIToCommonNaN$(OBJ) \
  s_commonNaNToBF16UI$(OBJ) \
  s_f32UIToCommonNaN$(OBJ) \
  s_commonNaNToF32UI$(OBJ) \
  s_propagateNaNF32UI$(OBJ) \
@@ -114,6 +116,8 @@ OBJS_OTHERS = \
  s_roundToUI64$(OBJ) \
  s_roundToI32$(OBJ) \
  s_roundToI64$(OBJ) \
  s_normSubnormalBF16Sig$(OBJ) \
  s_roundPackToBF16$(OBJ) \
  s_normSubnormalF16Sig$(OBJ) \
  s_roundPackToF16$(OBJ) \
  s_normRoundPackToF16$(OBJ) \
@@ -172,6 +176,8 @@ OBJS_OTHERS = \
  i64_to_extF80M$(OBJ) \
  i64_to_f128$(OBJ) \
  i64_to_f128M$(OBJ) \
  bf16_isSignalingNaN$(OBJ) \
  bf16_to_f32$(OBJ) \
  f16_to_ui32$(OBJ) \
  f16_to_ui64$(OBJ) \
  f16_to_i32$(OBJ) \
@@ -209,6 +215,7 @@ OBJS_OTHERS = \
  f32_to_ui64_r_minMag$(OBJ) \
  f32_to_i32_r_minMag$(OBJ) \
  f32_to_i64_r_minMag$(OBJ) \
  f32_to_bf16$(OBJ) \
  f32_to_f16$(OBJ) \
  f32_to_f64$(OBJ) \
  f32_to_extF80$(OBJ) \
--- a/softfloat/build/Win32-MinGW/platform.h
+++ b/softfloat/build/Win32-MinGW/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,6 +47,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #include "opts-GCC.h"
--- a/softfloat/build/Win32-SSE2-MinGW/platform.h
+++ b/softfloat/build/Win32-SSE2-MinGW/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,6 +47,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #include "opts-GCC.h"
--- a/softfloat/build/Win64-MinGW-w64/platform.h
+++ b/softfloat/build/Win64-MinGW-w64/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,7 +47,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #define SOFTFLOAT_INTRINSIC_INT128 1
 #include "opts-GCC.h"
--- a/softfloat/build/template-FAST_INT64/Makefile
+++ b/softfloat/build/template-FAST_INT64/Makefile
@@ -115,6 +115,8 @@ OBJS_OTHERS = \
  s_roundToUI64$(OBJ) \
  s_roundToI32$(OBJ) \
  s_roundToI64$(OBJ) \
  s_normSubnormalBF16Sig$(OBJ) \
  s_roundPackToBF16$(OBJ) \
  s_normSubnormalF16Sig$(OBJ) \
  s_roundPackToF16$(OBJ) \
  s_normRoundPackToF16$(OBJ) \
@@ -173,6 +175,8 @@ OBJS_OTHERS = \
  i64_to_extF80M$(OBJ) \
  i64_to_f128$(OBJ) \
  i64_to_f128M$(OBJ) \
  bf16_isSignalingNaN$(OBJ) \
  bf16_to_f32$(OBJ) \
  f16_to_ui32$(OBJ) \
  f16_to_ui64$(OBJ) \
  f16_to_i32$(OBJ) \
@@ -210,6 +214,7 @@ OBJS_OTHERS = \
  f32_to_ui64_r_minMag$(OBJ) \
  f32_to_i32_r_minMag$(OBJ) \
  f32_to_i64_r_minMag$(OBJ) \
  f32_to_bf16$(OBJ) \
  f32_to_f16$(OBJ) \
  f32_to_f64$(OBJ) \
  f32_to_extF80$(OBJ) \
--- a/softfloat/build/template-FAST_INT64/platform.h
+++ b/softfloat/build/template-FAST_INT64/platform.h
@@ -37,13 +37,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 // Edit lines marked with `==>'.  See "SoftFloat-source.html".
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
-== > #define LITTLEENDIAN 1
+==> #define LITTLEENDIAN 1
-    /*----------------------------------------------------------------------------
+/*----------------------------------------------------------------------------
-     *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
-    == > #define INLINE inline
+==> #define INLINE inline
 /*----------------------------------------------------------------------------
 *----------------------------------------------------------------------------*/
 ==> #define THREAD_LOCAL _Thread_local
    /*----------------------------------------------------------------------------
     *----------------------------------------------------------------------------*/
    == > #define THREAD_LOCAL _Thread_local
--- a/softfloat/build/template-not-FAST_INT64/platform.h
+++ b/softfloat/build/template-not-FAST_INT64/platform.h
@@ -37,13 +37,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 // Edit lines marked with `==>'.  See "SoftFloat-source.html".
 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
-== > #define LITTLEENDIAN 1
+==> #define LITTLEENDIAN 1
-    /*----------------------------------------------------------------------------
+/*----------------------------------------------------------------------------
-     *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
-    == > #define INLINE inline
+==> #define INLINE inline
 /*----------------------------------------------------------------------------
 *----------------------------------------------------------------------------*/
 ==> #define THREAD_LOCAL _Thread_local
    /*----------------------------------------------------------------------------
     *----------------------------------------------------------------------------*/
    == > #define THREAD_LOCAL _Thread_local
--- a/softfloat/doc/SoftFloat.html
+++ b/softfloat/doc/SoftFloat.html
@@ -508,7 +508,7 @@ significant extra cost.
 On computers where the word size is <NOBR>64 bits</NOBR> or larger, both
 function versions (<CODE>f128M_add</CODE> and <CODE>f128_add</CODE>) are
 provided, because the cost of passing by value is then more reasonable.
-Applications that must be portable accross both classes of computers must use
+Applications that must be portable across both classes of computers must use
 the pointer-based functions, as these are always implemented.
 However, if it is known that SoftFloat includes the by-value functions for all
 platforms of interest, programmers can use whichever version they prefer.
--- a/softfloat/source/8086-SSE/s_bf16UIToCommonNaN.c
+++ b/softfloat/source/8086-SSE/s_bf16UIToCommonNaN.c
@@ -0,0 +1,59 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "specialize.h"
 #include "softfloat.h"
 /*----------------------------------------------------------------------------
 | Translates the BF16 NaN whose bit pattern is `uiA' into the common NaN form
 | and writes it to the structure at `zPtr'.  When `softfloat_isSigNaNBF16UI'
 | reports a signaling NaN, the invalid exception flag is raised.  The `sign'
 | field receives `uiA' shifted right by 15, the `v64' field receives `uiA'
 | shifted left by 56 (so bits 7..0 of `uiA' end up in bits 63..56), and the
 | `v0' field is cleared.
 *----------------------------------------------------------------------------*/
 void softfloat_bf16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr )
 {
    const uint_fast64_t payload = (uint_fast64_t) uiA<<56;
    if ( softfloat_isSigNaNBF16UI( uiA ) ) softfloat_raiseFlags( softfloat_flag_invalid );
    zPtr->sign = uiA>>15;
    zPtr->v64  = payload;
    zPtr->v0   = 0;
 }
--- a/softfloat/source/8086-SSE/s_commonNaNToBF16UI.c
+++ b/softfloat/source/8086-SSE/s_commonNaNToBF16UI.c
@@ -0,0 +1,51 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "specialize.h"
 /*----------------------------------------------------------------------------
 | Builds the bit pattern of a BF16 NaN from the common NaN at `aPtr' and
 | returns it as an unsigned integer.  The `sign' field is placed in bit 15,
 | the bits of 0x7FC0 are always set, and the `v64' field shifted right by 56
 | is OR-ed into the result.
 *----------------------------------------------------------------------------*/
 uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr )
 {
    uint_fast16_t uiZ = (uint_fast16_t) aPtr->sign<<15;
    uiZ |= 0x7FC0;
    uiZ |= aPtr->v64>>56;
    return uiZ;
 }
--- a/softfloat/source/8086-SSE/specialize.h
+++ b/softfloat/source/8086-SSE/specialize.h
@@ -116,6 +116,27 @@ uint_fast16_t softfloat_commonNaNToF16UI(const struct commonNaN* aPtr);
 *----------------------------------------------------------------------------*/
 uint_fast16_t softfloat_propagateNaNF16UI(uint_fast16_t uiA, uint_fast16_t uiB);
 /*----------------------------------------------------------------------------
 | Evaluates to true when the 16-bit unsigned integer 'uiA' holds the bit
 | pattern of a BF16 (16-bit brain floating-point) signaling NaN:  bits 14..7
 | all set, bit 6 clear, and at least one of bits 5..0 set.
 | Note:  'uiA' appears twice in the expansion, so the argument may be
 | evaluated more than once.
 *----------------------------------------------------------------------------*/
 #define softfloat_isSigNaNBF16UI(uiA) (((uiA)&0x7FC0) == 0x7F80 && ((uiA)&0x003F) != 0)
 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts
 | this NaN to the common NaN form, and stores the resulting common NaN at the
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
 void softfloat_bf16UIToCommonNaN(uint_fast16_t uiA, struct commonNaN* zPtr);
 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit brain
 | floating-point (BF16) NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
 uint_fast16_t softfloat_commonNaNToBF16UI(const struct commonNaN* aPtr);
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 32-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
--- a/softfloat/source/RISCV/s_bf16UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_bf16UIToCommonNaN.c
@@ -0,0 +1,5 @@
 /*----------------------------------------------------------------------------
 | This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
--- a/softfloat/source/RISCV/s_commonNaNToBF16UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToBF16UI.c
@@ -0,0 +1,5 @@
 /*----------------------------------------------------------------------------
 | This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
--- a/softfloat/source/RISCV/s_commonNaNToExtF80M.c
+++ b/softfloat/source/RISCV/s_commonNaNToExtF80M.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
-All rights reserved.
+California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,9 +34,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
+#include "softfloat_types.h"
 #define softfloat_commonNaNToExtF80M softfloat_commonNaNToExtF80M
 #include "specialize.h"
 /*----------------------------------------------------------------------------
@@ -49,8 +50,8 @@ void
     const struct commonNaN *aPtr, struct extFloat80M *zSPtr )
 {
-    zSPtr->signExp = packToExtF80UI64( aPtr->sign, 0x7FFF );
+    zSPtr->signExp = defaultNaNExtF80UI64;
-    zSPtr->signif = UINT64_C( 0xC000000000000000 ) | aPtr->v64>>1;
+    zSPtr->signif  = defaultNaNExtF80UI0;
 }
--- a/softfloat/source/RISCV/s_commonNaNToExtF80UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToExtF80UI.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
-All rights reserved.
+California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,9 +34,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
-#include "primitives.h"
+#include "primitiveTypes.h"
 #define softfloat_commonNaNToExtF80UI softfloat_commonNaNToExtF80UI
 #include "specialize.h"
 /*----------------------------------------------------------------------------
@@ -48,8 +49,8 @@ struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr )
 {
    struct uint128 uiZ;
-    uiZ.v64 = (uint_fast16_t) aPtr->sign<<15 | 0x7FFF;
+    uiZ.v64 = defaultNaNExtF80UI64;
-    uiZ.v0 = UINT64_C( 0xC000000000000000 ) | aPtr->v64>>1;
+    uiZ.v0  = defaultNaNExtF80UI0;
    return uiZ;
 }
--- a/softfloat/source/RISCV/s_commonNaNToF128M.c
+++ b/softfloat/source/RISCV/s_commonNaNToF128M.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
-All rights reserved.
+California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -36,7 +36,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <stdint.h>
 #include "platform.h"
-#include "primitives.h"
+#include "primitiveTypes.h"
 #define softfloat_commonNaNToF128M softfloat_commonNaNToF128M
 #include "specialize.h"
 /*----------------------------------------------------------------------------
@@ -49,8 +51,10 @@ void
 softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr )
 {
-    softfloat_shortShiftRight128M( (const uint32_t *) &aPtr->v0, 16, zWPtr );
+    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
-    zWPtr[indexWordHi( 4 )] |= (uint32_t) aPtr->sign<<31 | 0x7FFF8000;
+    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;
 }
--- a/softfloat/source/RISCV/s_commonNaNToF128UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToF128UI.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
-All rights reserved.
+California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,9 +34,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
-#include "primitives.h"
+#include "primitiveTypes.h"
 #define softfloat_commonNaNToF128UI softfloat_commonNaNToF128UI
 #include "specialize.h"
 /*----------------------------------------------------------------------------
@@ -47,8 +48,8 @@ struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN *aPtr )
 {
    struct uint128 uiZ;
-    uiZ = softfloat_shortShiftRight128( aPtr->v64, aPtr->v0, 16 );
+    uiZ.v64 = defaultNaNF128UI64;
-    uiZ.v64 |= (uint_fast64_t) aPtr->sign<<63 | UINT64_C( 0x7FFF800000000000 );
+    uiZ.v0  = defaultNaNF128UI0;
    return uiZ;
 }
--- a/softfloat/source/RISCV/s_commonNaNToF16UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToF16UI.c
@@ -1,51 +1,5 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
 California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "specialize.h"
 /*----------------------------------------------------------------------------
-| Converts the common NaN pointed to by `aPtr' into a 16-bit floating-point
+| This file intentionally contains no code.
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
 uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr )
 {
    /* Sign goes to bit 15, 0x7E00 is always set, and the top ten bits of
       `v64' are OR-ed into the low bits. */
    uint_fast16_t uiZ = (uint_fast16_t) aPtr->sign<<15;

    uiZ |= 0x7E00;
    uiZ |= aPtr->v64>>54;
    return uiZ;
 }
--- a/softfloat/source/RISCV/s_commonNaNToF32UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToF32UI.c
@@ -1,51 +1,5 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "specialize.h"
 /*----------------------------------------------------------------------------
-| Converts the common NaN pointed to by `aPtr' into a 32-bit floating-point
+| This file intentionally contains no code.
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
 uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr )
 {
    /* Sign goes to bit 31, 0x7FC00000 is always set, and the top 23 bits of
       `v64' are OR-ed into the low bits. */
    uint_fast32_t uiZ = (uint_fast32_t) aPtr->sign<<31;

    uiZ |= 0x7FC00000;
    uiZ |= aPtr->v64>>41;
    return uiZ;
 }
--- a/softfloat/source/RISCV/s_commonNaNToF64UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToF64UI.c
@@ -1,53 +1,5 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "specialize.h"
 /*----------------------------------------------------------------------------
-| Converts the common NaN pointed to by `aPtr' into a 64-bit floating-point
+| This file intentionally contains no code.
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
 uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr )
 {
    /* Sign goes to bit 63, 0x7FF8000000000000 is always set, and the top 52
       bits of `v64' are OR-ed into the low bits. */
    uint_fast64_t uiZ = (uint_fast64_t) aPtr->sign<<63;

    uiZ |= UINT64_C( 0x7FF8000000000000 );
    uiZ |= aPtr->v64>>12;
    return uiZ;
 }
--- a/softfloat/source/RISCV/s_extF80MToCommonNaN.c
+++ b/softfloat/source/RISCV/s_extF80MToCommonNaN.c
@@ -1,62 +1,5 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "internals.h"
 #include "specialize.h"
 #include "softfloat.h"
 /*----------------------------------------------------------------------------
-| Assuming the 80-bit extended floating-point value pointed to by `aSPtr' is
+| This file intentionally contains no code.
 | a NaN, converts this NaN to the common NaN form, and stores the resulting
 | common NaN at the location pointed to by `zPtr'.  If the NaN is a signaling
 | NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
 void
 softfloat_extF80MToCommonNaN(
     const struct extFloat80M *aSPtr, struct commonNaN *zPtr )
 {
    /* A signaling NaN input raises the invalid flag before conversion. */
    if ( extF80M_isSignalingNaN( (const extFloat80_t *) aSPtr ) )
        softfloat_raiseFlags( softfloat_flag_invalid );

    /* Sign comes from the sign/exponent word; the significand is shifted
       left by one into the upper word and the lower word is cleared. */
    zPtr->sign = signExtF80UI64( aSPtr->signExp );
    zPtr->v64  = aSPtr->signif<<1;
    zPtr->v0   = 0;
 }
--- a/softfloat/source/RISCV/s_extF80UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_extF80UIToCommonNaN.c
@@ -1,62 +1,5 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "specialize.h"
 #include "softfloat.h"
 /*----------------------------------------------------------------------------
-| Assuming the unsigned integer formed from concatenating `uiA64' and `uiA0'
+| This file intentionally contains no code.
 | has the bit pattern of an 80-bit extended floating-point NaN, converts
 | this NaN to the common NaN form, and stores the resulting common NaN at the
 | location pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
 void
 softfloat_extF80UIToCommonNaN(
     uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr )
 {
    /* Both results depend only on the by-value arguments, so they are
       computed up front. */
    const uint_fast16_t signBits = uiA64>>15;
    const uint_fast64_t payload  = uiA0<<1;

    /* A signaling NaN input raises the invalid flag. */
    if ( softfloat_isSigNaNExtF80UI( uiA64, uiA0 ) )
        softfloat_raiseFlags( softfloat_flag_invalid );
    zPtr->sign = signBits;
    zPtr->v64  = payload;
    zPtr->v0   = 0;
 }
--- a/softfloat/source/RISCV/s_f128MToCommonNaN.c
+++ b/softfloat/source/RISCV/s_f128MToCommonNaN.c
@@ -1,62 +1,5 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "primitives.h"
 #include "specialize.h"
 #include "softfloat.h"
 /*----------------------------------------------------------------------------
-| Assuming the 128-bit floating-point value pointed to by `aWPtr' is a NaN,
+| This file intentionally contains no code.
 | converts this NaN to the common NaN form, and stores the resulting common
 | NaN at the location pointed to by `zPtr'.  If the NaN is a signaling NaN,
 | the invalid exception is raised.  Argument `aWPtr' points to an array of
 | four 32-bit elements that concatenate in the platform's normal endian order
 | to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
 void
 softfloat_f128MToCommonNaN( const uint32_t *aWPtr, struct commonNaN *zPtr )
 {
    /* A signaling NaN input raises the invalid flag before conversion. */
    if ( f128M_isSignalingNaN( (const float128_t *) aWPtr ) )
        softfloat_raiseFlags( softfloat_flag_invalid );

    /* Sign is the top bit of the most-significant word; the four input
       words, shifted left by 16, are written starting at `v0'. */
    zPtr->sign = aWPtr[indexWordHi( 4 )]>>31;
    softfloat_shortShiftLeft128M( aWPtr, 16, (uint32_t *) &zPtr->v0 );
 }
--- a/softfloat/source/RISCV/s_f128UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_f128UIToCommonNaN.c
@@ -1,65 +1,5 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "primitives.h"
 #include "specialize.h"
 #include "softfloat.h"
 /*----------------------------------------------------------------------------
-| Assuming the unsigned integer formed from concatenating `uiA64' and `uiA0'
+| This file intentionally contains no code.
 | has the bit pattern of a 128-bit floating-point NaN, converts this NaN to
 | the common NaN form, and stores the resulting common NaN at the location
 | pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid exception
 | is raised.
 *----------------------------------------------------------------------------*/
 void
 softfloat_f128UIToCommonNaN(
     uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr )
 {
    struct uint128 NaNSig;
    if ( softfloat_isSigNaNF128UI( uiA64, uiA0 ) ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
    }
    NaNSig = softfloat_shortShiftLeft128( uiA64, uiA0, 16 );
    zPtr->sign = uiA64>>63;
    zPtr->v64  = NaNSig.v64;
    zPtr->v0   = NaNSig.v0;
 }
--- a/softfloat/source/RISCV/s_f16UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_f16UIToCommonNaN.c
@@ -1,59 +1,5 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
 California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "specialize.h"
 #include "softfloat.h"
 /*----------------------------------------------------------------------------
-| Assuming `uiA' has the bit pattern of a 16-bit floating-point NaN, converts
+| This file intentionally contains no code.
 | this NaN to the common NaN form, and stores the resulting common NaN at the
 | location pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
 void softfloat_f16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr )
 {
    /* Both results depend only on the by-value argument, so they are
       computed up front. */
    const uint_fast16_t signBits = uiA>>15;
    const uint_fast64_t payload  = (uint_fast64_t) uiA<<54;

    /* A signaling NaN input raises the invalid flag. */
    if ( softfloat_isSigNaNF16UI( uiA ) )
        softfloat_raiseFlags( softfloat_flag_invalid );
    zPtr->sign = signBits;
    zPtr->v64  = payload;
    zPtr->v0   = 0;
 }
--- a/softfloat/source/RISCV/s_f32UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_f32UIToCommonNaN.c
@@ -1,59 +1,5 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "specialize.h"
 #include "softfloat.h"
 /*----------------------------------------------------------------------------
-| Assuming `uiA' has the bit pattern of a 32-bit floating-point NaN, converts
+| This file intentionally contains no code.
 | this NaN to the common NaN form, and stores the resulting common NaN at the
 | location pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
 void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr )
 {
    /* Both results depend only on the by-value argument, so they are
       computed up front. */
    const uint_fast32_t signBits = uiA>>31;
    const uint_fast64_t payload  = (uint_fast64_t) uiA<<41;

    /* A signaling NaN input raises the invalid flag. */
    if ( softfloat_isSigNaNF32UI( uiA ) )
        softfloat_raiseFlags( softfloat_flag_invalid );
    zPtr->sign = signBits;
    zPtr->v64  = payload;
    zPtr->v0   = 0;
 }
--- a/softfloat/source/RISCV/s_f64UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_f64UIToCommonNaN.c
@@ -1,59 +1,5 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "specialize.h"
 #include "softfloat.h"
 /*----------------------------------------------------------------------------
-| Assuming `uiA' has the bit pattern of a 64-bit floating-point NaN, converts
+| This file intentionally contains no code.
 | this NaN to the common NaN form, and stores the resulting common NaN at the
 | location pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
 void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr )
 {
    if ( softfloat_isSigNaNF64UI( uiA ) ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
    }
    zPtr->sign = uiA>>63;
    zPtr->v64  = uiA<<12;
    zPtr->v0   = 0;
 }
--- a/softfloat/source/RISCV/s_propagateNaNExtF80M.c
+++ b/softfloat/source/RISCV/s_propagateNaNExtF80M.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
-All rights reserved.
+California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,10 +34,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
+#include "primitiveTypes.h"
 #include "specialize.h"
 #include "softfloat.h"
@@ -54,54 +53,22 @@ void
     struct extFloat80M *zSPtr
 )
 {
-    bool isSigNaNA;
+    uint_fast16_t ui64;
-    const struct extFloat80M *sPtr;
+    uint_fast64_t ui0;
    bool isSigNaNB;
    uint_fast16_t uiB64;
    uint64_t uiB0;
    uint_fast16_t uiA64;
    uint64_t uiA0;
    uint_fast16_t uiMagA64, uiMagB64;
-    isSigNaNA = extF80M_isSignalingNaN( (const extFloat80_t *) aSPtr );
+    ui64 = aSPtr->signExp;
-    sPtr = aSPtr;
+    ui0  = aSPtr->signif;
-    if ( ! bSPtr ) {
+    if (
-        if ( isSigNaNA ) softfloat_raiseFlags( softfloat_flag_invalid );
+        softfloat_isSigNaNExtF80UI( ui64, ui0 )
-        goto copy;
+            || (bSPtr
-    }
+                    && (ui64 = bSPtr->signExp,
-    isSigNaNB = extF80M_isSignalingNaN( (const extFloat80_t *) bSPtr );
+                        ui0  = bSPtr->signif,
-    if ( isSigNaNA | isSigNaNB ) {
+                        softfloat_isSigNaNExtF80UI( ui64, ui0 )))
    ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
        if ( isSigNaNA ) {
            uiB64 = bSPtr->signExp;
            if ( isSigNaNB ) goto returnLargerUIMag;
            uiB0 = bSPtr->signif;
            if ( isNaNExtF80UI( uiB64, uiB0 ) ) goto copyB;
            goto copy;
        } else {
            uiA64 = aSPtr->signExp;
            uiA0 = aSPtr->signif;
            if ( isNaNExtF80UI( uiA64, uiA0 ) ) goto copy;
            goto copyB;
        }
    }
-    uiB64 = bSPtr->signExp;
+    zSPtr->signExp = defaultNaNExtF80UI64;
- returnLargerUIMag:
+    zSPtr->signif  = defaultNaNExtF80UI0;
    uiA64 = aSPtr->signExp;
    uiMagA64 = uiA64 & 0x7FFF;
    uiMagB64 = uiB64 & 0x7FFF;
    if ( uiMagA64 < uiMagB64 ) goto copyB;
    if ( uiMagB64 < uiMagA64 ) goto copy;
    uiA0 = aSPtr->signif;
    uiB0 = bSPtr->signif;
    if ( uiA0 < uiB0 ) goto copyB;
    if ( uiB0 < uiA0 ) goto copy;
    if ( uiA64 < uiB64 ) goto copy;
 copyB:
    sPtr = bSPtr;
 copy:
    zSPtr->signExp = sPtr->signExp;
    zSPtr->signif = sPtr->signif | UINT64_C( 0xC000000000000000 );
 }
--- a/softfloat/source/RISCV/s_propagateNaNExtF80UI.c
+++ b/softfloat/source/RISCV/s_propagateNaNExtF80UI.c
@@ -4,7 +4,7 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014, 2018 The Regents of the University of
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
 California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
@@ -34,17 +34,16 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
+#include "primitiveTypes.h"
 #include "specialize.h"
 #include "softfloat.h"
 /*----------------------------------------------------------------------------
-| Interpreting the unsigned integer formed from concatenating 'uiA64' and
+| Interpreting the unsigned integer formed from concatenating `uiA64' and
-| 'uiA0' as an 80-bit extended floating-point value, and likewise interpreting
+| `uiA0' as an 80-bit extended floating-point value, and likewise interpreting
-| the unsigned integer formed from concatenating 'uiB64' and 'uiB0' as another
+| the unsigned integer formed from concatenating `uiB64' and `uiB0' as another
 | 80-bit extended floating-point value, and assuming at least one of these
 | floating-point values is a NaN, returns the bit pattern of the combined NaN
 | result.  If either original floating-point value is a signaling NaN, the
@@ -58,48 +57,16 @@ struct uint128
     uint_fast64_t uiB0
 )
 {
    bool isSigNaNA, isSigNaNB;
    uint_fast64_t uiNonsigA0, uiNonsigB0;
    uint_fast16_t uiMagA64, uiMagB64;
    struct uint128 uiZ;
-    /*------------------------------------------------------------------------
+    if (
-    *------------------------------------------------------------------------*/
+           softfloat_isSigNaNExtF80UI( uiA64, uiA0 )
-    isSigNaNA = softfloat_isSigNaNExtF80UI( uiA64, uiA0 );
+        || softfloat_isSigNaNExtF80UI( uiB64, uiB0 )
-    isSigNaNB = softfloat_isSigNaNExtF80UI( uiB64, uiB0 );
+    ) {
    /*------------------------------------------------------------------------
    | Make NaNs non-signaling.
    *------------------------------------------------------------------------*/
    uiNonsigA0 = uiA0 | UINT64_C( 0xC000000000000000 );
    uiNonsigB0 = uiB0 | UINT64_C( 0xC000000000000000 );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( isSigNaNA | isSigNaNB ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
        if ( isSigNaNA ) {
            if ( isSigNaNB ) goto returnLargerMag;
            if ( isNaNExtF80UI( uiB64, uiB0 ) ) goto returnB;
            goto returnA;
        } else {
            if ( isNaNExtF80UI( uiA64, uiA0 ) ) goto returnA;
            goto returnB;
        }
    }
- returnLargerMag:
+    uiZ.v64 = defaultNaNExtF80UI64;
-    uiMagA64 = uiA64 & 0x7FFF;
+    uiZ.v0  = defaultNaNExtF80UI0;
    uiMagB64 = uiB64 & 0x7FFF;
    if ( uiMagA64 < uiMagB64 ) goto returnB;
    if ( uiMagB64 < uiMagA64 ) goto returnA;
    if ( uiA0 < uiB0 ) goto returnB;
    if ( uiB0 < uiA0 ) goto returnA;
    if ( uiA64 < uiB64 ) goto returnA;
 returnB:
    uiZ.v64 = uiB64;
    uiZ.v0  = uiNonsigB0;
    return uiZ;
 returnA:
    uiZ.v64 = uiA64;
    uiZ.v0  = uiNonsigA0;
    return uiZ;
 }
--- a/softfloat/source/RISCV/s_propagateNaNF128M.c
+++ b/softfloat/source/RISCV/s_propagateNaNF128M.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+Copyright 2011, 2012, 2013, 2014, 2015, 2018 The Regents of the University of
-All rights reserved.
+California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,43 +34,35 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
+#include "primitiveTypes.h"
 #include "specialize.h"
 #include "softfloat.h"
 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 128-bit floating-point values pointed to by
-| `aWPtr' and `bWPtr' is a NaN, stores the combined NaN result at the location
+| 'aWPtr' and 'bWPtr' is a NaN, stores the combined NaN result at the location
-| pointed to by `zWPtr'.  If either original floating-point value is a
+| pointed to by 'zWPtr'.  If either original floating-point value is a
-| signaling NaN, the invalid exception is raised.  Each of `aWPtr', `bWPtr',
+| signaling NaN, the invalid exception is raised.  Each of 'aWPtr', 'bWPtr',
-| and `zWPtr' points to an array of four 32-bit elements that concatenate in
+| and 'zWPtr' points to an array of four 32-bit elements that concatenate in
 | the platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
 void
 softfloat_propagateNaNF128M(
     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr )
 {
    bool isSigNaNA;
    const uint32_t *ptr;
    ptr = aWPtr;
    isSigNaNA = f128M_isSignalingNaN( (const float128_t *) aWPtr );
    if (
-        isSigNaNA
+        f128M_isSignalingNaN( (const float128_t *) aWPtr )
            || (bWPtr && f128M_isSignalingNaN( (const float128_t *) bWPtr ))
    ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
        if ( isSigNaNA ) goto copy;
    }
-    if ( ! softfloat_isNaNF128M( aWPtr ) ) ptr = bWPtr;
+    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
- copy:
+    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
-    zWPtr[indexWordHi( 4 )] = ptr[indexWordHi( 4 )] | 0x00008000;
+    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
-    zWPtr[indexWord( 4, 2 )] = ptr[indexWord( 4, 2 )];
+    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;
    zWPtr[indexWord( 4, 1 )] = ptr[indexWord( 4, 1 )];
    zWPtr[indexWord( 4, 0 )] = ptr[indexWord( 4, 0 )];
 }
--- a/softfloat/source/RISCV/s_propagateNaNF128UI.c
+++ b/softfloat/source/RISCV/s_propagateNaNF128UI.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
-All rights reserved.
+California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,10 +34,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
+#include "primitiveTypes.h"
 #include "specialize.h"
 #include "softfloat.h"
@@ -58,23 +57,16 @@ struct uint128
     uint_fast64_t uiB0
 )
 {
    bool isSigNaNA;
    struct uint128 uiZ;
-    isSigNaNA = softfloat_isSigNaNF128UI( uiA64, uiA0 );
+    if (
-    if ( isSigNaNA || softfloat_isSigNaNF128UI( uiB64, uiB0 ) ) {
+           softfloat_isSigNaNF128UI( uiA64, uiA0 )
        || softfloat_isSigNaNF128UI( uiB64, uiB0 )
    ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
        if ( isSigNaNA ) goto returnNonsigA;
    }
-    if ( isNaNF128UI( uiA64, uiA0 ) ) {
+    uiZ.v64 = defaultNaNF128UI64;
- returnNonsigA:
+    uiZ.v0  = defaultNaNF128UI0;
        uiZ.v64 = uiA64;
        uiZ.v0  = uiA0;
    } else {
        uiZ.v64 = uiB64;
        uiZ.v0  = uiB0;
    }
    uiZ.v64 |= UINT64_C( 0x0000800000000000 );
    return uiZ;
 }
--- a/softfloat/source/RISCV/s_propagateNaNF16UI.c
+++ b/softfloat/source/RISCV/s_propagateNaNF16UI.c
@@ -4,7 +4,7 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
 California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
@@ -34,10 +34,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
 #include "internals.h"
 #include "specialize.h"
 #include "softfloat.h"
@@ -50,14 +48,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 uint_fast16_t
 softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB )
 {
    bool isSigNaNA;
-    isSigNaNA = softfloat_isSigNaNF16UI( uiA );
+    if ( softfloat_isSigNaNF16UI( uiA ) || softfloat_isSigNaNF16UI( uiB ) ) {
    if ( isSigNaNA || softfloat_isSigNaNF16UI( uiB ) ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
        if ( isSigNaNA ) return uiA | 0x0200;
    }
-    return (isNaNF16UI( uiA ) ? uiA : uiB) | 0x0200;
+    return defaultNaNF16UI;
 }
--- a/softfloat/source/RISCV/s_propagateNaNF32UI.c
+++ b/softfloat/source/RISCV/s_propagateNaNF32UI.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
-All rights reserved.
+California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,10 +34,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
 #include "internals.h"
 #include "specialize.h"
 #include "softfloat.h"
@@ -50,14 +48,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 uint_fast32_t
 softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB )
 {
    bool isSigNaNA;
-    isSigNaNA = softfloat_isSigNaNF32UI( uiA );
+    if ( softfloat_isSigNaNF32UI( uiA ) || softfloat_isSigNaNF32UI( uiB ) ) {
    if ( isSigNaNA || softfloat_isSigNaNF32UI( uiB ) ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
        if ( isSigNaNA ) return uiA | 0x00400000;
    }
-    return (isNaNF32UI( uiA ) ? uiA : uiB) | 0x00400000;
+    return defaultNaNF32UI;
 }
--- a/softfloat/source/RISCV/s_propagateNaNF64UI.c
+++ b/softfloat/source/RISCV/s_propagateNaNF64UI.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
-All rights reserved.
+California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,10 +34,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
 #include "internals.h"
 #include "specialize.h"
 #include "softfloat.h"
@@ -50,14 +48,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 uint_fast64_t
 softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB )
 {
    bool isSigNaNA;
-    isSigNaNA = softfloat_isSigNaNF64UI( uiA );
+    if ( softfloat_isSigNaNF64UI( uiA ) || softfloat_isSigNaNF64UI( uiB ) ) {
    if ( isSigNaNA || softfloat_isSigNaNF64UI( uiB ) ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
        if ( isSigNaNA ) return uiA | UINT64_C( 0x0008000000000000 );
    }
-    return (isNaNF64UI( uiA ) ? uiA : uiB) | UINT64_C( 0x0008000000000000 );
+    return defaultNaNF64UI;
 }
--- a/softfloat/source/RISCV/specialize.h
+++ b/softfloat/source/RISCV/specialize.h
@@ -51,19 +51,19 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | The values to return on conversions to 32-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
-#define ui32_fromPosOverflow UINT32_C(0xFFFFFFFF)
+#define ui32_fromPosOverflow 0xFFFFFFFF
-#define ui32_fromNegOverflow UINT32_C(0x0)
+#define ui32_fromNegOverflow 0
-#define ui32_fromNaN UINT32_C(0xFFFFFFFF)
+#define ui32_fromNaN 0xFFFFFFFF
-#define i32_fromPosOverflow INT64_C(0x7FFFFFFF)
+#define i32_fromPosOverflow 0x7FFFFFFF
-#define i32_fromNegOverflow (-INT64_C(0x7FFFFFFF) - 1)
+#define i32_fromNegOverflow (-0x7FFFFFFF - 1)
-#define i32_fromNaN INT64_C(0x7FFFFFFF)
+#define i32_fromNaN 0x7FFFFFFF
 /*----------------------------------------------------------------------------
 | The values to return on conversions to 64-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
 #define ui64_fromPosOverflow UINT64_C(0xFFFFFFFFFFFFFFFF)
-#define ui64_fromNegOverflow UINT64_C(0x0)
+#define ui64_fromNegOverflow 0
 #define ui64_fromNaN UINT64_C(0xFFFFFFFFFFFFFFFF)
 #define i64_fromPosOverflow INT64_C(0x7FFFFFFFFFFFFFFF)
 #define i64_fromNegOverflow (-INT64_C(0x7FFFFFFFFFFFFFFF) - 1)
@@ -74,18 +74,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | to another.
 *----------------------------------------------------------------------------*/
 struct commonNaN {
-    bool sign;
+    char _unused;
 #ifdef LITTLEENDIAN
    uint64_t v0, v64;
 #else
    uint64_t v64, v0;
 #endif
 };
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 16-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF16UI 0xFE00
+#define defaultNaNF16UI 0x7E00
 /*----------------------------------------------------------------------------
 | Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
@@ -94,19 +89,38 @@ struct commonNaN {
 *----------------------------------------------------------------------------*/
 #define softfloat_isSigNaNF16UI(uiA) ((((uiA)&0x7E00) == 0x7C00) && ((uiA)&0x01FF))
 /*----------------------------------------------------------------------------
 | Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
 | 16-bit brain floating-point (BF16) signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
 #define softfloat_isSigNaNBF16UI(uiA) ((((uiA)&0x7FC0) == 0x7F80) && ((uiA)&0x003F))
 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
 | this NaN to the common NaN form, and stores the resulting common NaN at the
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f16UIToCommonNaN(uint_fast16_t uiA, struct commonNaN* zPtr);
+#define softfloat_f16UIToCommonNaN(uiA, zPtr)                                                                                              \
    if(!((uiA)&0x0200))                                                                                                                    \
    softfloat_raiseFlags(softfloat_flag_invalid)
 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts
 | this NaN to the common NaN form, and stores the resulting common NaN at the
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
 #define softfloat_bf16UIToCommonNaN(uiA, zPtr)                                                                                             \
    if(!((uiA)&0x0040))                                                                                                                    \
    softfloat_raiseFlags(softfloat_flag_invalid)
 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast16_t softfloat_commonNaNToF16UI(const struct commonNaN* aPtr);
+#define softfloat_commonNaNToF16UI(aPtr) ((uint_fast16_t)defaultNaNF16UI)
 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 16-bit floating-
@@ -116,6 +130,17 @@ uint_fast16_t softfloat_commonNaNToF16UI(const struct commonNaN* aPtr);
 *----------------------------------------------------------------------------*/
 uint_fast16_t softfloat_propagateNaNF16UI(uint_fast16_t uiA, uint_fast16_t uiB);
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 16-bit BF16 floating-point NaN.
 *----------------------------------------------------------------------------*/
 #define defaultNaNBF16UI 0x7FC0
 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit BF16 floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
 #define softfloat_commonNaNToBF16UI(aPtr) ((uint_fast16_t)defaultNaNBF16UI)
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 32-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
@@ -134,13 +159,15 @@ uint_fast16_t softfloat_propagateNaNF16UI(uint_fast16_t uiA, uint_fast16_t uiB);
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f32UIToCommonNaN(uint_fast32_t uiA, struct commonNaN* zPtr);
+#define softfloat_f32UIToCommonNaN(uiA, zPtr)                                                                                              \
    if(!((uiA)&0x00400000))                                                                                                                \
    softfloat_raiseFlags(softfloat_flag_invalid)
 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast32_t softfloat_commonNaNToF32UI(const struct commonNaN* aPtr);
+#define softfloat_commonNaNToF32UI(aPtr) ((uint_fast32_t)defaultNaNF32UI)
 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 32-bit floating-
@@ -169,13 +196,15 @@ uint_fast32_t softfloat_propagateNaNF32UI(uint_fast32_t uiA, uint_fast32_t uiB);
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f64UIToCommonNaN(uint_fast64_t uiA, struct commonNaN* zPtr);
+#define softfloat_f64UIToCommonNaN(uiA, zPtr)                                                                                              \
    if(!((uiA)&UINT64_C(0x0008000000000000)))                                                                                              \
    softfloat_raiseFlags(softfloat_flag_invalid)
 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast64_t softfloat_commonNaNToF64UI(const struct commonNaN* aPtr);
+#define softfloat_commonNaNToF64UI(aPtr) ((uint_fast64_t)defaultNaNF64UI)
 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 64-bit floating-
@@ -188,7 +217,7 @@ uint_fast64_t softfloat_propagateNaNF64UI(uint_fast64_t uiA, uint_fast64_t uiB);
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 80-bit extended floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNExtF80UI64 0xFFFF
+#define defaultNaNExtF80UI64 0x7FFF
 #define defaultNaNExtF80UI0 UINT64_C(0xC000000000000000)
 /*----------------------------------------------------------------------------
@@ -214,14 +243,26 @@ uint_fast64_t softfloat_propagateNaNF64UI(uint_fast64_t uiA, uint_fast64_t uiB);
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_extF80UIToCommonNaN(uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN* zPtr);
+#define softfloat_extF80UIToCommonNaN(uiA64, uiA0, zPtr)                                                                                   \
    if(!((uiA0)&UINT64_C(0x4000000000000000)))                                                                                             \
    softfloat_raiseFlags(softfloat_flag_invalid)
 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and returns the bit pattern of this value as an unsigned
 | integer.
 *----------------------------------------------------------------------------*/
 #if defined INLINE && !defined softfloat_commonNaNToExtF80UI
 INLINE
 struct uint128 softfloat_commonNaNToExtF80UI(const struct commonNaN* aPtr) {
    struct uint128 uiZ;
    uiZ.v64 = defaultNaNExtF80UI64;
    uiZ.v0 = defaultNaNExtF80UI0;
    return uiZ;
 }
 #else
 struct uint128 softfloat_commonNaNToExtF80UI(const struct commonNaN* aPtr);
 #endif
 /*----------------------------------------------------------------------------
 | Interpreting the unsigned integer formed from concatenating 'uiA64' and
@@ -237,7 +278,7 @@ struct uint128 softfloat_propagateNaNExtF80UI(uint_fast16_t uiA64, uint_fast64_t
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF128UI64 UINT64_C(0xFFFF800000000000)
+#define defaultNaNF128UI64 UINT64_C(0x7FFF800000000000)
 #define defaultNaNF128UI0 UINT64_C(0)
 /*----------------------------------------------------------------------------
@@ -256,13 +297,25 @@ struct uint128 softfloat_propagateNaNExtF80UI(uint_fast16_t uiA64, uint_fast64_t
 | pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid exception
 | is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f128UIToCommonNaN(uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN* zPtr);
+#define softfloat_f128UIToCommonNaN(uiA64, uiA0, zPtr)                                                                                     \
    if(!((uiA64)&UINT64_C(0x0000800000000000)))                                                                                            \
    softfloat_raiseFlags(softfloat_flag_invalid)
 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
 #if defined INLINE && !defined softfloat_commonNaNToF128UI
 INLINE
 struct uint128 softfloat_commonNaNToF128UI(const struct commonNaN* aPtr) {
    struct uint128 uiZ;
    uiZ.v64 = defaultNaNF128UI64;
    uiZ.v0 = defaultNaNF128UI0;
    return uiZ;
 }
 #else
 struct uint128 softfloat_commonNaNToF128UI(const struct commonNaN*);
 #endif
 /*----------------------------------------------------------------------------
 | Interpreting the unsigned integer formed from concatenating 'uiA64' and
@@ -288,14 +341,24 @@ struct uint128 softfloat_propagateNaNF128UI(uint_fast64_t uiA64, uint_fast64_t u
 | common NaN at the location pointed to by 'zPtr'.  If the NaN is a signaling
 | NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_extF80MToCommonNaN(const struct extFloat80M* aSPtr, struct commonNaN* zPtr);
+#define softfloat_extF80MToCommonNaN(aSPtr, zPtr)                                                                                          \
    /* bit 62 of the significand clear: the signaling-NaN case, which raises 'invalid'; zPtr is unused */                                  \
    if(((aSPtr)->signif & UINT64_C(0x4000000000000000)) == 0)                                                                              \
    softfloat_raiseFlags(softfloat_flag_invalid)
 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and stores this NaN at the location pointed to by
 | 'zSPtr'.
 *----------------------------------------------------------------------------*/
 #if defined INLINE && !defined softfloat_commonNaNToExtF80M
 INLINE
 void softfloat_commonNaNToExtF80M(const struct commonNaN* aPtr, struct extFloat80M* zSPtr) {
    /* 'aPtr' is not consulted: both fields are overwritten with the default NaN pattern. */
    zSPtr->signif = defaultNaNExtF80UI0;
    zSPtr->signExp = defaultNaNExtF80UI64;
 }
 #else
 void softfloat_commonNaNToExtF80M(const struct commonNaN* aPtr, struct extFloat80M* zSPtr);
 #endif
 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 80-bit extended floating-point values
@@ -308,7 +371,7 @@ void softfloat_propagateNaNExtF80M(const struct extFloat80M* aSPtr, const struct
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF128UI96 0xFFFF8000
+#define defaultNaNF128UI96 0x7FFF8000
 #define defaultNaNF128UI64 0
 #define defaultNaNF128UI32 0
 #define defaultNaNF128UI0 0
@@ -321,7 +384,9 @@ void softfloat_propagateNaNExtF80M(const struct extFloat80M* aSPtr, const struct
 | four 32-bit elements that concatenate in the platform's normal endian order
 | to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void softfloat_f128MToCommonNaN(const uint32_t* aWPtr, struct commonNaN* zPtr);
+#define softfloat_f128MToCommonNaN(aWPtr, zPtr)                                                                                            \
    /* 'aWPtr' addresses 32-bit words, so the quiet bit of the most-significant word is bit 15 (0x00008000, */                             \
    /* the bit set in defaultNaNF128UI96).  A 64-bit mask at bit 47 can never match a 32-bit word and would */                             \
    /* raise 'invalid' for every NaN, quiet or signaling.  zPtr is unused. */                                                              \
    if(!((aWPtr)[indexWordHi(4)] & UINT32_C(0x00008000)))                                                                                  \
    softfloat_raiseFlags(softfloat_flag_invalid)
 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
@@ -329,7 +394,17 @@ void softfloat_f128MToCommonNaN(const uint32_t* aWPtr, struct commonNaN* zPtr);
 | 'zWPtr' points to an array of four 32-bit elements that concatenate in the
 | platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
 #if defined INLINE && !defined softfloat_commonNaNToF128M
 INLINE
 void softfloat_commonNaNToF128M(const struct commonNaN* aPtr, uint32_t* zWPtr) {
    /* 'aPtr' is not consulted: all four words receive the default NaN pattern. */
    zWPtr[indexWord(4, 0)] = defaultNaNF128UI0;
    zWPtr[indexWord(4, 1)] = defaultNaNF128UI32;
    zWPtr[indexWord(4, 2)] = defaultNaNF128UI64;
    zWPtr[indexWord(4, 3)] = defaultNaNF128UI96;
 }
 #else
 void softfloat_commonNaNToF128M(const struct commonNaN* aPtr, uint32_t* zWPtr);
 #endif
 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 128-bit floating-point values pointed to by
--- a/softfloat/source/bf16_isSignalingNaN.c
+++ b/softfloat/source/bf16_isSignalingNaN.c
@@ -0,0 +1,51 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
 California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdbool.h>
 #include "platform.h"
 #include "internals.h"
 #include "specialize.h"
 #include "softfloat.h"
 /* Reports whether the bfloat16 value 'a' is a signaling NaN, as decided by
    softfloat_isSigNaNBF16UI on its raw 16-bit encoding. */
 bool bf16_isSignalingNaN( bfloat16_t a )
 {
    union ui16_bf16 bits;
    bits.f = a;
    return softfloat_isSigNaNBF16UI( bits.ui );
 }
--- a/softfloat/source/bf16_to_f32.c
+++ b/softfloat/source/bf16_to_f32.c
@@ -0,0 +1,90 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
 California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
 #include "internals.h"
 #include "specialize.h"
 #include "softfloat.h"
 /*----------------------------------------------------------------------------
 | Converts the bfloat16 value 'a' to a 32-bit floating-point value.
 | A NaN input is routed through the common-NaN helpers, an infinity keeps its
 | sign, and every other input is repacked with its fraction shifted left by
 | 16 bits.
 *----------------------------------------------------------------------------*/
 float32_t bf16_to_f32( bfloat16_t a )
 {
    union ui16_bf16 uA;
    uint_fast16_t uiA;
    bool sign;
    int_fast16_t exp;
    uint_fast16_t frac;
    struct commonNaN commonNaN;
    uint_fast32_t uiZ;
    union ui32_f32 uZ;
    /*------------------------------------------------------------------------
    | Split the raw encoding into sign, exponent and fraction fields.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA = uA.ui;
    sign = signBF16UI( uiA );
    exp  = expBF16UI( uiA );
    frac = fracBF16UI( uiA );
    /*------------------------------------------------------------------------
    | Exponent field all ones: NaN (non-zero fraction) or infinity.
    *------------------------------------------------------------------------*/
    if ( exp == 0xFF ) {
        if ( frac ) {
            softfloat_bf16UIToCommonNaN( uiA, &commonNaN );
            uiZ = softfloat_commonNaNToF32UI( &commonNaN );
        } else {
            uiZ = packToF32UI( sign, 0xFF, 0 );
        }
        goto uiZ;
    }
    /*------------------------------------------------------------------------
    | Everything else, including zero and subnormal inputs: the sign and
    | exponent fields are reused unchanged and the fraction is padded on the
    | right with 16 zero bits, so no normalization or rounding is performed.
    *------------------------------------------------------------------------*/
    uiZ = packToF32UI( sign, exp, ((uint_fast32_t) frac) <<16 );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;
 }
--- a/softfloat/source/f32_to_bf16.c
+++ b/softfloat/source/f32_to_bf16.c
@@ -0,0 +1,105 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
 California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
 #include "internals.h"
 #include "specialize.h"
 #include "softfloat.h"
 #include <inttypes.h>
 #include <stdio.h>
 /*----------------------------------------------------------------------------
 | Converts the 32-bit floating-point value 'a' to bfloat16.  NaN inputs go
 | through the common-NaN helpers, infinities and exact zeros are packed
 | directly, and all other values are rounded by softfloat_roundPackToBF16.
 *----------------------------------------------------------------------------*/
 bfloat16_t f32_to_bf16( float32_t a )
 {
    union ui32_f32 uA;
    uint_fast32_t uiA;
    bool sign;
    int_fast16_t exp;
    uint_fast32_t frac;
    struct commonNaN commonNaN;
    uint_fast16_t uiZ, frac16;
    union ui16_bf16 uZ;
    /*------------------------------------------------------------------------
    | Split the raw encoding into sign, exponent and fraction fields.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA = uA.ui;
    sign = signF32UI( uiA );
    exp  = expF32UI( uiA );
    frac = fracF32UI( uiA );
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // infinity or NaN cases
    if ( exp == 0xFF ) {
        if ( frac ) {
            // NaN case
            softfloat_f32UIToCommonNaN( uiA, &commonNaN );
            uiZ = softfloat_commonNaNToBF16UI( &commonNaN );
        } else {
            // infinity case
            uiZ = packToBF16UI( sign, 0xFF, 0 );
        }
        goto uiZ;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // The fraction is shifted right by 9; the 9 discarded low bits are OR-ed
    // into bit 0 as a sticky bit so the rounding step still sees inexactness.
    // NOTE(review): presumably frac is the 23-bit F32 fraction field, which
    // would leave 14 bits in frac16 (bits 13..0) -- confirm against fracF32UI.
    frac16 = frac>>9 | ((frac & 0x1FF) != 0);
    // exponent and reduced fraction both zero: return a signed zero
    if ( ! (exp | frac16) ) {
        uiZ = packToBF16UI( sign, 0, 0 );
        goto uiZ;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // softfloat_roundPackToBF16 exponent argument (2nd argument)
    // must correspond to the exponent of fracIn[13] bits
    // (fracIn is the 3rd and last argument)
    uint_fast32_t mask = exp ? 0x4000 : 0x0; // implicit one (bit 14) added only when the exponent field is non-zero
    // exponent for the lowest normal and largest subnormal should be equal
    // but is not in IEEE encoding so mantissa must be partially normalized
    // (by one bit) for subnormal numbers. Such that (exp - 1) corresponds
    // to the exponent of frac16[13]
    frac16 = frac16 << (exp ? 0 : 1);
    return softfloat_roundPackToBF16( sign, exp - 1, frac16 | mask );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;
 }
--- a/softfloat/source/f32_to_f16.c
+++ b/softfloat/source/f32_to_f16.c
@@ -72,6 +72,9 @@ float16_t f32_to_f16( float32_t a )
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    // frac is a 24-bit significand, the bottom 9 bits LSB are extracted and OR-red
    // into a sticky flag, the top 15 MSBs are extracted, the LSB of this top slice
    // is OR-red with the sticky 
    frac16 = frac>>9 | ((frac & 0x1FF) != 0);
    if ( ! (exp | frac16) ) {
        uiZ = packToF16UI( sign, 0, 0 );
--- a/softfloat/source/include/internals.h
+++ b/softfloat/source/include/internals.h
@@ -46,6 +46,10 @@ union ui16_f16 {
    uint16_t ui;
    float16_t f;
 };
 /* Overlays a bfloat16_t with a uint16_t so the raw 16-bit encoding can be
    read or written as an integer ('ui') or as the float type ('f'). */
 union ui16_bf16 {
    uint16_t ui;
    bfloat16_t f;
 };
 union ui32_f32 {
    uint32_t ui;
    float32_t f;
@@ -108,6 +112,18 @@ float16_t softfloat_addMagsF16(uint_fast16_t, uint_fast16_t);
 float16_t softfloat_subMagsF16(uint_fast16_t, uint_fast16_t);
 float16_t softfloat_mulAddF16(uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t);
 /*----------------------------------------------------------------------------
 *----------------------------------------------------------------------------*/
 /* Field helpers for a raw bfloat16 encoding: sign in bit 15, exponent in
    bits 14..7, fraction in bits 6..0. */
 /* Sign bit as a bool. */
 #define signBF16UI(a) ((bool)((uint16_t)(a) >> 15))
 /* 8-bit exponent field. */
 #define expBF16UI(a) ((int_fast16_t)((a) >> 7) & 0xFF)
 /* 7-bit fraction field. */
 #define fracBF16UI(a) ((a)&0x07F)
 /* Builds an encoding by ADDING the shifted fields, so a 'sig' wider than
    7 bits carries into the exponent field. */
 #define packToBF16UI(sign, exp, sig) (((uint16_t)(sign) << 15) + ((uint16_t)(exp) << 7) + (sig))
 /* True for any NaN: every exponent bit (mask 0x7F80) set and a non-zero
    fraction.  The exponent mask must not include fraction bit 6; with 0x7FC0
    a NaN whose bit 6 is clear (e.g. 0x7F81) was not recognised. */
 #define isNaNBF16UI(a) (((~(a)&0x7F80) == 0) && ((a)&0x07F))
 bfloat16_t softfloat_roundPackToBF16(bool, int_fast16_t, uint_fast16_t);
 struct exp8_sig16 softfloat_normSubnormalBF16Sig(uint_fast16_t);
 /*----------------------------------------------------------------------------
 *----------------------------------------------------------------------------*/
 #define signF32UI(a) ((bool)((uint32_t)(a) >> 31))
--- a/softfloat/source/include/softfloat.h
+++ b/softfloat/source/include/softfloat.h
@@ -76,13 +76,13 @@ enum {
 | Software floating-point exception flags.
 *----------------------------------------------------------------------------*/
 extern THREAD_LOCAL uint_fast8_t softfloat_exceptionFlags;
-enum {
+typedef enum {
    softfloat_flag_inexact = 1,
    softfloat_flag_underflow = 2,
    softfloat_flag_overflow = 4,
    softfloat_flag_infinite = 8,
    softfloat_flag_invalid = 16
-};
+} exceptionFlag_t;
 /*----------------------------------------------------------------------------
 | Routine to raise any or all of the software floating-point exception flags.
@@ -164,6 +164,13 @@ bool f16_le_quiet(float16_t, float16_t);
 bool f16_lt_quiet(float16_t, float16_t);
 bool f16_isSignalingNaN(float16_t);
 /*----------------------------------------------------------------------------
 | 16-bit (brain float 16) floating-point operations.
 *----------------------------------------------------------------------------*/
 float32_t bf16_to_f32(bfloat16_t);
 bfloat16_t f32_to_bf16(float32_t);
 bool bf16_isSignalingNaN(bfloat16_t);
 /*----------------------------------------------------------------------------
 | 32-bit (single-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
--- a/softfloat/source/include/softfloat_types.h
+++ b/softfloat/source/include/softfloat_types.h
@@ -50,6 +50,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 typedef struct {
    uint16_t v;
 } float16_t;
 /* bfloat16 value type: a struct wrapper around the raw 16-bit storage 'v',
    declared in the same form as the neighbouring float16_t. */
 typedef struct {
    uint16_t v;
 } bfloat16_t;
 typedef struct {
    uint32_t v;
 } float32_t;
--- a/softfloat/source/s_mulAddF32.c
+++ b/softfloat/source/s_mulAddF32.c
@@ -221,4 +221,3 @@ float32_t
    return uZ.f;
 }
--- a/softfloat/source/s_normSubnormalBF16Sig.c
+++ b/softfloat/source/s_normSubnormalBF16Sig.c
@@ -0,0 +1,52 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
 California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdint.h>
 #include "platform.h"
 #include "internals.h"
 struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t sig )
 {
    int_fast8_t shiftDist;
    struct exp8_sig16 z;
    shiftDist = softfloat_countLeadingZeros16( sig ) - 8;
    z.exp = 1 - shiftDist;
    z.sig = sig<<shiftDist;
    return z;
 }
--- a/softfloat/source/s_roundPackToBF16.c
+++ b/softfloat/source/s_roundPackToBF16.c
@@ -0,0 +1,114 @@
 /*============================================================================
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.
 Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
 California.  All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.
 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
 DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/
 #include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
 #include "internals.h"
 #include "softfloat.h"
 /**
  * Rounds and packs a sign/exponent/significand triple into a bfloat16.
  *
  * The significand's last kept bit is sig[7]; the 7 LSBs (sig[6:0]) are the
  * round bits.  Rounding follows the global softfloat_roundingMode, and the
  * inexact, underflow and overflow flags are raised as the cases below decide.
  * The result fields are combined by addition (packToBF16UI), so a set bit 7
  * in the rounded significand carries into the exponent field.
  *
  * @param sign  sign of the result
  * @param exp   exponent paired with 'sig'; may be negative
  * @param sig   significand including the 7 round bits
  * @return the rounded bfloat16 value
  */
 bfloat16_t
 softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
 {
    uint_fast8_t roundingMode;
    bool roundNearEven;
    uint_fast8_t roundIncrement, roundBits;
    bool isTiny;
    uint_fast16_t uiZ;
    union ui16_bf16 uZ;
    /*------------------------------------------------------------------------
    | Select the rounding increment: 0x40 (half of the last kept bit) for the
    | two round-to-nearest modes, 0x7F when the mode rounds away from zero for
    | this sign (round_min for negative, round_max for positive), else 0.
    *------------------------------------------------------------------------*/
    roundingMode = softfloat_roundingMode;
    roundNearEven = (roundingMode == softfloat_round_near_even);
    roundIncrement = 0x40;
    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
        roundIncrement =
            (roundingMode
                 == (sign ? softfloat_round_min : softfloat_round_max))
                ? 0x7F
                : 0;
    }
    roundBits = sig & 0x7F;
    /*------------------------------------------------------------------------
    | The unsigned comparison catches both out-of-range directions at once:
    | a negative 'exp' converts to a large unsigned value, and 'exp' >= 0xFD
    | is near or above the largest encodable exponent.
    *------------------------------------------------------------------------*/
    if ( 0xFD <= (unsigned int) exp ) {
        if ( exp < 0 ) {
            /*----------------------------------------------------------------
            | Negative exponent: shift the significand right by -exp and use
            | exponent 0.  Underflow is raised only when the value is tiny
            | and round bits remain after the shift.
            | NOTE(review): softfloat_shiftRightJam32 presumably ORs the
            | shifted-out bits into the LSB -- confirm against its definition.
            *----------------------------------------------------------------*/
            isTiny =
                (softfloat_detectTininess == softfloat_tininess_beforeRounding)
                    || (exp < -1) || (sig + roundIncrement < 0x8000);
            sig = softfloat_shiftRightJam32( sig, -exp );
            exp = 0;
            roundBits = sig & 0x7F;
            if ( isTiny && roundBits ) {
                softfloat_raiseFlags( softfloat_flag_underflow );
            }
        } else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) {
            /*----------------------------------------------------------------
            | Overflow: raise overflow and inexact.  The result is the
            | encoding with exponent 0xFF and zero fraction, minus one when
            | roundIncrement is 0 (i.e. the next lower encoding).
            *----------------------------------------------------------------*/
            softfloat_raiseFlags(
                softfloat_flag_overflow | softfloat_flag_inexact );
            uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! roundIncrement;
            goto uiZ;
        }
    }
    /*------------------------------------------------------------------------
    | Apply the increment and drop the 7 round bits.  Any non-zero round bits
    | make the result inexact.
    *------------------------------------------------------------------------*/
    sig = (sig + roundIncrement)>>7;
    if ( roundBits ) {
        softfloat_exceptionFlags |= softfloat_flag_inexact;
 #ifdef SOFTFLOAT_ROUND_ODD
        if ( roundingMode == softfloat_round_odd ) {
            sig |= 1;
            goto packReturn;
        }
 #endif
    }
    /* Exact tie (round bits == 0x40) in near-even mode: clear bit 0 so the result is even. */
    sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven);
    /* A zero significand is packed with a zero exponent field. */
    if ( ! sig ) exp = 0;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 packReturn:
    uiZ = packToBF16UI( sign, exp, sig );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;
 }
--- a/src/elfio.cpp
+++ b/src/elfio.cpp
@@ -0,0 +1,35 @@
 #ifdef _MSC_VER
 #define _SCL_SECURE_NO_WARNINGS
 #define ELFIO_NO_INTTYPES
 #endif
 #include <elfio/elfio_dump.hpp>
 #include <iostream>
 using namespace ELFIO;
 int main(int argc, char** argv) {
    if(argc != 2) {
        printf("Usage: elfdump <file_name>\n");
        return 1;
    }
    elfio reader;
    if(!reader.load(argv[1])) {
        printf("File %s is not found or it is not an ELF file\n", argv[1]);
        return 1;
    }
    dump::header(std::cout, reader);
    dump::section_headers(std::cout, reader);
    dump::segment_headers(std::cout, reader);
    dump::symbol_tables(std::cout, reader);
    dump::notes(std::cout, reader);
    dump::modinfo(std::cout, reader);
    dump::dynamic_tags(std::cout, reader);
    dump::section_datas(std::cout, reader);
    dump::segment_datas(std::cout, reader);
    return 0;
 }
--- a/src/iss/arch/hwl.h
+++ b/src/iss/arch/hwl.h
@@ -51,8 +51,8 @@ public:
    virtual ~hwl() = default;
 protected:
-    iss::status read_custom_csr_reg(unsigned addr, reg_t& val) override;
+    iss::status read_custom_csr(unsigned addr, reg_t& val) override;
-    iss::status write_custom_csr_reg(unsigned addr, reg_t val) override;
+    iss::status write_custom_csr(unsigned addr, reg_t val) override;
 };
 template <typename BASE>
@@ -68,7 +68,7 @@ inline hwl<BASE>::hwl(feature_config cfg)
    }
 }
-template <typename BASE> inline iss::status iss::arch::hwl<BASE>::read_custom_csr_reg(unsigned addr, reg_t& val) {
+template <typename BASE> inline iss::status iss::arch::hwl<BASE>::read_custom_csr(unsigned addr, reg_t& val) {
    switch(addr) {
    case 0x800:
        val = this->reg.lpstart0;
@@ -92,7 +92,7 @@ template <typename BASE> inline iss::status iss::arch::hwl<BASE>::read_custom_cs
    return iss::Ok;
 }
-template <typename BASE> inline iss::status iss::arch::hwl<BASE>::write_custom_csr_reg(unsigned addr, reg_t val) {
+template <typename BASE> inline iss::status iss::arch::hwl<BASE>::write_custom_csr(unsigned addr, reg_t val) {
    switch(addr) {
    case 0x800:
        this->reg.lpstart0 = val;
--- a/src/iss/arch/mstatus.h
+++ b/src/iss/arch/mstatus.h
@@ -0,0 +1,233 @@
 /*******************************************************************************
 * Copyright (C) 2025 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Contributors:
 *       eyck@minres.com - initial implementation
 ******************************************************************************/
 #ifndef _MSTATUS_TYPE
 #define _MSTATUS_TYPE
 #include <cstdint>
 #include <type_traits>
 #include <util/bit_field.h>
 #include <util/ities.h>
 namespace iss {
 namespace arch {
 // primary template: empty, accessors exist only for the uint32_t and uint64_t specializations
 template <class T, class Enable = void> struct status {};
 // specialization 32bit: stateless accessors extracting single mstatus fields from a raw 32 bit value
 template <typename T> struct status<T, typename std::enable_if<std::is_same<T, uint32_t>::value>::type> {
    // SD summary bit; it is the top bit of the 32 bit register (bit 31), the same position
    // hart_state<uint32_t>::mstatus_t uses. Bit 63 does not exist in a uint32_t.
    static inline unsigned SD(T v) { return bit_sub<31, 1>(v); }
    // value of XLEN for S-mode
    // NOTE(review): bits 35:34 are outside a 32 bit value and hart_state<uint32_t> declares no SXL field;
    // kept unchanged for interface compatibility, callers should not rely on the result -- confirm intent
    static inline unsigned SXL(T v) { return bit_sub<34, 2>(v); }
    // value of XLEN for U-mode
    // NOTE(review): bits 33:32 are outside a 32 bit value, see SXL above -- confirm intent
    static inline unsigned UXL(T v) { return bit_sub<32, 2>(v); }
    // Trap SRET
    static inline unsigned TSR(T v) { return bit_sub<22, 1>(v); }
    // Timeout Wait
    static inline unsigned TW(T v) { return bit_sub<21, 1>(v); }
    // Trap Virtual Memory
    static inline unsigned TVM(T v) { return bit_sub<20, 1>(v); }
    // Make eXecutable Readable
    static inline unsigned MXR(T v) { return bit_sub<19, 1>(v); }
    // permit Supervisor User Memory access
    static inline unsigned SUM(T v) { return bit_sub<18, 1>(v); }
    // Modify PRiVilege
    static inline unsigned MPRV(T v) { return bit_sub<17, 1>(v); }
    // status of additional user-mode extensions and associated state, All off/None dirty or clean, some on/None
    // dirty, some clean/Some dirty
    static inline unsigned XS(T v) { return bit_sub<15, 2>(v); }
    // floating-point unit status Off/Initial/Clean/Dirty
    static inline unsigned FS(T v) { return bit_sub<13, 2>(v); }
    // machine previous privilege
    static inline unsigned MPP(T v) { return bit_sub<11, 2>(v); }
    // supervisor previous privilege
    static inline unsigned SPP(T v) { return bit_sub<8, 1>(v); }
    // previous machine interrupt-enable
    static inline unsigned MPIE(T v) { return bit_sub<7, 1>(v); }
    // previous supervisor interrupt-enable
    static inline unsigned SPIE(T v) { return bit_sub<5, 1>(v); }
    // previous user interrupt-enable
    static inline unsigned UPIE(T v) { return bit_sub<4, 1>(v); }
    // machine interrupt-enable
    static inline unsigned MIE(T v) { return bit_sub<3, 1>(v); }
    // supervisor interrupt-enable
    static inline unsigned SIE(T v) { return bit_sub<1, 1>(v); }
    // user interrupt-enable
    static inline unsigned UIE(T v) { return bit_sub<0, 1>(v); }
 };
 // specialization 64bit: stateless accessors, each one extracts a single mstatus field from a raw 64 bit value
 template <typename T> struct status<T, typename std::enable_if<std::is_same<T, uint64_t>::value>::type> {
    // bit 63: SD, read-only summary bit, set when FS or XS encode the Dirty state (FS==0b11 or XS==0b11)
    static unsigned SD(T v) { return bit_sub<63, 1>(v); }
    // bits 35:34: value of XLEN for S-mode
    static unsigned SXL(T v) { return bit_sub<34, 2>(v); }
    // bits 33:32: value of XLEN for U-mode
    static unsigned UXL(T v) { return bit_sub<32, 2>(v); }
    // bit 22: Trap SRET
    static unsigned TSR(T v) { return bit_sub<22, 1>(v); }
    // bit 21: Timeout Wait
    static unsigned TW(T v) { return bit_sub<21, 1>(v); }
    // bit 20: Trap Virtual Memory
    static unsigned TVM(T v) { return bit_sub<20, 1>(v); }
    // bit 19: Make eXecutable Readable
    static unsigned MXR(T v) { return bit_sub<19, 1>(v); }
    // bit 18: permit Supervisor User Memory access
    static unsigned SUM(T v) { return bit_sub<18, 1>(v); }
    // bit 17: Modify PRiVilege
    static unsigned MPRV(T v) { return bit_sub<17, 1>(v); }
    // bits 16:15: status of additional user-mode extensions and associated state
    // (All off / None dirty or clean, some on / None dirty, some clean / Some dirty)
    static unsigned XS(T v) { return bit_sub<15, 2>(v); }
    // bits 14:13: floating-point unit status (Off / Initial / Clean / Dirty)
    static unsigned FS(T v) { return bit_sub<13, 2>(v); }
    // bits 12:11: machine previous privilege
    static unsigned MPP(T v) { return bit_sub<11, 2>(v); }
    // bit 8: supervisor previous privilege
    static unsigned SPP(T v) { return bit_sub<8, 1>(v); }
    // bit 7: previous machine interrupt-enable
    static unsigned MPIE(T v) { return bit_sub<7, 1>(v); }
    // bit 5: previous supervisor interrupt-enable
    static unsigned SPIE(T v) { return bit_sub<5, 1>(v); }
    // bit 4: previous user interrupt-enable
    static unsigned UPIE(T v) { return bit_sub<4, 1>(v); }
    // bit 3: machine interrupt-enable
    static unsigned MIE(T v) { return bit_sub<3, 1>(v); }
    // bit 1: supervisor interrupt-enable
    static unsigned SIE(T v) { return bit_sub<1, 1>(v); }
    // bit 0: user interrupt-enable
    static unsigned UIE(T v) { return bit_sub<0, 1>(v); }
 };
 // primary template: intentionally empty, only the uint32_t and uint64_t specializations carry state
 template <class T, class Enable = void> struct hart_state {};
 // specialization 32bit: selected when T is exactly uint32_t
 template <typename T> class hart_state<T, typename std::enable_if<std::is_same<T, uint32_t>::value>::type> {
 public:
    // Declares the type mstatus_t on top of T by means of the BEGIN_BF_DECL/BF_FIELD/END_BF_DECL macros.
    // NOTE(review): the BF_FIELD arguments are presumably (name, lowest bit, width) -- confirm in util/bit_field.h
    // This layout has no SXL/UXL fields, unlike the 64 bit specialization.
    BEGIN_BF_DECL(mstatus_t, T);
    // SD bit is read-only and is set when either the FS or XS bits encode a Dirty state
    // (i.e. SD = (FS==0b11) OR (XS==0b11)); here it is the top bit of the 32 bit value
    BF_FIELD(SD, 31, 1);
    // Trap SRET
    BF_FIELD(TSR, 22, 1);
    // Timeout Wait
    BF_FIELD(TW, 21, 1);
    // Trap Virtual Memory
    BF_FIELD(TVM, 20, 1);
    // Make eXecutable Readable
    BF_FIELD(MXR, 19, 1);
    // permit Supervisor User Memory access
    BF_FIELD(SUM, 18, 1);
    // Modify PRiVilege
    BF_FIELD(MPRV, 17, 1);
    // status of additional user-mode extensions and associated state, All off/None dirty or clean, some on/None
    // dirty, some clean/Some dirty
    BF_FIELD(XS, 15, 2);
    // floating-point unit status Off/Initial/Clean/Dirty
    BF_FIELD(FS, 13, 2);
    // machine previous privilege
    BF_FIELD(MPP, 11, 2);
    // supervisor previous privilege
    BF_FIELD(SPP, 8, 1);
    // previous machine interrupt-enable
    BF_FIELD(MPIE, 7, 1);
    // previous supervisor interrupt-enable
    BF_FIELD(SPIE, 5, 1);
    // previous user interrupt-enable
    BF_FIELD(UPIE, 4, 1);
    // machine interrupt-enable
    BF_FIELD(MIE, 3, 1);
    // supervisor interrupt-enable
    BF_FIELD(SIE, 1, 1);
    // user interrupt-enable
    BF_FIELD(UIE, 0, 1);
    END_BF_DECL();
    // the mstatus register instance of the hart
    mstatus_t mstatus;
    // reset value of mstatus: 0x1800 has exactly bits 12 and 11 set, i.e. MPP == 0b11
    static const T mstatus_reset_val = 0x1800;
 };
 // specialization 64bit: selected when T is exactly uint64_t
 template <typename T> class hart_state<T, typename std::enable_if<std::is_same<T, uint64_t>::value>::type> {
 public:
    // Declares the type mstatus_t on top of T by means of the BEGIN_BF_DECL/BF_FIELD/END_BF_DECL macros.
    // NOTE(review): the BF_FIELD arguments are presumably (name, lowest bit, width) -- confirm in util/bit_field.h
    BEGIN_BF_DECL(mstatus_t, T);
    // SD bit is read-only and is set when either the FS or XS bits encode a Dirty state
    // (i.e. SD = (FS==0b11) OR (XS==0b11)); here it is the top bit of the 64 bit value
    BF_FIELD(SD, 63, 1);
    // value of XLEN for S-mode
    BF_FIELD(SXL, 34, 2);
    // value of XLEN for U-mode
    BF_FIELD(UXL, 32, 2);
    // Trap SRET
    BF_FIELD(TSR, 22, 1);
    // Timeout Wait
    BF_FIELD(TW, 21, 1);
    // Trap Virtual Memory
    BF_FIELD(TVM, 20, 1);
    // Make eXecutable Readable
    BF_FIELD(MXR, 19, 1);
    // permit Supervisor User Memory access
    BF_FIELD(SUM, 18, 1);
    // Modify PRiVilege
    BF_FIELD(MPRV, 17, 1);
    // status of additional user-mode extensions and associated state, All off/None dirty or clean, some on/None
    // dirty, some clean/Some dirty
    BF_FIELD(XS, 15, 2);
    // floating-point unit status Off/Initial/Clean/Dirty
    BF_FIELD(FS, 13, 2);
    // machine previous privilege
    BF_FIELD(MPP, 11, 2);
    // supervisor previous privilege
    BF_FIELD(SPP, 8, 1);
    // previous machine interrupt-enable
    BF_FIELD(MPIE, 7, 1);
    // previous supervisor interrupt-enable
    BF_FIELD(SPIE, 5, 1);
    // previous user interrupt-enable
    BF_FIELD(UPIE, 4, 1);
    // machine interrupt-enable
    BF_FIELD(MIE, 3, 1);
    // supervisor interrupt-enable
    BF_FIELD(SIE, 1, 1);
    // user interrupt-enable
    BF_FIELD(UIE, 0, 1);
    END_BF_DECL();
    // the mstatus register instance of the hart
    mstatus_t mstatus;
    // reset value of mstatus: 0x1800 has exactly bits 12 and 11 set, i.e. MPP == 0b11
    static const T mstatus_reset_val = 0x1800;
 };
 } // namespace arch
 } // namespace iss
 #endif // _MSTATUS_TYPE
--- a/src/iss/arch/riscv_hart_common.h
+++ b/src/iss/arch/riscv_hart_common.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017, 2018, 2021 MINRES Technologies GmbH
+ * Copyright (C) 2017 - 2025 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -35,21 +35,38 @@
 #ifndef _RISCV_HART_COMMON
 #define _RISCV_HART_COMMON
 #include "iss/arch/traits.h"
 #include "iss/log_categories.h"
 #include "iss/mmio/memory_if.h"
 #include "iss/vm_types.h"
 #include "mstatus.h"
 #include "util/delegate.h"
 #include <array>
 #include <cstdint>
 #include <elfio/elfio.hpp>
 #include <fmt/format.h>
 #include <iss/arch_if.h>
 #include <iss/log_categories.h>
 #include <iss/semihosting/semihosting.h>
 #include <limits>
 #include <sstream>
 #include <string>
 #include <unordered_map>
 #include <util/logging.h>
 #include <util/sparse_array.h>
 // Branch prediction hints. Both variants normalise the condition to 0/1, so code behaves the same
 // whether or not the compiler provides __builtin_expect.
 #if defined(__GNUC__)
 #define likely(x) ::__builtin_expect(!!(x), 1)
 #define unlikely(x) ::__builtin_expect(!!(x), 0)
 #else
 // fully parenthesised: an argument like `a || b` must not re-associate with operators around the macro
 #define likely(x) (!!(x))
 #define unlikely(x) (!!(x))
 #endif
 namespace iss {
 namespace arch {
-enum { tohost_dflt = 0xF0001000, fromhost_dflt = 0xF0001040 };
+enum features_e { FEAT_NONE, FEAT_EXT_N = 1, FEAT_DEBUG = 2 };
 enum features_e { FEAT_NONE, FEAT_PMP = 1, FEAT_EXT_N = 2, FEAT_CLIC = 4, FEAT_DEBUG = 8, FEAT_TCM = 16 };
 enum riscv_csr {
    /* user-level CSR */
@@ -225,10 +242,6 @@ struct vm_info {
 };
 struct feature_config {
    uint64_t clic_base{0xc0000000};
    unsigned clic_int_ctl_bits{4};
    unsigned clic_num_irq{16};
    unsigned clic_num_trigger{0};
    uint64_t tcm_base{0x10000000};
    uint64_t tcm_size{0x8000};
    uint64_t io_address{0xf0000000};
@@ -261,101 +274,579 @@ public:
    : trap_access(15 << 16, badaddr) {}
 };
-inline void read_reg_uint32(uint64_t offs, uint32_t& reg, uint8_t* const data, unsigned length) {
+template <typename WORD_TYPE> struct priv_if {
-    auto reg_ptr = reinterpret_cast<uint8_t*>(&reg);
+    using rd_csr_f = std::function<iss::status(unsigned addr, WORD_TYPE&)>;
-    switch(offs & 0x3) {
+    using wr_csr_f = std::function<iss::status(unsigned addr, WORD_TYPE)>;
    case 0:
        for(auto i = 0U; i < length; ++i)
            *(data + i) = *(reg_ptr + i);
        break;
    case 1:
        for(auto i = 0U; i < length; ++i)
            *(data + i) = *(reg_ptr + 1 + i);
        break;
    case 2:
        for(auto i = 0U; i < length; ++i)
            *(data + i) = *(reg_ptr + 2 + i);
        break;
    case 3:
        *data = *(reg_ptr + 3);
        break;
    }
 }
-inline void write_reg_uint32(uint64_t offs, uint32_t& reg, const uint8_t* const data, unsigned length) {
+    std::function<iss::status(unsigned, WORD_TYPE&)> read_csr;
-    auto reg_ptr = reinterpret_cast<uint8_t*>(&reg);
+    std::function<iss::status(unsigned, WORD_TYPE)> write_csr;
-    switch(offs & 0x3) {
+    std::function<iss::status(uint8_t const*)> exec_htif;
-    case 0:
+    std::unordered_map<unsigned, rd_csr_f>& csr_rd_cb;
-        for(auto i = 0U; i < length; ++i)
+    std::unordered_map<unsigned, wr_csr_f>& csr_wr_cb;
-            *(reg_ptr + i) = *(data + i);
+    hart_state<WORD_TYPE>& mstatus;
-        break;
+    uint64_t& tohost;
-    case 1:
+    uint64_t& fromhost;
-        for(auto i = 0U; i < length; ++i)
+    unsigned& mcause_max_irq;
-            *(reg_ptr + 1 + i) = *(data + i);
+};
-        break;
+
-    case 2:
+template <typename BASE, typename LOGCAT = logging::disass> struct riscv_hart_common : public BASE, public mmio::memory_elem {
-        for(auto i = 0U; i < length; ++i)
+    const std::array<const char, 4> lvl = {{'U', 'S', 'H', 'M'}};
-            *(reg_ptr + 2 + i) = *(data + i);
+    const std::array<const char*, 16> trap_str = {{""
-        break;
+                                                   "Instruction address misaligned", // 0
-    case 3:
+                                                   "Instruction access fault",       // 1
-        *(reg_ptr + 3) = *data;
+                                                   "Illegal instruction",            // 2
-        break;
+                                                   "Breakpoint",                     // 3
                                                   "Load address misaligned",        // 4
                                                   "Load access fault",              // 5
                                                   "Store/AMO address misaligned",   // 6
                                                   "Store/AMO access fault",         // 7
                                                   "Environment call from U-mode",   // 8
                                                   "Environment call from S-mode",   // 9
                                                   "Reserved",                       // a
                                                   "Environment call from M-mode",   // b
                                                   "Instruction page fault",         // c
                                                   "Load page fault",                // d
                                                   "Reserved",                       // e
                                                   "Store/AMO page fault"}};
    const std::array<const char*, 12> irq_str = {{"User software interrupt", "Supervisor software interrupt", "Reserved",
                                                  "Machine software interrupt", "User timer interrupt", "Supervisor timer interrupt",
                                                  "Reserved", "Machine timer interrupt", "User external interrupt",
                                                  "Supervisor external interrupt", "Reserved", "Machine external interrupt"}};
    constexpr static unsigned MEM = traits<BASE>::MEM;
    using core = BASE;
    using this_class = riscv_hart_common<BASE, LOGCAT>;
    using phys_addr_t = typename core::phys_addr_t;
    using reg_t = typename core::reg_t;
    using addr_t = typename core::addr_t;
    using rd_csr_f = std::function<iss::status(unsigned addr, reg_t&)>;
    using wr_csr_f = std::function<iss::status(unsigned addr, reg_t)>;
 #define MK_CSR_RD_CB(FCT) [this](unsigned a, reg_t& r) -> iss::status { return this->FCT(a, r); };
 #define MK_CSR_WR_CB(FCT) [this](unsigned a, reg_t r) -> iss::status { return this->FCT(a, r); };
    riscv_hart_common()
    : state()
    , instr_if(*this) {
        // reset values
        csr[misa] = traits<BASE>::MISA_VAL;
        csr[mvendorid] = 0x669;
        csr[marchid] = traits<BASE>::MARCHID_VAL;
        csr[mimpid] = 1;
        if(traits<BASE>::FLEN > 0) {
            csr_rd_cb[fcsr] = MK_CSR_RD_CB(read_fcsr);
            csr_wr_cb[fcsr] = MK_CSR_WR_CB(write_fcsr);
            csr_rd_cb[fflags] = MK_CSR_RD_CB(read_fcsr);
            csr_wr_cb[fflags] = MK_CSR_WR_CB(write_fcsr);
            csr_rd_cb[frm] = MK_CSR_RD_CB(read_fcsr);
            csr_wr_cb[frm] = MK_CSR_WR_CB(write_fcsr);
        }
        for(unsigned addr = mhpmcounter3; addr <= mhpmcounter31; ++addr) {
            csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
            csr_wr_cb[addr] = MK_CSR_WR_CB(write_plain);
        }
        if(traits<BASE>::XLEN == 32)
            for(unsigned addr = mhpmcounter3h; addr <= mhpmcounter31h; ++addr) {
                csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
                csr_wr_cb[addr] = MK_CSR_WR_CB(write_plain);
            }
        for(unsigned addr = mhpmevent3; addr <= mhpmevent31; ++addr) {
            csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
            csr_wr_cb[addr] = MK_CSR_WR_CB(write_plain);
        }
        for(unsigned addr = hpmcounter3; addr <= hpmcounter31; ++addr) {
            csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
        }
        if(traits<BASE>::XLEN == 32)
            for(unsigned addr = hpmcounter3h; addr <= hpmcounter31h; ++addr) {
                csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
            }
        // common regs
        const std::array<unsigned, 4> roaddrs{{misa, mvendorid, marchid, mimpid}};
        for(auto addr : roaddrs) {
            csr_rd_cb[addr] = MK_CSR_RD_CB(read_plain);
            csr_wr_cb[addr] = MK_CSR_WR_CB(write_null);
        }
        // special handling & overrides
        csr_rd_cb[time] = MK_CSR_RD_CB(read_time);
        if(traits<BASE>::XLEN == 32)
            csr_rd_cb[timeh] = MK_CSR_RD_CB(read_time);
        csr_rd_cb[cycle] = MK_CSR_RD_CB(read_cycle);
        if(traits<BASE>::XLEN == 32)
            csr_rd_cb[cycleh] = MK_CSR_RD_CB(read_cycle);
        csr_rd_cb[instret] = MK_CSR_RD_CB(read_instret);
        if(traits<BASE>::XLEN == 32)
            csr_rd_cb[instreth] = MK_CSR_RD_CB(read_instret);
        csr_rd_cb[mcycle] = MK_CSR_RD_CB(read_cycle);
        csr_wr_cb[mcycle] = MK_CSR_WR_CB(write_cycle);
        if(traits<BASE>::XLEN == 32)
            csr_rd_cb[mcycleh] = MK_CSR_RD_CB(read_cycle);
        if(traits<BASE>::XLEN == 32)
            csr_wr_cb[mcycleh] = MK_CSR_WR_CB(write_cycle);
        csr_rd_cb[minstret] = MK_CSR_RD_CB(read_instret);
        csr_wr_cb[minstret] = MK_CSR_WR_CB(write_instret);
        if(traits<BASE>::XLEN == 32)
            csr_rd_cb[minstreth] = MK_CSR_RD_CB(read_instret);
        if(traits<BASE>::XLEN == 32)
            csr_wr_cb[minstreth] = MK_CSR_WR_CB(write_instret);
        csr_rd_cb[mhartid] = MK_CSR_RD_CB(read_hartid);
    };
    /// Logs whatever is still held in io_buf so that buffered output is not lost on destruction.
    ~riscv_hart_common() {
        const auto pending = io_buf.str();
        if(!pending.empty()) {
            CPPLOG(INFO) << "tohost send '" << pending << "'";
        }
    }
 }
 struct riscv_hart_common {
    riscv_hart_common(){};
    ~riscv_hart_common(){};
    std::unordered_map<std::string, uint64_t> symbol_table;
    uint64_t entry_address{0};
    uint64_t tohost = std::numeric_limits<uint64_t>::max();
    uint64_t fromhost = std::numeric_limits<uint64_t>::max();
    std::stringstream io_buf;
-    std::unordered_map<std::string, uint64_t> get_sym_table(std::string name) {
+    void set_semihosting_callback(semihosting_cb_t<reg_t> cb) { semihosting_cb = cb; };
-        if(!symbol_table.empty())
+
-            return symbol_table;
+    std::pair<uint64_t, bool> load_file(std::string name, int type) {
-        FILE* fp = fopen(name.c_str(), "r");
+        return std::make_pair(entry_address, read_elf_file(name, sizeof(reg_t) == 4 ? ELFIO::ELFCLASS32 : ELFIO::ELFCLASS64));
-        if(fp) {
+    }
-            std::array<char, 5> buf;
+
-            auto n = fread(buf.data(), 1, 4, fp);
+    bool read_elf_file(std::string name, uint8_t expected_elf_class) {
-            fclose(fp);
+        // Create elfio reader
-            if(n != 4)
+        ELFIO::elfio reader;
-                throw std::runtime_error("input file has insufficient size");
+        // Load ELF data
-            buf[4] = 0;
+        if(reader.load(name)) {
-            if(strcmp(buf.data() + 1, "ELF") == 0) {
+            // check elf properties
-                // Create elfio reader
+            if(reader.get_class() != expected_elf_class)
-                ELFIO::elfio reader;
+                return false;
-                // Load ELF data
+            if(reader.get_type() != ELFIO::ET_EXEC)
-                if(!reader.load(name))
+                return false;
-                    throw std::runtime_error("could not process elf file");
+            if(reader.get_machine() != ELFIO::EM_RISCV)
-                // check elf properties
+                return false;
-                if(reader.get_type() != ET_EXEC)
+            entry_address = reader.get_entry();
-                    throw std::runtime_error("wrong elf type in file");
+            for(const auto& pseg : reader.segments) {
-                if(reader.get_machine() != EM_RISCV)
+                const auto fsize = pseg->get_file_size(); // 0x42c/0x0
-                    throw std::runtime_error("wrong elf machine in file");
+                const auto seg_data = pseg->get_data();
-                const auto sym_sec = reader.sections[".symtab"];
+                const auto type = pseg->get_type();
-                if(SHT_SYMTAB == sym_sec->get_type() || SHT_DYNSYM == sym_sec->get_type()) {
+                if(type == ELFIO::PT_LOAD && fsize > 0) {
-                    ELFIO::symbol_section_accessor symbols(reader, sym_sec);
+                    auto res = this->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_WRITE, traits<BASE>::MEM,
-                    auto sym_no = symbols.get_symbols_num();
+                                           pseg->get_physical_address(), fsize, reinterpret_cast<const uint8_t* const>(seg_data));
-                    std::string name;
+                    if(res != iss::Ok)
-                    ELFIO::Elf64_Addr value = 0;
+                        CPPLOG(ERR) << "problem writing " << fsize << "bytes to 0x" << std::hex << pseg->get_physical_address();
-                    ELFIO::Elf_Xword size = 0;
+                }
-                    unsigned char bind = 0;
+            }
-                    unsigned char type = 0;
+            const auto sym_sec = reader.sections[".symtab"];
-                    ELFIO::Elf_Half section = 0;
+            if(ELFIO::SHT_SYMTAB == sym_sec->get_type() || ELFIO::SHT_DYNSYM == sym_sec->get_type()) {
-                    unsigned char other = 0;
+                ELFIO::symbol_section_accessor symbols(reader, sym_sec);
-                    for(auto i = 0U; i < sym_no; ++i) {
+                auto sym_no = symbols.get_symbols_num();
-                        symbols.get_symbol(i, name, value, size, bind, type, section, other);
+                std::string name;
-                        if(name != "") {
+                ELFIO::Elf64_Addr value = 0;
-                            this->symbol_table[name] = value;
+                ELFIO::Elf_Xword size = 0;
                unsigned char bind = 0;
                unsigned char type = 0;
                ELFIO::Elf_Half section = 0;
                unsigned char other = 0;
                for(auto i = 0U; i < sym_no; ++i) {
                    symbols.get_symbol(i, name, value, size, bind, type, section, other);
                    if(name != "") {
                        this->symbol_table[name] = value;
 #ifndef NDEBUG
-                            CPPLOG(DEBUG) << "Found Symbol " << name;
+                        CPPLOG(DEBUG) << "Found Symbol " << name;
 #endif
                        }
                    }
                }
-                return symbol_table;
+                try {
                    tohost = symbol_table.at("tohost");
                } catch(std::out_of_range& e) {
                }
                try {
                    fromhost = symbol_table.at("fromhost");
                } catch(std::out_of_range& e) {
                }
            }
-            throw std::runtime_error(fmt::format("memory load file {} is not a valid elf file", name));
+            return true;
-        } else
+        }
-            throw std::runtime_error(fmt::format("memory load file not found, check if {} is a valid file", name));
+        return false;
    };
    /// Services a write request described by loaded_payload.
    ///
    /// As used here, loaded_payload[2] is the guest address of the data and loaded_payload[3] the byte
    /// count; loaded_payload[1] (the file descriptor) is ignored and everything is logged.
    ///
    /// @param aif            interface used to read the guest buffer and to write the acknowledge byte
    /// @param loaded_payload request words
    /// @param mem_type       memory space id passed through to aif
    /// @return iss::Ok on success, iss::Err if the buffer read or the fromhost write fails
    iss::status execute_sys_write(arch_if* aif, const std::array<uint64_t, 8>& loaded_payload, unsigned mem_type) {
        const uint64_t src_addr = loaded_payload[2];
        const uint64_t count = loaded_payload[3];
        std::vector<char> chars(count);
        const auto rd_status = aif->read(address_type::PHYSICAL, access_type::DEBUG_READ, mem_type, src_addr, count,
                                         reinterpret_cast<uint8_t*>(chars.data()));
        if(rd_status) {
            CPPLOG(ERR) << "SYS_WRITE buffer read went wrong";
            return iss::Err;
        }
        // we disregard the fd and just log to stdout; a newline or NUL emits the collected line
        for(const char c : chars) {
            const bool line_end = c == '\n' || c == '\0';
            if(!line_end) {
                io_buf << c;
                continue;
            }
            CPPLOG(INFO) << "tohost send '" << io_buf.str() << "'";
            io_buf.str("");
        }
        // without a known fromhost address there is nothing to acknowledge
        if(fromhost == std::numeric_limits<uint64_t>::max())
            return iss::Ok;
        // Not sure what the correct return value should be
        uint8_t ret_val = 1;
        if(aif->write(address_type::PHYSICAL, access_type::DEBUG_WRITE, mem_type, fromhost, 1, &ret_val)) {
            CPPLOG(ERR) << "Fromhost write went wrong";
            return iss::Err;
        }
        return iss::Ok;
    }
    /// @return true if bit 2 of the core's MISA_VAL is set
    constexpr bool has_compressed() {
        return (traits<BASE>::MISA_VAL & 0b0100) != 0;
    }
    /// @return mask clearing bit 0 of a pc if has_compressed() holds, otherwise clearing bits 1:0
    constexpr reg_t get_pc_mask() {
        return has_compressed() ? static_cast<reg_t>(~1) : static_cast<reg_t>(~3);
    }
    /// Logs one disassembled instruction together with the privilege level letter and the cycle count.
    ///
    /// @param pc    address of the instruction
    /// @param instr disassembly text
    void disass_output(uint64_t pc, const std::string instr) override {
        // NSCLOG(INFO, LOGCAT) << fmt::format("0x{:016x}    {:40} [p:{};s:0x{:x};c:{}]", pc, instr, lvl[this->reg.PRIV],
        // (reg_t)state.mstatus,
        //                                     this->reg.cycle + cycle_offset);
        // lvl has only four entries while PRIV may additionally carry bit 2 (see debug_mode_active()),
        // so the index is reduced to the two privilege bits to stay inside the array
        NSCLOG(INFO, LOGCAT) << fmt::format("0x{:016x}    {:40} [p:{};c:{}]", pc, instr, lvl[this->reg.PRIV & 0x3],
                                            this->reg.cycle + cycle_offset);
    }
    /// Installs (or replaces) the read handler for CSR address addr.
    void register_csr(unsigned addr, rd_csr_f f) {
        csr_rd_cb[addr] = f;
    }
    /// Installs (or replaces) the write handler for CSR address addr.
    void register_csr(unsigned addr, wr_csr_f f) {
        csr_wr_cb[addr] = f;
    }
    /// Installs (or replaces) both handlers for CSR address addr, the read handler first.
    void register_csr(unsigned addr, rd_csr_f rdf, wr_csr_f wrf) {
        register_csr(addr, rdf);
        register_csr(addr, wrf);
    }
    /// Removes the read handler of CSR address addr, if one is installed.
    void unregister_csr_rd(unsigned addr) {
        csr_rd_cb.erase(addr);
    }
    /// Removes the write handler of CSR address addr, if one is installed.
    void unregister_csr_wr(unsigned addr) {
        csr_wr_cb.erase(addr);
    }
    /// @return true if bit 2 of the PRIV register is set
    bool debug_mode_active() {
        return (this->reg.PRIV & 0x4) != 0;
    }
    /// @return the hart id value that read_hartid() reports
    const reg_t& get_mhartid() const {
        return mhartid_reg;
    }
    /// Sets the hart id value that read_hartid() reports.
    void set_mhartid(reg_t mhartid) {
        mhartid_reg = mhartid;
    }
    /// Reads CSR addr through its registered read handler.
    ///
    /// @return iss::Err if addr lies outside the csr array, otherwise the handler's result
    /// @throws illegal_instruction_fault if the current privilege level is lower than address bits 9:8
    ///         require or if no read handler is registered for addr
    iss::status read_csr(unsigned addr, reg_t& val) {
        if(addr >= csr.size())
            return iss::Err;
        const auto min_priv = (addr >> 8) & 0x3;
        const auto handler = csr_rd_cb.find(addr);
        const bool implemented = handler != csr_rd_cb.end() && handler->second;
        // insufficient privilege and a non existent register raise the same fault
        if(this->reg.PRIV < min_priv || !implemented)
            throw illegal_instruction_fault(this->fault_data);
        return handler->second(addr, val);
    }
    /// Writes CSR addr through its registered write handler.
    ///
    /// @return iss::Err if addr lies outside the csr array, otherwise the handler's result
    /// @throws illegal_instruction_fault if the current privilege level is lower than address bits 9:8
    ///         require, if address bits 11:10 are both set (read-only region) or if no write handler
    ///         is registered for addr
    iss::status write_csr(unsigned addr, reg_t val) {
        if(addr >= csr.size())
            return iss::Err;
        const auto min_priv = (addr >> 8) & 0x3;
        const bool read_only = (addr & 0xc00) == 0xc00;
        const auto handler = csr_wr_cb.find(addr);
        const bool implemented = handler != csr_wr_cb.end() && handler->second;
        // all three rejection reasons raise the same fault
        if(this->reg.PRIV < min_priv || read_only || !implemented)
            throw illegal_instruction_fault(this->fault_data);
        return handler->second(addr, val);
    }
    /// Read handler for registers that always read as zero; addr is not used.
    iss::status read_null(unsigned addr, reg_t& val) {
        val = reg_t{0};
        return iss::Ok;
    }
    /// Write handler that accepts and discards the value; addr and val are not used.
    iss::status write_null(unsigned addr, reg_t val) {
        return iss::status::Ok;
    }
    /// Read handler returning the value stored in csr[addr] unmodified.
    iss::status read_plain(unsigned addr, reg_t& val) {
        const auto& stored = csr[addr];
        val = stored;
        return iss::Ok;
    }
    /// Write handler storing val in csr[addr] unmodified.
    iss::status write_plain(unsigned addr, reg_t val) {
        auto& slot = csr[addr];
        slot = val;
        return iss::Ok;
    }
    /// Read handler for the cycle counter registers.
    ///
    /// The constructor installs this handler for cycle/cycleh as well as for mcycle/mcycleh. The address
    /// is therefore compared on its low byte only, the same way read_instret() does it, so that the
    /// user level addresses deliver a value too instead of leaving val untouched.
    ///
    /// @param addr CSR address being read
    /// @param val  receives the low word, or for the ...h registers bits 63:32, of the offset cycle count
    /// @return always iss::Ok
    iss::status read_cycle(unsigned addr, reg_t& val) {
        auto cycle_val = this->reg.cycle + cycle_offset;
        if((addr & 0xff) == (mcycle & 0xff)) {
            val = static_cast<reg_t>(cycle_val);
        } else if((addr & 0xff) == (mcycleh & 0xff)) {
            val = static_cast<reg_t>(cycle_val >> 32);
        }
        return iss::Ok;
    }
    /// Write handler for mcycle/mcycleh: updates mcycle_csr and recomputes cycle_offset from it.
    ///
    /// With a 4 byte reg_t, mcycle replaces the low and any other address the high 32 bits; otherwise
    /// the whole value is replaced.
    iss::status write_cycle(unsigned addr, reg_t val) {
        const bool is_rv32 = sizeof(typename traits<BASE>::reg_t) == 4;
        if(!is_rv32)
            mcycle_csr = static_cast<uint64_t>(val);
        else if(addr == mcycle)
            mcycle_csr = (mcycle_csr & 0xffffffff00000000) + val;
        else
            mcycle_csr = (static_cast<uint64_t>(val) << 32) + (mcycle_csr & 0xffffffff);
        // TODO: relying on wrap-around
        cycle_offset = mcycle_csr - this->reg.cycle;
        return iss::Ok;
    }
    /// Read handler for the instruction counter registers. Only the low address byte is compared, so
    /// addresses sharing that byte with minstret/minstreth are served as well.
    ///
    /// @return always iss::Ok; val is left untouched for any other address
    iss::status read_instret(unsigned addr, reg_t& val) {
        const auto low_byte = addr & 0xff;
        if(low_byte == (minstret & 0xff))
            val = static_cast<reg_t>(this->reg.instret);
        else if(low_byte == (minstreth & 0xff))
            val = static_cast<reg_t>(this->reg.instret >> 32);
        return iss::Ok;
    }
    /// Write handler for minstret/minstreth.
    ///
    /// With a 4 byte reg_t, an address sharing the low byte of minstret replaces the low and any other
    /// address the high 32 bits of the counter; otherwise the whole counter is replaced.
    iss::status write_instret(unsigned addr, reg_t val) {
        const bool is_rv32 = sizeof(typename traits<BASE>::reg_t) == 4;
        if(!is_rv32)
            this->reg.instret = static_cast<uint64_t>(val);
        else if((addr & 0xff) == (minstret & 0xff))
            this->reg.instret = (this->reg.instret & 0xffffffff00000000) + val;
        else
            this->reg.instret = (static_cast<uint64_t>(val) << 32) + (this->reg.instret & 0xffffffff);
        // NOTE(review): presumably compensates for the increment applied once the writing instruction
        // retires -- confirm
        this->reg.instret--;
        return iss::Ok;
    }
    /// Read handler for time/timeh; the time value is derived from the cycle counter.
    ///
    /// @return iss::Err if timeh is read with a reg_t that is not 4 bytes wide, otherwise iss::Ok
    iss::status read_time(unsigned addr, reg_t& val) {
        // the divisor is an integer expression and evaluates to 3050
        const uint64_t time_val = this->reg.cycle / (100000000 / 32768 - 1);
        if(addr == time) {
            val = static_cast<reg_t>(time_val);
            return iss::Ok;
        }
        if(addr == timeh) {
            if(sizeof(typename traits<BASE>::reg_t) != 4)
                return iss::Err;
            val = static_cast<reg_t>(time_val >> 32);
        }
        return iss::Ok;
    }
    /// Read handler returning csr[addr] with bit 1 cleared.
    iss::status read_tvec(unsigned addr, reg_t& val) {
        const auto stored = csr[addr];
        val = stored & ~2;
        return iss::Ok;
    }
    /// Read handler delivering the hart id configured through set_mhartid(); addr is not used.
    iss::status read_hartid(unsigned addr, reg_t& val) {
        val = get_mhartid();
        return iss::Ok;
    }
    /// Write handler storing val in csr[addr] after clearing the low bits selected by get_pc_mask().
    iss::status write_epc(unsigned addr, reg_t val) {
        const auto aligned = val & get_pc_mask();
        csr[addr] = aligned;
        return iss::Ok;
    }
    /// Write handler for dcsr: stores only the writable bits of val in csr[addr].
    ///
    /// @throws illegal_instruction_fault when called outside of debug mode
    iss::status write_dcsr(unsigned addr, reg_t val) {
        if(debug_mode_active()) {
            // writable bits: 15 (ebreakm), 11 (stepi), 8:6 (cause), 2 (step)
            csr[addr] = val & 0b1000100111000100U;
            return iss::Ok;
        }
        throw illegal_instruction_fault(this->fault_data);
    }
    /// Read handler returning csr[addr], usable in debug mode only.
    ///
    /// @throws illegal_instruction_fault when called outside of debug mode
    iss::status read_debug(unsigned addr, reg_t& val) {
        if(debug_mode_active()) {
            val = csr[addr];
            return iss::Ok;
        }
        throw illegal_instruction_fault(this->fault_data);
    }
    /**
     * CSR write handler for the debug scratch registers.
     *
     * @param addr CSR address to write in the csr array
     * @param val value stored unmodified
     * @return iss::Ok
     * @throws illegal_instruction_fault if debug mode is not active
     */
    iss::status write_dscratch(unsigned addr, reg_t val) {
        if(debug_mode_active()) {
            csr[addr] = val;
            return iss::Ok;
        }
        throw illegal_instruction_fault(this->fault_data);
    }
    /**
     * CSR read handler for dpc; the value lives in reg.DPC rather than the csr array.
     *
     * @param addr CSR address (not used)
     * @param val receives reg.DPC
     * @return iss::Ok
     * @throws illegal_instruction_fault if debug mode is not active
     */
    iss::status read_dpc(unsigned addr, reg_t& val) {
        if(debug_mode_active()) {
            val = this->reg.DPC;
            return iss::Ok;
        }
        throw illegal_instruction_fault(this->fault_data);
    }
    /**
     * CSR write handler for dpc; the value is stored in reg.DPC rather than the csr array.
     *
     * @param addr CSR address (not used)
     * @param val value assigned to reg.DPC
     * @return iss::Ok
     * @throws illegal_instruction_fault if debug mode is not active
     */
    iss::status write_dpc(unsigned addr, reg_t val) {
        if(debug_mode_active()) {
            this->reg.DPC = val;
            return iss::Ok;
        }
        throw illegal_instruction_fault(this->fault_data);
    }
    /**
     * CSR read handler for the floating point status registers.
     *
     * @param addr 1 selects fflags (bits 4:0), 2 selects frm (bits 7:5), 3 selects the whole fcsr
     * @param val receives the selected part of get_fcsr()
     * @return iss::Ok for the three addresses above, iss::Err for any other address
     */
    iss::status read_fcsr(unsigned addr, reg_t& val) {
        if(addr == 1) { // fflags, 4:0
            val = bit_sub<0, 5>(this->get_fcsr());
        } else if(addr == 2) { // frm, 7:5
            val = bit_sub<5, 3>(this->get_fcsr());
        } else if(addr == 3) { // fcsr
            val = this->get_fcsr();
        } else {
            return iss::Err;
        }
        return iss::Ok;
    }
    /**
     * CSR write handler for the floating point status registers.
     *
     * @param addr 1 selects fflags (bits 4:0), 2 selects frm (bits 7:5), 3 selects the whole fcsr
     * @param val new value; for fflags/frm only the field is replaced and the remaining
     *            fcsr bits are kept, for fcsr the lower 8 bits of val are stored
     * @return iss::Ok for the three addresses above, iss::Err for any other address
     */
    iss::status write_fcsr(unsigned addr, reg_t val) {
        if(addr == 1) { // fflags, 4:0
            const auto preserved = this->get_fcsr() & 0xffffffe0;
            this->set_fcsr(preserved | (val & 0x1f));
        } else if(addr == 2) { // frm, 7:5
            const auto preserved = this->get_fcsr() & 0xffffff1f;
            this->set_fcsr(preserved | ((val & 0x7) << 5));
        } else if(addr == 3) { // fcsr
            this->set_fcsr(val & 0xff);
        } else {
            return iss::Err;
        }
        return iss::Ok;
    }
    /**
     * Builds the priv_if<reg_t> object through which other units access this hart.
     *
     * The three callables capture `this` and forward to read_csr, write_csr and
     * execute_htif, so the returned object must not be used after this hart is gone.
     * The remaining fields are initialised from the hart's csr_rd_cb, csr_wr_cb,
     * state, tohost, fromhost and mcause_max_irq members.
     * NOTE(review): whether those fields are references or copies depends on the
     * priv_if declaration, which is not visible here - confirm.
     *
     * @return the populated priv_if<reg_t>
     */
    priv_if<reg_t> get_priv_if() {
        return priv_if<reg_t>{.read_csr = [this](unsigned addr, reg_t& val) -> iss::status { return read_csr(addr, val); },
                              .write_csr = [this](unsigned addr, reg_t val) -> iss::status { return write_csr(addr, val); },
                              .exec_htif = [this](uint8_t const* data) -> iss::status { return execute_htif(data); },
                              .csr_rd_cb{this->csr_rd_cb},
                              .csr_wr_cb{csr_wr_cb},
                              .mstatus{this->state},
                              .tohost{this->tohost},
                              .fromhost{this->fromhost},
                              .mcause_max_irq{mcause_max_irq}};
    }
    /**
     * Decodes and executes a host-target interface (HTIF) request.
     *
     * The request word is read from data as a reg_t. For XLEN == 32 device and
     * command are fixed to 0, otherwise they are taken from bits 63:56 and 55:48.
     * The lower 48 bits form the payload:
     *  - payload bit 0 set: the simulation is stopped,
     *  - device 0 / command 0: the payload is the address of eight 64 bit words
     *    describing a syscall; only syscall 64 (SYS_WRITE) is executed, any other
     *    number stops the simulation,
     *  - anything else is reported as not implemented and stops the simulation.
     * Stopping means trap_state is set to the maximum uint32_t value and the
     * payload is stored in interrupt_sim.
     *
     * @param data pointer to the request word; it is dereferenced as reg_t
     * @return the result of execute_sys_write for SYS_WRITE, iss::Ok otherwise
     */
    iss::status execute_htif(uint8_t const* data) {
        reg_t cur_data = *reinterpret_cast<const reg_t*>(data);
        // Extract Device (bits 63:56)
        uint8_t device = traits<BASE>::XLEN == 32 ? 0 : (cur_data >> 56) & 0xFF;
        // Extract Command (bits 55:48)
        uint8_t command = traits<BASE>::XLEN == 32 ? 0 : (cur_data >> 48) & 0xFF;
        // Extract payload (bits 47:0)
        uint64_t payload_addr = cur_data & 0xFFFFFFFFFFFFULL;
        if(payload_addr & 1) {
            CPPLOG(FATAL) << "this->tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr
                          << "), stopping simulation";
            this->reg.trap_state = std::numeric_limits<uint32_t>::max();
            this->interrupt_sim = payload_addr;
            return iss::Ok;
        } else if(device == 0 && command == 0) {
            // value-initialised: if the read below fails the syscall number is 0 instead of an
            // indeterminate value, so the request deterministically ends in the "not implemented" path
            std::array<uint64_t, 8> loaded_payload{};
            if(memory.rd_mem(access_type::DEBUG_READ, payload_addr, 8 * sizeof(uint64_t),
                             reinterpret_cast<uint8_t*>(loaded_payload.data())) == iss::Err)
                CPPLOG(ERR) << "Syscall read went wrong";
            uint64_t syscall_num = loaded_payload.at(0);
            if(syscall_num == 64) { // SYS_WRITE
                return this->execute_sys_write(this, loaded_payload, traits<BASE>::MEM);
            } else {
                CPPLOG(ERR) << "this->tohost syscall with number 0x" << std::hex << syscall_num << std::dec << " (" << syscall_num
                            << ") not implemented";
                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
                this->interrupt_sim = payload_addr;
                return iss::Ok;
            }
        } else {
            // uint8_t is streamed as a character, so widen both ids to get numbers in the log
            CPPLOG(ERR) << "this->tohost functionality not implemented for device " << static_cast<unsigned>(device) << " and command "
                        << static_cast<unsigned>(command);
            this->reg.trap_state = std::numeric_limits<uint32_t>::max();
            this->interrupt_sim = payload_addr;
            return iss::Ok;
        }
    }
    mmio::memory_hierarchy memories;
    /**
     * Placeholder required by the base interface; this unit does not offer a memory interface.
     *
     * Calling it is a programming error: the assertion fires in builds with assertions
     * enabled. With assertions disabled a default-constructed memory_if is returned.
     *
     * @return a default-constructed mmio::memory_if
     */
    virtual mmio::memory_if get_mem_if() override {
        // `false && "text"` is always false; the previous `false || "text"` was always true and never fired
        assert(false && "This function should never be called");
        return mmio::memory_if{};
    }
    /**
     * Stores the memory interface used by this hart for its memory accesses.
     *
     * @param mem_if interface copied into the memory member
     */
    virtual void set_next(mmio::memory_if mem_if) {
        memory = mem_if;
    }
    /**
     * Derives mcause_max_irq from the number of interrupts.
     *
     * The stored value is 1 shifted left by util::ilog2(i), i.e. always a power of two.
     * NOTE(review): whether this rounds i up or down depends on util::ilog2 - confirm.
     *
     * @param i number of interrupts
     */
    void set_irq_num(unsigned i) {
        const auto shift = util::ilog2(i);
        mcause_max_irq = 1 << shift;
    }
 protected:
    hart_state<reg_t> state;
    /**
     * Returns the mstatus bit mask for a privilege level.
     *
     * Separate constants are used for 4 byte wide registers and for all other widths.
     *
     * @param priv_lvl privilege level; PRIV_U and PRIV_S select their own masks, every
     *                 other value (including the default PRIV_M) selects the widest mask
     * @return the mask constant for the given level and register width
     */
    static constexpr reg_t get_mstatus_mask_t(unsigned priv_lvl = PRIV_M) {
        if(sizeof(reg_t) == 4) {
            if(priv_lvl == PRIV_U)
                return 0x80000011UL;
            if(priv_lvl == PRIV_S)
                return 0x800de133UL;
            return 0x807ff9ddUL;
        }
        if(priv_lvl == PRIV_U)
            return 0x011ULL;
        if(priv_lvl == PRIV_S)
            return 0x000de133ULL;
        return 0x007ff9ddULL;
    }
    mmio::memory_if memory;
    /**
     * Instrumentation interface exposing the state of a riscv_hart_common instance.
     *
     * All accessors read from (or, for update_last_instr_cycles, write to) the
     * referenced hart, so the hart must outlive this object.
     */
    struct riscv_instrumentation_if : public iss::instrumentation_if {
        riscv_instrumentation_if(riscv_hart_common<BASE, LOGCAT>& arch)
        : arch(arch) {}
        /**
         * get the name of this architecture
         *
         * @return the name of this architecture
         */
        const std::string core_type_name() const override { return traits<BASE>::core_type; }

        /// @return the PC register of the hart
        uint64_t get_pc() override { return arch.reg.PC; }

        /// @return the NEXT_PC register of the hart
        uint64_t get_next_pc() override { return arch.reg.NEXT_PC; }

        /// @return the content of the hart's instruction register
        uint64_t get_instr_word() override { return arch.reg.instruction; }

        /// @return the hart's icount register
        uint64_t get_instr_count() override { return arch.reg.icount; }

        /// @return the hart's trap_state register
        uint64_t get_pendig_traps() override { return arch.reg.trap_state; }

        /// @return the cycle register plus the accumulated cycle offset
        uint64_t get_total_cycles() override { return arch.reg.cycle + arch.cycle_offset; }

        /**
         * Adds `cycles - 1` to the hart's cycle offset.
         *
         * The subtraction is done in the signed type of cycle_offset: in unsigned
         * arithmetic `cycles == 0` would wrap and add 4294967295 instead of -1.
         *
         * @param cycles number of cycles reported for the last instruction
         */
        void update_last_instr_cycles(unsigned cycles) override { arch.cycle_offset += static_cast<int64_t>(cycles) - 1; }

        /// @return the hart's last_branch register converted to bool
        bool is_branch_taken() override { return arch.reg.last_branch; }

        /// @return the number of registers given by the core traits
        unsigned get_reg_num() override { return traits<BASE>::NUM_REGS; }

        /// @return the entry of the traits' reg_bit_widths table for register num (no bounds check)
        unsigned get_reg_size(unsigned num) override { return traits<BASE>::reg_bit_widths[num]; }

        /// @return the hart's symbol table; the name argument is not used
        std::unordered_map<std::string, uint64_t> const& get_symbol_table(std::string name) override { return arch.symbol_table; }

        riscv_hart_common<BASE, LOGCAT>& arch;
    };
    friend struct riscv_instrumentation_if;
    riscv_instrumentation_if instr_if;
    /**
     * @return pointer to the instrumentation interface member owned by this hart
     */
    instrumentation_if* get_instrumentation_if() override {
        return &instr_if;
    }
    using csr_type = util::sparse_array<typename traits<BASE>::reg_t, 1ULL << 12, 12>;
    using csr_page_type = typename csr_type::page_type;
    csr_type csr;
    std::unordered_map<unsigned, rd_csr_f> csr_rd_cb;
    std::unordered_map<unsigned, wr_csr_f> csr_wr_cb;
    reg_t mhartid_reg{0x0};
    uint64_t mcycle_csr{0};
    uint64_t minstret_csr{0};
    reg_t fault_data;
    int64_t cycle_offset{0};
    int64_t instret_offset{0};
    semihosting_cb_t<reg_t> semihosting_cb;
    std::array<vm_info, 2> vm;
    unsigned mcause_max_irq{16U};
 };
 } // namespace arch
--- a/src/iss/arch/riscv_hart_m_p.h
+++ b/src/iss/arch/riscv_hart_m_p.h
--- a/src/iss/arch/riscv_hart_msu_vp.h
+++ b/src/iss/arch/riscv_hart_msu_vp.h
--- a/src/iss/arch/riscv_hart_mu_p.h
+++ b/src/iss/arch/riscv_hart_mu_p.h
--- a/src/iss/arch/tgc5c.cpp
+++ b/src/iss/arch/tgc5c.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017 - 2020 MINRES Technologies GmbH
+ * Copyright (C) 2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
--- a/src/iss/arch/tgc5c.h
+++ b/src/iss/arch/tgc5c.h
--- a/src/iss/arch/wt_cache.h
+++ b/src/iss/arch/wt_cache.h
@@ -87,7 +87,7 @@ public:
    virtual ~wt_cache() = default;
    unsigned size{4096};
-    unsigned line_sz{32};
+    unsigned line_sz{64};
    unsigned ways{1};
    uint64_t io_address{0xf0000000};
    uint64_t io_addr_mask{0xf0000000};
@@ -119,7 +119,7 @@ template <typename BASE> iss::status iss::arch::wt_cache<BASE>::read_cache(phys_
        icache_ptr.reset(new cache::cache(size, line_sz, ways));
        dcache_ptr.reset(new cache::cache(size, line_sz, ways));
    }
-    if((a.val & io_addr_mask) != io_address) {
+    if((a.access & iss::access_type::FETCH) == iss::access_type::FETCH || (a.val & io_addr_mask) != io_address) {
        auto set_addr = (a.val & (size - 1)) >> util::ilog2(line_sz * ways);
        auto tag_addr = a.val >> util::ilog2(line_sz);
        auto& set = (is_fetch(a.access) ? icache_ptr : dcache_ptr)->sets[set_addr];
--- a/src/iss/debugger/csr_names.cpp
+++ b/src/iss/debugger/csr_names.cpp
--- a/src/iss/debugger/riscv_target_adapter.h
+++ b/src/iss/debugger/riscv_target_adapter.h
@@ -30,8 +30,8 @@
 *
 *******************************************************************************/
-#ifndef _ISS_DEBUGGER_RISCV_TARGET_ADAPTER_H_
+#ifndef _ISS_ARCH_DEBUGGER_RISCV_TARGET_ADAPTER_H_
-#define _ISS_DEBUGGER_RISCV_TARGET_ADAPTER_H_
+#define _ISS_ARCH_DEBUGGER_RISCV_TARGET_ADAPTER_H_
 #include "iss/arch_if.h"
 #include <iss/arch/traits.h>
@@ -48,6 +48,10 @@
 namespace iss {
 namespace debugger {
 char const* const get_csr_name(unsigned);
 constexpr auto csr_offset = 100U;
 using namespace iss::arch;
 using namespace iss::debugger;
@@ -129,11 +133,17 @@ public:
 protected:
    static inline constexpr addr_t map_addr(const addr_t& i) { return i; }
-
+    std::string csr_xml;
    iss::arch_if* core;
    rp_thread_ref thread_idx;
 };
 // Returns the register index of the first floating point register of ARCH.
 // This overload is selected when the traits report FLEN != 0 and yields traits<ARCH>::F0.
 template <typename ARCH> typename std::enable_if<iss::arch::traits<ARCH>::FLEN != 0, unsigned>::type get_f0_offset() {
    return iss::arch::traits<ARCH>::F0;
 }
 // Overload selected when FLEN == 0; it returns 0 and does not reference traits<ARCH>::F0.
 template <typename ARCH> typename std::enable_if<iss::arch::traits<ARCH>::FLEN == 0, unsigned>::type get_f0_offset() { return 0; }
 template <typename ARCH> status riscv_target_adapter<ARCH>::set_gen_thread(rp_thread_ref& thread) {
    thread_idx = thread;
    return Ok;
@@ -175,34 +185,37 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::current_thread_query
 // Collects the register file of the core for the debugger.
 // data receives the raw register bytes, avail one flag byte per data byte
 // (0xff = value valid, 0 = register not present). Both vectors are cleared first.
 // The added (+) lines emit 33 slots of XLEN/8 bytes each; slots that are neither
 // below RFS nor equal to the PC index are zero filled and flagged unavailable.
 // When FLEN > 0, 32 floating point registers follow.
 // Always returns Ok.
 template <typename ARCH> status riscv_target_adapter<ARCH>::read_registers(std::vector<uint8_t>& data, std::vector<uint8_t>& avail) {
    CPPLOG(TRACE) << "reading target registers";
    // return idx<0?:;
    data.clear();
    avail.clear();
    const uint8_t* reg_base = core->get_regs_base_ptr();
    auto start_reg = arch::traits<ARCH>::X0;
-    for(size_t reg_no = start_reg; reg_no < start_reg + 33 /*arch::traits<ARCH>::NUM_REGS*/; ++reg_no) {
+    for(size_t i = 0; i < 33; ++i) {
-        auto reg_width = arch::traits<ARCH>::reg_bit_widths[reg_no] / 8;
+        if(i < arch::traits<ARCH>::RFS || i == arch::traits<ARCH>::PC) {
-        unsigned offset = traits<ARCH>::reg_byte_offsets[reg_no];
+            auto reg_no = i < 32 ? start_reg + i : arch::traits<ARCH>::PC;
-        for(size_t j = 0; j < reg_width; ++j) {
+            unsigned offset = traits<ARCH>::reg_byte_offsets[reg_no];
-            data.push_back(*(reg_base + offset + j));
+            for(size_t j = 0; j < arch::traits<ARCH>::XLEN / 8; ++j) {
-            avail.push_back(0xff);
+                data.push_back(*(reg_base + offset + j));
                avail.push_back(0xff);
            }
        } else {
            // register slot not implemented by this core: report zeros and mark them unavailable
            for(size_t j = 0; j < arch::traits<ARCH>::XLEN / 8; ++j) {
                data.push_back(0);
                avail.push_back(0);
            }
        }
    }
    if(iss::arch::traits<ARCH>::FLEN > 0) {
        // floating point registers use the width recorded in reg_bit_widths
        auto fstart_reg = get_f0_offset<ARCH>();
        for(size_t i = 0; i < 32; ++i) {
            auto reg_no = fstart_reg + i;
            auto reg_width = arch::traits<ARCH>::reg_bit_widths[reg_no] / 8;
            unsigned offset = traits<ARCH>::reg_byte_offsets[reg_no];
            for(size_t j = 0; j < reg_width; ++j) {
                data.push_back(*(reg_base + offset + j));
                avail.push_back(0xff);
            }
        }
    }
    // work around fill with F type registers
    //    if (arch::traits<ARCH>::NUM_REGS < 65) {
    //        auto reg_width = sizeof(typename arch::traits<ARCH>::reg_t);
    //        for (size_t reg_no = 0; reg_no < 33; ++reg_no) {
    //            for (size_t j = 0; j < reg_width; ++j) {
    //                data.push_back(0x0);
    //                avail.push_back(0x00);
    //            }
    //            // if(arch::traits<ARCH>::XLEN < 64)
    //            //     for(unsigned j=0; j<4; ++j){
    //            //         data.push_back(0x0);
    //            //         avail.push_back(0x00);
    //            //     }
    //        }
    //    }
    return Ok;
 }
@@ -210,25 +223,25 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::write_registers(cons
    auto start_reg = arch::traits<ARCH>::X0;
    auto* reg_base = core->get_regs_base_ptr();
    auto iter = data.data();
-    bool e_ext = arch::traits<ARCH>::PC < 32;
+    auto iter_end = data.data() + data.size();
-    for(size_t reg_no = 0; reg_no < start_reg + 33 /*arch::traits<ARCH>::NUM_REGS*/; ++reg_no) {
+    for(size_t i = 0; i < 33 && iter < iter_end; ++i) {
-        if(e_ext && reg_no > 15) {
+        auto reg_width = arch::traits<ARCH>::XLEN / 8;
-            if(reg_no == 32) {
+        if(i < arch::traits<ARCH>::RFS) {
-                auto reg_width = arch::traits<ARCH>::reg_bit_widths[arch::traits<ARCH>::PC] / 8;
+            auto offset = traits<ARCH>::reg_byte_offsets[start_reg + i];
-                auto offset = traits<ARCH>::reg_byte_offsets[arch::traits<ARCH>::PC];
+            std::copy(iter, iter + reg_width, reg_base + offset);
-                std::copy(iter, iter + reg_width, reg_base);
+        } else if(i == 32) {
-            } else {
+            auto offset = traits<ARCH>::reg_byte_offsets[arch::traits<ARCH>::PC];
-                const uint64_t zero_val = 0;
+            std::copy(iter, iter + reg_width, reg_base + offset);
-                auto reg_width = arch::traits<ARCH>::reg_bit_widths[15] / 8;
+        }
-                auto iter = (uint8_t*)&zero_val;
+        iter += reg_width;
-                std::copy(iter, iter + reg_width, reg_base);
+    }
-            }
+    if(iss::arch::traits<ARCH>::FLEN > 0) {
-        } else {
+        auto fstart_reg = get_f0_offset<ARCH>();
-            auto reg_width = arch::traits<ARCH>::reg_bit_widths[reg_no] / 8;
+        auto reg_width = arch::traits<ARCH>::FLEN / 8;
-            auto offset = traits<ARCH>::reg_byte_offsets[reg_no];
+        for(size_t i = 0; i < 32 && iter < iter_end; ++i) {
-            std::copy(iter, iter + reg_width, reg_base);
+            unsigned offset = traits<ARCH>::reg_byte_offsets[fstart_reg + i];
-            iter += 4;
+            std::copy(iter, iter + reg_width, reg_base + offset);
-            reg_base += offset;
+            iter += reg_width;
        }
    }
    return Ok;
@@ -236,7 +249,7 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::write_registers(cons
 template <typename ARCH>
 status riscv_target_adapter<ARCH>::read_single_register(unsigned int reg_no, std::vector<uint8_t>& data, std::vector<uint8_t>& avail) {
-    if(reg_no < 65) {
+    if(reg_no < csr_offset) {
        // auto reg_size = arch::traits<ARCH>::reg_bit_width(static_cast<typename
        // arch::traits<ARCH>::reg_e>(reg_no))/8;
        auto* reg_base = core->get_regs_base_ptr();
@@ -247,23 +260,24 @@ status riscv_target_adapter<ARCH>::read_single_register(unsigned int reg_no, std
        std::copy(reg_base + offset, reg_base + offset + reg_width, data.begin());
        std::fill(avail.begin(), avail.end(), 0xff);
    } else {
-        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_READ, traits<ARCH>::CSR, reg_no - 65);
+        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_READ, traits<ARCH>::CSR, reg_no - csr_offset);
        data.resize(sizeof(typename traits<ARCH>::reg_t));
        avail.resize(sizeof(typename traits<ARCH>::reg_t));
        std::fill(avail.begin(), avail.end(), 0xff);
        core->read(a, data.size(), data.data());
        std::fill(avail.begin(), avail.end(), 0xff);
    }
    return data.size() > 0 ? Ok : Err;
 }
 template <typename ARCH> status riscv_target_adapter<ARCH>::write_single_register(unsigned int reg_no, const std::vector<uint8_t>& data) {
-    if(reg_no < 65) {
+    if(reg_no < csr_offset) {
        auto* reg_base = core->get_regs_base_ptr();
        auto reg_width = arch::traits<ARCH>::reg_bit_widths[static_cast<typename arch::traits<ARCH>::reg_e>(reg_no)] / 8;
        auto offset = traits<ARCH>::reg_byte_offsets[reg_no];
        std::copy(data.begin(), data.begin() + reg_width, reg_base + offset);
    } else {
-        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_WRITE, traits<ARCH>::CSR, reg_no - 65);
+        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_WRITE, traits<ARCH>::CSR, reg_no - csr_offset);
        core->write(a, data.size(), data.data());
    }
    return Ok;
@@ -276,7 +290,7 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::read_mem(uint64_t ad
 }
 // Writes data.size() bytes from data to the virtual address addr of the core.
 // The address is passed through map_addr and the write itself is executed via
 // srv->execute_syncronized. The added (+) line tags the access as DEBUG_WRITE;
 // the removed line tagged it as DEBUG_READ.
 // Returns the status delivered by execute_syncronized.
 template <typename ARCH> status riscv_target_adapter<ARCH>::write_mem(uint64_t addr, const std::vector<uint8_t>& data) {
-    auto a = map_addr({iss::access_type::DEBUG_READ, iss::address_type::VIRTUAL, 0, addr});
+    auto a = map_addr({iss::access_type::DEBUG_WRITE, iss::address_type::VIRTUAL, 0, addr});
    auto f = [&]() -> status { return core->write(a, data.size(), data.data()); };
    return srv->execute_syncronized(f);
 }
@@ -369,93 +383,57 @@ status riscv_target_adapter<ARCH>::resume_from_addr(bool step, int sig, uint64_t
 }
 // Produces the GDB target description (target.xml) for the core.
 // The document is generated once and cached in csr_xml; later calls only copy the cache to out_buf.
 // It lists the integer registers x0..x(RFS-1) and pc, the f registers plus fcsr/fflags/frm
 // when FLEN > 0, and every CSR address in 0..4095 for which a debug read through the core succeeds.
 // CSR register numbers are the CSR address plus csr_offset.
 // Fixes applied to the added (+) lines: the DOCTYPE names the actual root element (target),
 // the rv64 <architecture> tag is closed properly and the type attribute of fcsr/fflags/frm
 // is written as type="int" so that the document is well-formed XML.
 // Always returns Ok.
 template <typename ARCH> status riscv_target_adapter<ARCH>::target_xml_query(std::string& out_buf) {
-    const std::string res{"<?xml version=\"1.0\"?><!DOCTYPE target SYSTEM \"gdb-target.dtd\">"
+    if(!csr_xml.size()) {
-                          "<target><architecture>riscv:rv32</architecture>"
+        std::ostringstream oss;
-                          //"  <feature name=\"org.gnu.gdb.riscv.rv32i\">\n"
+        oss << "<?xml version=\"1.0\"?><!DOCTYPE target SYSTEM \"gdb-target.dtd\"><target version=\"1.0\">\n";
-                          //"    <reg name=\"x0\"  bitsize=\"32\" group=\"general\"/>\n"
+        if(iss::arch::traits<ARCH>::XLEN == 32)
-                          //"    <reg name=\"x1\"  bitsize=\"32\" group=\"general\"/>\n"
+            oss << "<architecture>riscv:rv32</architecture>\n";
-                          //"    <reg name=\"x2\"  bitsize=\"32\" group=\"general\"/>\n"
+        else if(iss::arch::traits<ARCH>::XLEN == 64)
-                          //"    <reg name=\"x3\"  bitsize=\"32\" group=\"general\"/>\n"
+            oss << "  <architecture>riscv:rv64</architecture>\n";
-                          //"    <reg name=\"x4\"  bitsize=\"32\" group=\"general\"/>\n"
+        oss << "  <feature name=\"org.gnu.gdb.riscv.cpu\">\n";
-                          //"    <reg name=\"x5\"  bitsize=\"32\" group=\"general\"/>\n"
+        auto reg_base_num = iss::arch::traits<ARCH>::X0;
-                          //"    <reg name=\"x6\"  bitsize=\"32\" group=\"general\"/>\n"
+        for(auto i = 0U; i < iss::arch::traits<ARCH>::RFS; ++i) {
-                          //"    <reg name=\"x7\"  bitsize=\"32\" group=\"general\"/>\n"
+            oss << "    <reg name=\"x" << i << "\" bitsize=\"" << iss::arch::traits<ARCH>::reg_bit_widths[reg_base_num + i]
-                          //"    <reg name=\"x8\"  bitsize=\"32\" group=\"general\"/>\n"
+                << "\" type=\"int\" regnum=\"" << i << "\"/>\n";
-                          //"    <reg name=\"x9\"  bitsize=\"32\" group=\"general\"/>\n"
+        }
-                          //"    <reg name=\"x10\" bitsize=\"32\" group=\"general\"/>\n"
+        oss << "    <reg name=\"pc\" bitsize=\"" << iss::arch::traits<ARCH>::reg_bit_widths[iss::arch::traits<ARCH>::PC]
-                          //"    <reg name=\"x11\" bitsize=\"32\" group=\"general\"/>\n"
+            << "\" type=\"code_ptr\" regnum=\"" << 32U << "\"/>\n";
-                          //"    <reg name=\"x12\" bitsize=\"32\" group=\"general\"/>\n"
+        oss << "  </feature>\n";
-                          //"    <reg name=\"x13\" bitsize=\"32\" group=\"general\"/>\n"
+        if(iss::arch::traits<ARCH>::FLEN > 0) {
-                          //"    <reg name=\"x14\" bitsize=\"32\" group=\"general\"/>\n"
+            oss << "  <feature name=\"org.gnu.gdb.riscv.fpu\">\n";
-                          //"    <reg name=\"x15\" bitsize=\"32\" group=\"general\"/>\n"
+            auto reg_base_num = get_f0_offset<ARCH>();
-                          //"    <reg name=\"x16\" bitsize=\"32\" group=\"general\"/>\n"
+            auto type = iss::arch::traits<ARCH>::FLEN == 32 ? "ieee_single" : "riscv_double";
-                          //"    <reg name=\"x17\" bitsize=\"32\" group=\"general\"/>\n"
+            for(auto i = 0U; i < 32; ++i) {
-                          //"    <reg name=\"x18\" bitsize=\"32\" group=\"general\"/>\n"
+                oss << "    <reg name=\"f" << i << "\" bitsize=\"" << iss::arch::traits<ARCH>::reg_bit_widths[reg_base_num + i]
-                          //"    <reg name=\"x19\" bitsize=\"32\" group=\"general\"/>\n"
+                    << "\" type=\"" << type << "\" regnum=\"" << i + 33 << "\"/>\n";
-                          //"    <reg name=\"x20\" bitsize=\"32\" group=\"general\"/>\n"
+            }
-                          //"    <reg name=\"x21\" bitsize=\"32\" group=\"general\"/>\n"
+            oss << "    <reg name=\"fcsr\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN << "\" regnum=\"103\" type=\"int\"/>\n";
-                          //"    <reg name=\"x22\" bitsize=\"32\" group=\"general\"/>\n"
+            oss << "    <reg name=\"fflags\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN << "\" regnum=\"101\" type=\"int\"/>\n";
-                          //"    <reg name=\"x23\" bitsize=\"32\" group=\"general\"/>\n"
+            oss << "    <reg name=\"frm\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN << "\" regnum=\"102\" type=\"int\"/>\n";
-                          //"    <reg name=\"x24\" bitsize=\"32\" group=\"general\"/>\n"
+            oss << "  </feature>\n";
-                          //"    <reg name=\"x25\" bitsize=\"32\" group=\"general\"/>\n"
+        }
-                          //"    <reg name=\"x26\" bitsize=\"32\" group=\"general\"/>\n"
+        oss << "  <feature name=\"org.gnu.gdb.riscv.csr\">\n";
-                          //"    <reg name=\"x27\" bitsize=\"32\" group=\"general\"/>\n"
+        std::vector<uint8_t> data;
-                          //"    <reg name=\"x28\" bitsize=\"32\" group=\"general\"/>\n"
+        std::vector<uint8_t> avail;
-                          //"    <reg name=\"x29\" bitsize=\"32\" group=\"general\"/>\n"
+        data.resize(sizeof(typename traits<ARCH>::reg_t));
-                          //"    <reg name=\"x30\" bitsize=\"32\" group=\"general\"/>\n"
+        avail.resize(sizeof(typename traits<ARCH>::reg_t));
-                          //"    <reg name=\"x31\" bitsize=\"32\" group=\"general\"/>\n"
+        for(auto i = 0U; i < 4096; ++i) {
-                          //"  </feature>\n"
+            typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_READ, traits<ARCH>::CSR, i);
-                          "</target>"};
+            std::fill(avail.begin(), avail.end(), 0xff);
-    out_buf = res;
+            auto res = core->read(a, data.size(), data.data());
            if(res == iss::Ok) {
                oss << "    <reg name=\"" << get_csr_name(i) << "\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN
                    << "\"  type=\"int\" regnum=\"" << (i + csr_offset) << "\"/>\n";
            }
        }
        oss << "  </feature>\n";
        oss << "</target>\n";
        csr_xml = oss.str();
    }
    out_buf = csr_xml;
    return Ok;
 }
 /*
 *
 <?xml version="1.0"?>
 <!DOCTYPE target SYSTEM "gdb-target.dtd">
 <target>
  <architecture>riscv:rv32</architecture>
  <feature name="org.gnu.gdb.riscv.rv32i">
    <reg name="x0"  bitsize="32" group="general"/>
    <reg name="x1"  bitsize="32" group="general"/>
    <reg name="x2"  bitsize="32" group="general"/>
    <reg name="x3"  bitsize="32" group="general"/>
    <reg name="x4"  bitsize="32" group="general"/>
    <reg name="x5"  bitsize="32" group="general"/>
    <reg name="x6"  bitsize="32" group="general"/>
    <reg name="x7"  bitsize="32" group="general"/>
    <reg name="x8"  bitsize="32" group="general"/>
    <reg name="x9"  bitsize="32" group="general"/>
    <reg name="x10" bitsize="32" group="general"/>
    <reg name="x11" bitsize="32" group="general"/>
    <reg name="x12" bitsize="32" group="general"/>
    <reg name="x13" bitsize="32" group="general"/>
    <reg name="x14" bitsize="32" group="general"/>
    <reg name="x15" bitsize="32" group="general"/>
    <reg name="x16" bitsize="32" group="general"/>
    <reg name="x17" bitsize="32" group="general"/>
    <reg name="x18" bitsize="32" group="general"/>
    <reg name="x19" bitsize="32" group="general"/>
    <reg name="x20" bitsize="32" group="general"/>
    <reg name="x21" bitsize="32" group="general"/>
    <reg name="x22" bitsize="32" group="general"/>
    <reg name="x23" bitsize="32" group="general"/>
    <reg name="x24" bitsize="32" group="general"/>
    <reg name="x25" bitsize="32" group="general"/>
    <reg name="x26" bitsize="32" group="general"/>
    <reg name="x27" bitsize="32" group="general"/>
    <reg name="x28" bitsize="32" group="general"/>
    <reg name="x29" bitsize="32" group="general"/>
    <reg name="x30" bitsize="32" group="general"/>
    <reg name="x31" bitsize="32" group="general"/>
  </feature>
 </target>
 */
 } // namespace debugger
 } // namespace iss
-#endif /* _ISS_DEBUGGER_RISCV_TARGET_ADAPTER_H_ */
+#endif /* _ISS_ARCH_DEBUGGER_RISCV_TARGET_ADAPTER_H_ */
--- a/src/iss/mmio/clic.h
+++ b/src/iss/mmio/clic.h
@@ -0,0 +1,252 @@
 #include "iss/arch/riscv_hart_common.h"
 #include "iss/vm_types.h"
 #include "memory_if.h"
 #include <util/logging.h>
 namespace iss {
 namespace mmio {
 /**
  * Configuration of the CLIC model.
  */
 struct clic_config {
    /// base address of the memory window claimed by the CLIC
    uint64_t clic_base = 0xc0000000;
    /// number of bits used to derive the initial interrupt levels and thresholds
    unsigned clic_int_ctl_bits = 4;
    /// number of interrupt register sets allocated by the CLIC
    unsigned clic_num_irq = 16;
    /// number of clicinttrig registers reachable through the memory window
    unsigned clic_num_trigger = 0;
    /// when true the user-level CSR callbacks are registered as well
    bool nmode = false;
 };
 /**
  * Copies bytes of a 32 bit register value into a buffer, starting at a byte offset.
  *
  * Bytes are taken from the in-memory representation of reg, i.e. in host byte order.
  * The copy never leaves the register: at most `4 - offs` bytes are transferred, the
  * remaining bytes of data are left untouched. An offset outside 0..3 is treated as 0.
  *
  * @param reg register value to read from
  * @param offs byte offset inside the register
  * @param data destination buffer with room for length bytes
  * @param length number of bytes requested
  */
 inline void read_reg_with_offset(uint32_t reg, uint8_t offs, uint8_t* const data, unsigned length) {
    constexpr unsigned reg_size = sizeof(uint32_t);
    const unsigned first = offs < reg_size ? offs : 0U;
    // clamp so that offs + length > 4 cannot read beyond the register
    const unsigned remaining = reg_size - first;
    const unsigned count = length < remaining ? length : remaining;
    auto reg_ptr = reinterpret_cast<const uint8_t*>(&reg);
    for(auto i = 0U; i < count; ++i)
        *(data + i) = *(reg_ptr + first + i);
 }
 /**
  * Copies bytes from a buffer into a 32 bit register, starting at a byte offset.
  *
  * Bytes are stored into the in-memory representation of reg, i.e. in host byte order.
  * The copy never leaves the register: at most `4 - offs` bytes are transferred, so
  * memory following reg is not modified. An offset outside 0..3 is treated as 0.
  *
  * @param reg register to update
  * @param offs byte offset inside the register
  * @param data source buffer holding length bytes
  * @param length number of bytes offered
  */
 inline void write_reg_with_offset(uint32_t& reg, uint8_t offs, const uint8_t* const data, unsigned length) {
    constexpr unsigned reg_size = sizeof(uint32_t);
    const unsigned first = offs < reg_size ? offs : 0U;
    // clamp so that offs + length > 4 cannot write beyond the register
    const unsigned remaining = reg_size - first;
    const unsigned count = length < remaining ? length : remaining;
    auto reg_ptr = reinterpret_cast<uint8_t*>(&reg);
    for(auto i = 0U; i < count; ++i)
        *(reg_ptr + first + i) = *(data + i);
 }
 template <typename WORD_TYPE> struct clic : public memory_elem {
    using this_class = clic<WORD_TYPE>;
    using reg_t = WORD_TYPE;
    constexpr static unsigned WORD_LEN = sizeof(WORD_TYPE) * 8;
    clic(arch::priv_if<WORD_TYPE> hart_if, clic_config cfg)
    : hart_if(hart_if)
    , cfg(cfg) {
        clic_int_reg.resize(cfg.clic_num_irq, clic_int_reg_t{.raw = 0});
        clic_cfg_reg = 0x30;
        clic_mact_lvl = clic_mprev_lvl = (1 << (cfg.clic_int_ctl_bits)) - 1;
        clic_uact_lvl = clic_uprev_lvl = (1 << (cfg.clic_int_ctl_bits)) - 1;
        hart_if.csr_rd_cb[arch::mtvt] = MK_CSR_RD_CB(read_plain);
        hart_if.csr_wr_cb[arch::mtvt] = MK_CSR_WR_CB(write_xtvt);
        //        hart_if.csr_rd_cb[mxnti] = MK_CSR_RD_CB(read_plain(a,r);};
        //        hart_if.csr_wr_cb[mxnti] = MK_CSR_WR_CB(write_plain(a,r);};
        hart_if.csr_rd_cb[arch::mintstatus] = MK_CSR_RD_CB(read_intstatus);
        hart_if.csr_wr_cb[arch::mintstatus] = MK_CSR_WR_CB(write_null);
        //        hart_if.csr_rd_cb[mscratchcsw] = MK_CSR_RD_CB(read_plain(a,r);};
        //        hart_if.csr_wr_cb[mscratchcsw] = MK_CSR_WR_CB(write_plain(a,r);};
        //        hart_if.csr_rd_cb[mscratchcswl] = MK_CSR_RD_CB(read_plain(a,r);};
        //        hart_if.csr_wr_cb[mscratchcswl] = MK_CSR_WR_CB(write_plain(a,r);};
        hart_if.csr_rd_cb[arch::mintthresh] = MK_CSR_RD_CB(read_plain);
        hart_if.csr_wr_cb[arch::mintthresh] = MK_CSR_WR_CB(write_intthresh);
        if(cfg.nmode) {
            hart_if.csr_rd_cb[arch::utvt] = MK_CSR_RD_CB(read_plain);
            hart_if.csr_wr_cb[arch::utvt] = MK_CSR_WR_CB(write_xtvt);
            hart_if.csr_rd_cb[arch::uintstatus] = MK_CSR_RD_CB(read_intstatus);
            hart_if.csr_wr_cb[arch::uintstatus] = MK_CSR_WR_CB(write_null);
            hart_if.csr_rd_cb[arch::uintthresh] = MK_CSR_RD_CB(read_plain);
            hart_if.csr_wr_cb[arch::uintthresh] = MK_CSR_WR_CB(write_intthresh);
        }
        hart_if.csr[arch::mintthresh] = (1 << (cfg.clic_int_ctl_bits)) - 1;
        hart_if.csr[arch::uintthresh] = (1 << (cfg.clic_int_ctl_bits)) - 1;
    }
    ~clic() = default;
    memory_if get_mem_if() override {
        return memory_if{.rd_mem{util::delegate<rd_mem_func_sig>::from<this_class, &this_class::read_mem>(this)},
                         .wr_mem{util::delegate<wr_mem_func_sig>::from<this_class, &this_class::write_mem>(this)}};
    }
    void set_next(memory_if mem) override { down_stream_mem = mem; }
    std::tuple<uint64_t, uint64_t> get_range() override { return {cfg.clic_base, cfg.clic_base + 0x7fff}; }
 private:
    iss::status read_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t* data) {
        if(addr >= cfg.clic_base && (addr + length) < (cfg.clic_base + 0x8000))
            return read_clic(addr, length, data);
        return down_stream_mem.rd_mem(access, addr, length, data);
    }
    iss::status write_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t const* data) {
        if(addr >= cfg.clic_base && (addr + length) < (cfg.clic_base + 0x8000))
            return write_clic(addr, length, data);
        return down_stream_mem.wr_mem(access, addr, length, data);
    }
    iss::status read_clic(uint64_t addr, unsigned length, uint8_t* data);
    iss::status write_clic(uint64_t addr, unsigned length, uint8_t const* data);
    iss::status write_null(unsigned addr, reg_t val) { return iss::status::Ok; }
    iss::status read_plain(unsigned addr, reg_t& val) {
        val = hart_if.csr[addr];
        return iss::Ok;
    }
    iss::status write_xtvt(unsigned addr, reg_t val) {
        hart_if.csr[addr] = val & ~0x3fULL;
        return iss::Ok;
    }
    iss::status read_cause(unsigned addr, reg_t& val);
    iss::status write_cause(unsigned addr, reg_t val);
    iss::status read_intstatus(unsigned addr, reg_t& val);
    iss::status write_intthresh(unsigned addr, reg_t val);
 protected:
    arch::priv_if<WORD_TYPE> hart_if;
    memory_if down_stream_mem;
    clic_config cfg;
    uint8_t clic_cfg_reg{0};
    std::array<uint32_t, 32> clic_inttrig_reg;
    // One per-interrupt CLIC register: four byte-wide fields that can also be accessed as a
    // single 32-bit word through 'raw'.
    union clic_int_reg_t {
        struct {
            uint8_t ip;   // first byte: clicintip
            uint8_t ie;   // second byte: clicintie
            uint8_t attr; // third byte: clicintattr
            uint8_t ctl;  // fourth byte: clicintctl
        };
        uint32_t raw; // all four fields as one word
    };
    std::vector<clic_int_reg_t> clic_int_reg;
    uint8_t clic_mprev_lvl{0}, clic_uprev_lvl{0};
    uint8_t clic_mact_lvl{0}, clic_uact_lvl{0};
 };
 // Serves a read inside the CLIC window.
 //   addr   absolute address of the access (callers guarantee addr >= cfg.clic_base)
 //   length number of bytes requested
 //   data   destination buffer of at least 'length' bytes
 // cliccfg, clicinttrig and the per-interrupt registers return their content, every other
 // location reads as zero. Always returns iss::Ok.
 template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::read_clic(uint64_t addr, unsigned length, uint8_t* const data) {
    // byte offset relative to the window start; used for indexing so that the result does not
    // depend on clic_base being aligned to the window size
    auto const rel = addr - cfg.clic_base;
    if(addr == cfg.clic_base) { // cliccfg
        *data = clic_cfg_reg;
        for(auto i = 1U; i < length; ++i)
            *(data + i) = 0;
    } else if(addr >= (cfg.clic_base + 0x40) && (addr + length) <= (cfg.clic_base + 0x40 + cfg.clic_num_trigger * 4)) { // clicinttrig
        auto offset = (rel - 0x40) / 4;
        read_reg_with_offset(clic_inttrig_reg[offset], rel & 0x3, data, length);
    } else if(addr >= (cfg.clic_base + 0x1000) &&
              (addr + length) <= (cfg.clic_base + 0x1000 + cfg.clic_num_irq * 4)) { // clicintip/clicintie/clicintattr/clicintctl
        auto offset = (rel - 0x1000) / 4;
        read_reg_with_offset(clic_int_reg[offset].raw, rel & 0x3, data, length);
    } else {
        // unimplemented locations read as zero
        for(auto i = 0U; i < length; ++i)
            *(data + i) = 0;
    }
    return iss::Ok;
 }
 // Handles a write inside the CLIC window.
 //   addr   absolute address of the access (callers guarantee addr >= cfg.clic_base)
 //   length number of bytes to write
 //   data   source buffer of at least 'length' bytes
 // Writes to cliccfg, clicinttrig and the per-interrupt registers update the respective register,
 // writes to any other location are silently ignored. Always returns iss::Ok.
 template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::write_clic(uint64_t addr, unsigned length, const uint8_t* const data) {
    // byte offset relative to the window start; used for indexing so that the result does not
    // depend on clic_base being aligned to the window size
    auto const rel = addr - cfg.clic_base;
    if(addr == cfg.clic_base) { // cliccfg
        // only bits 1..4 are writable
        clic_cfg_reg = (clic_cfg_reg & ~0x1e) | (*data & 0x1e);
    } else if(addr >= (cfg.clic_base + 0x40) && (addr + length) <= (cfg.clic_base + 0x40 + cfg.clic_num_trigger * 4)) { // clicinttrig
        auto offset = (rel - 0x40) / 4;
        write_reg_with_offset(clic_inttrig_reg[offset], rel & 0x3, data, length);
    } else if(addr >= (cfg.clic_base + 0x1000) &&
              (addr + length) <= (cfg.clic_base + 0x1000 + cfg.clic_num_irq * 4)) { // clicintip/clicintie/clicintattr/clicintctl
        auto offset = (rel - 0x1000) / 4;
        write_reg_with_offset(clic_int_reg[offset].raw, rel & 0x3, data, length);
        clic_int_reg[offset].raw &= 0xf0c70101; // clicIntCtlBits->0xf0, clicintattr->0xc7, clicintie->0x1, clicintip->0x1
    }
    return iss::Ok;
 }
 // CSR read handler for the cause registers.
 //   addr CSR number; bits 9:8 select the privilege mode (0 -> user fields, otherwise machine fields)
 //   val  receives the value
 // When the two low bits of mtvec are 3 the stored value is combined with the previous interrupt
 // level (bits 16 and up) and the previous-interrupt-enable / previous-privilege bits of mstatus;
 // otherwise only the top bit and the cause code are returned. Always returns iss::Ok.
 template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::read_cause(unsigned addr, reg_t& val) {
    if((hart_if.csr[arch::mtvec] & 0x3) == 3) {
        // the top bit is bit (width - 1); shifting by the full width would drop it (or be undefined)
        val = hart_if.csr[addr] & ((1UL << (sizeof(reg_t) * 8 - 1)) | (hart_if.mcause_max_irq - 1) | (0xfUL << 16));
        auto mode = (addr >> 8) & 0x3;
        switch(mode) {
        case 0:
            val |= clic_uprev_lvl << 16;
            val |= hart_if.mstatus.UPIE << 27;
            break;
        default:
            val |= clic_mprev_lvl << 16;
            val |= hart_if.mstatus.MPIE << 27;
            val |= hart_if.mstatus.MPP << 28;
            break;
        }
    } else
        val = hart_if.csr[addr] & ((1UL << (sizeof(WORD_TYPE) * 8 - 1)) | (hart_if.mcause_max_irq - 1));
    return iss::Ok;
 }
 // CSR write handler for the cause registers.
 //   addr CSR number; bits 9:8 select the privilege mode (0 -> user fields, otherwise machine fields)
 //   val  value to be written
 // Only the top bit and the cause code are writable; when the two low bits of mtvec are 3, bits
 // 19:16 are writable as well and the previous interrupt level plus the matching mstatus fields
 // are updated from 'val'. Always returns iss::Ok.
 template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::write_cause(unsigned addr, reg_t val) {
    auto const clic_mode = (hart_if.csr[arch::mtvec] & 0x3) == 3;
    if(!clic_mode) {
        auto mask = ((1UL << (sizeof(WORD_TYPE) * 8 - 1)) | (hart_if.mcause_max_irq - 1));
        hart_if.csr[addr] = (val & mask) | (hart_if.csr[addr] & ~mask);
        return iss::Ok;
    }
    auto mask = ((1UL << (sizeof(WORD_TYPE) * 8 - 1)) | (hart_if.mcause_max_irq - 1) | (0xfUL << 16));
    hart_if.csr[addr] = (val & mask) | (hart_if.csr[addr] & ~mask);
    // written level with the low (8 - clic_int_ctl_bits) bits forced to one
    auto const prev_lvl = ((val >> 16) & 0xff) | ((1 << (8 - cfg.clic_int_ctl_bits)) - 1);
    auto const prev_ie = (val >> 27) & 0x1;
    if(((addr >> 8) & 0x3) == 0) {
        clic_uprev_lvl = prev_lvl;
        hart_if.mstatus.UPIE = prev_ie;
    } else {
        clic_mprev_lvl = prev_lvl;
        hart_if.mstatus.MPIE = prev_ie;
        hart_if.mstatus.MPP = (val >> 28) & 0x3;
    }
    return iss::Ok;
 }
 // CSR read handler for the interrupt status registers.
 //   addr CSR number; bits 9:8 select the privilege mode
 //   val  receives the active user level in bits 7:0 and, when the mode bits are 3, the active
 //        machine level in bits 31:24
 // Always returns iss::Ok.
 template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::read_intstatus(unsigned addr, reg_t& val) {
    auto mode = (addr >> 8) & 0x3;
    val = clic_uact_lvl & 0xff;
    if(mode == 0x3)
        // shift in the register type: an int shift would turn levels >= 0x80 negative and
        // sign-extend them into the upper half of a 64-bit register
        val += static_cast<reg_t>(clic_mact_lvl) << 24;
    return iss::Ok;
 }
 // CSR write handler for the interrupt threshold registers: keeps the low byte of 'val' and
 // forces its lowest clic_int_ctl_bits bits to one. Always returns iss::Ok.
 // NOTE(review): write_cause forces the low (8 - clic_int_ctl_bits) bits instead; both agree only
 // for clic_int_ctl_bits == 4 - confirm which width is intended.
 template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::write_intthresh(unsigned addr, reg_t val) {
    auto const forced_ones = (1 << (cfg.clic_int_ctl_bits)) - 1;
    hart_if.csr[addr] = (val & 0xff) | forced_ones;
    return iss::Ok;
 }
 } // namespace mmio
 } // namespace iss
--- a/src/iss/mmio/memory_if.cpp
+++ b/src/iss/mmio/memory_if.cpp
@@ -0,0 +1,26 @@
 #include "memory_if.h"
 namespace iss {
 namespace mmio {
 // Puts the element at the front of the hierarchy and re-links all elements.
 // Only a reference is stored, the element itself is not copied.
 void memory_hierarchy::prepend(memory_elem& e) {
    hierarchy.push_front(std::ref(e));
    update_chain();
 }
 // Puts the element at the back of the hierarchy and re-links all elements.
 // Only a reference is stored, the element itself is not copied.
 void memory_hierarchy::append(memory_elem& e) {
    hierarchy.push_back(std::ref(e));
    update_chain();
 }
 // NOTE(review): the three operations below have empty bodies; calling them neither changes the
 // hierarchy nor reports anything to the caller - presumably placeholders, confirm before use.
 void memory_hierarchy::insert_before(memory_elem&) {}
 void memory_hierarchy::insert_after(memory_elem&) {}
 void memory_hierarchy::replace_last(memory_elem&) {}
 // Re-links the whole hierarchy: every element gets register_csrs() called, and every element
 // except the last one receives the memory interface of its successor via set_next().
 // Runs over all elements on each call, so register_csrs() is invoked again for elements that
 // were already part of the hierarchy.
 void memory_hierarchy::update_chain() {
    for(size_t i = 0; i < hierarchy.size(); ++i) {
        hierarchy[i].get().register_csrs();
        if(i)
            hierarchy[i - 1].get().set_next(hierarchy[i].get().get_mem_if());
    }
 }
 } // namespace mmio
 } // namespace iss
--- a/src/iss/mmio/memory_if.h
+++ b/src/iss/mmio/memory_if.h
@@ -0,0 +1,76 @@
 /*******************************************************************************
 * Copyright (C) 2025 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Contributors:
 *       eyck@minres.com - initial implementation
 ******************************************************************************/
 #ifndef _MEMORY_MEMORY_IF_
 #define _MEMORY_MEMORY_IF_
 #include "iss/vm_types.h"
 #include <deque>
 #include <functional>
 #include <limits>
 #include <util/delegate.h>
 namespace iss {
 namespace mmio {
 // Signature of a read access: (access type, address, number of bytes, destination buffer).
 using rd_mem_func_sig = iss::status(iss::access_type, uint64_t, unsigned, uint8_t*);
 // Signature of a write access: (access type, address, number of bytes, source buffer).
 using wr_mem_func_sig = iss::status(iss::access_type, uint64_t, unsigned, uint8_t const*);
 // Access interface of one memory element: a delegate for reads and one for writes.
 struct memory_if {
    util::delegate<rd_mem_func_sig> rd_mem;
    util::delegate<wr_mem_func_sig> wr_mem;
 };
 // Base class of all elements that can be chained into a memory_hierarchy.
 struct memory_elem {
    // polymorphic base: make destruction through a base pointer or reference well defined
    virtual ~memory_elem() = default;
    // Returns the delegates through which this element is accessed.
    virtual memory_if get_mem_if() = 0;
    // Receives the interface of the element following this one in the chain.
    virtual void set_next(memory_if) = 0;
    // Hook called by memory_hierarchy::update_chain(); does nothing by default.
    virtual void register_csrs() {}
    // Inclusive address range {first, last} covered by the element; defaults to the whole
    // 64-bit address space.
    virtual std::tuple<uint64_t, uint64_t> get_range() { return {0, std::numeric_limits<uint64_t>::max()}; }
 };
 // Ordered chain of memory elements. Elements are held by reference (not owned), so they have
 // to outlive the hierarchy.
 struct memory_hierarchy {
    // Adds the element at the front of the chain.
    void prepend(memory_elem& elem);
    // Adds the element at the end of the chain.
    void append(memory_elem& elem);
    void insert_before(memory_elem& elem);
    void insert_after(memory_elem& elem);
    void replace_last(memory_elem& elem);
 protected:
    // Connects every element to its successor.
    void update_chain();
    std::deque<std::reference_wrapper<memory_elem>> hierarchy;
 };
 } // namespace mmio
 } // namespace iss
 #endif
--- a/src/iss/mmio/memory_with_htif.h
+++ b/src/iss/mmio/memory_with_htif.h
@@ -0,0 +1,62 @@
 #ifndef _MEMORY_WITH_HTIF_
 #define _MEMORY_WITH_HTIF_
 #include "iss/arch/riscv_hart_common.h"
 #include "iss/vm_types.h"
 #include "memory_if.h"
 #include <util/logging.h>
 #include <util/sparse_array.h>
 namespace iss {
 namespace mmio {
 // Leaf element of a memory hierarchy: a sparse byte array used as backing store, with extra
 // handling of writes to the hart's tohost/fromhost locations.
 template <typename WORD_TYPE> struct memory_with_htif : public memory_elem {
    using this_class = memory_with_htif<WORD_TYPE>;
    // width of WORD_TYPE in bits
    constexpr static unsigned WORD_LEN = sizeof(WORD_TYPE) * 8;
    // hart_if provides tohost, fromhost and exec_htif() used by write_mem
    memory_with_htif(arch::priv_if<WORD_TYPE> hart_if)
    : hart_if(hart_if) {}
    ~memory_with_htif() = default;
    // Returns delegates bound to this instance's read_mem/write_mem.
    memory_if get_mem_if() override {
        return memory_if{.rd_mem{util::delegate<rd_mem_func_sig>::from<this_class, &this_class::read_mem>(this)},
                         .wr_mem{util::delegate<wr_mem_func_sig>::from<this_class, &this_class::write_mem>(this)}};
    }
    void set_next(memory_if) override {
        // intentionally left empty, leaf element
    }
 private:
    // Copies 'length' bytes starting at 'addr' into 'data', one byte at a time; the index wraps
    // around at mem.size(). Always returns iss::Ok.
    iss::status read_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t* data) {
        for(auto offs = 0U; offs < length; ++offs) {
            *(data + offs) = mem[(addr + offs) % mem.size()];
        }
        return iss::Ok;
    }
    // Stores 'length' bytes from 'data' at 'addr' and then performs the tohost/fromhost handling.
    // Returns the result of hart_if.exec_htif() for a write to tohost, iss::Ok otherwise.
    iss::status write_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t const* data) {
        // NOTE(review): the copy goes into the single page containing 'addr'; no check is visible
        // here that addr + length stays inside that page, and addr is not wrapped as in read_mem
        mem_type::page_type& p = mem(addr / mem.page_size);
        std::copy(data, data + length, p.data() + (addr & mem.page_addr_mask));
        // this->tohost handling in case of riscv-test
        // according to https://github.com/riscv-software-src/riscv-isa-sim/issues/364#issuecomment-607657754:
        // NOTE(review): '&&' is applied to two access_type values; whether this tests a bit of
        // 'access' depends on operators declared outside this file - confirm '&' was not intended
        if(access && iss::access_type::FUNC) {
            if(addr == hart_if.tohost) {
                return hart_if.exec_htif(data);
            }
            // a write to the upper word (32-bit) or to the start (64-bit) of fromhost copies the
            // 64-bit value found at fromhost's page offset to tohost's page offset
            // NOTE(review): both offsets are applied to page 'p' of the written address, which
            // presumably requires tohost and fromhost to live in that same page - verify
            if((WORD_LEN == 32 && addr == hart_if.fromhost + 4) || (WORD_LEN == 64 && addr == hart_if.fromhost)) {
                uint64_t fhostvar = *reinterpret_cast<uint64_t*>(p.data() + (hart_if.fromhost & mem.page_addr_mask));
                *reinterpret_cast<uint64_t*>(p.data() + (hart_if.tohost & mem.page_addr_mask)) = fhostvar;
            }
        }
        return iss::Ok;
    }
 protected:
    // byte-wise sparse storage
    using mem_type = util::sparse_array<uint8_t, 1ULL << 32>;
    mem_type mem;
    arch::priv_if<WORD_TYPE> hart_if;
 };
 } // namespace mmio
 } // namespace iss
 #endif // _MEMORY_WITH_HTIF_
--- a/src/iss/mmio/pmp.h
+++ b/src/iss/mmio/pmp.h
@@ -0,0 +1,212 @@
 #include "iss/arch/riscv_hart_common.h"
 #include "iss/vm_types.h"
 #include "memory_if.h"
 #include <util/logging.h>
 namespace iss {
 namespace mmio {
 // Configuration values of the CLIC model together with their defaults.
 struct clic_config {
    uint64_t clic_base = 0xc0000000; // first address of the memory mapped register window
    unsigned clic_int_ctl_bits = 4;  // number of control bits per interrupt
    unsigned clic_num_irq = 16;      // number of per-interrupt registers
    unsigned clic_num_trigger = 0;   // number of clicinttrig registers
    bool nmode = false;
 };
 // Copies bytes of a 32-bit register into a byte buffer, least significant byte first.
 //   reg    register value
 //   offs   index (1..3) of the first register byte to copy; any other value is treated as 0
 //   data   destination buffer of at least 'length' bytes
 //   length number of bytes to produce
 // Bytes requested beyond the end of the register are returned as 0 instead of reading past the
 // register. The bytes are extracted by shifting, so the result is independent of the byte
 // order of the host.
 inline void read_reg_with_offset(uint32_t reg, uint8_t offs, uint8_t* const data, unsigned length) {
    unsigned const first = (offs >= 1 && offs <= 3) ? offs : 0U;
    unsigned const avail = 4U - first; // register bytes available from 'first' on
    for(auto i = 0U; i < length; ++i)
        data[i] = i < avail ? static_cast<uint8_t>(reg >> (8U * (first + i))) : 0;
 }
 // Copies bytes from a buffer into the storage of a 32-bit register (in host byte order).
 //   reg    register to modify
 //   offs   index of the first register byte to overwrite; values other than 1..3 are treated as 0
 //   data   source buffer
 //   length number of bytes to copy; ignored for offs == 3, where exactly one byte is written
 // NOTE(review): there is no bounds check - for offs < 3, offs + length > 4 writes past 'reg'.
 inline void write_reg_with_offset(uint32_t& reg, uint8_t offs, const uint8_t* const data, unsigned length) {
    auto* const bytes = reinterpret_cast<uint8_t*>(&reg);
    if(offs == 3) {
        bytes[3] = data[0];
        return;
    }
    auto* const dst = bytes + ((offs == 1 || offs == 2) ? offs : 0);
    for(auto i = 0U; i < length; ++i)
        dst[i] = data[i];
 }
 template <typename WORD_TYPE> struct pmp : public memory_elem {
    using this_class = pmp<WORD_TYPE>;
    using reg_t = WORD_TYPE;
    constexpr static unsigned WORD_LEN = sizeof(WORD_TYPE) * 8;
    pmp(arch::priv_if<WORD_TYPE> hart_if, clic_config cfg)
    : hart_if(hart_if)
    , cfg(cfg) {
        for(size_t i = arch::pmpaddr0; i <= arch::pmpaddr15; ++i) {
            hart_if.csr_rd_cb[i] = MK_CSR_RD_CB(read_plain);
            hart_if.csr_wr_cb[i] = MK_CSR_WR_CB(write_plain);
        }
        for(size_t i = arch::pmpcfg0; i < arch::pmpcfg0 + 16 / sizeof(reg_t); ++i) {
            hart_if.csr_rd_cb[i] = MK_CSR_RD_CB(read_plain);
            hart_if.csr_wr_cb[i] = MK_CSR_WR_CB(write_pmpcfg);
        }
    }
    ~pmp() = default;
    memory_if get_mem_if() override {
        return memory_if{.rd_mem{util::delegate<rd_mem_func_sig>::from<this_class, &this_class::read_mem>(this)},
                         .wr_mem{util::delegate<wr_mem_func_sig>::from<this_class, &this_class::write_mem>(this)}};
    }
    void set_next(memory_if mem) override { down_stream_mem = mem; }
 private:
    iss::status read_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t* data) {
        if(!pmp_check(access, addr, length) && !is_debug(access)) {
            hart_if.fault_data = addr;
            if(is_debug(access))
                throw trap_access(0, addr);
            hart_if.reg.trap_state = (1UL << 31) | ((access == access_type::FETCH ? 1 : 5) << 16); // issue trap 1
            return iss::Err;
        }
        return down_stream_mem.rd_mem(access, addr, length, data);
    }
    iss::status write_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t const* data) {
        if(!pmp_check(access, addr, length) && !is_debug(access)) {
            hart_if.fault_data = addr;
            if(is_debug(access))
                throw trap_access(0, addr);
            hart_if.reg.trap_state = (1UL << 31) | (7 << 16); // issue trap 1
            return iss::Err;
        }
        return down_stream_mem.wr_mem(access, addr, length, data);
    }
    iss::status read_plain(unsigned addr, reg_t& val) {
        val = hart_if.csr[addr];
        return iss::Ok;
    }
    iss::status write_plain(unsigned addr, reg_t const& val) {
        hart_if.csr[addr] = val;
        return iss::Ok;
    }
    iss::status write_pmpcfg(unsigned addr, reg_t val) {
        hart_if.csr[addr] = val & 0x9f9f9f9f;
        return iss::Ok;
    }
    bool pmp_check(const access_type type, const uint64_t addr, const unsigned len);
 protected:
    arch::priv_if<WORD_TYPE> hart_if;
    memory_if down_stream_mem;
 };
 // Checks an access against the 16 PMP entries.
 //   type access type (READ, WRITE or FETCH are matched against the R/W/X bits)
 //   addr first address of the access
 //   len  number of bytes accessed
 // The entries are visited in ascending order and the first entry matching any part of the
 // access decides: a partial match fails, a full match succeeds in machine mode on an unlocked
 // entry or when the permission bit for 'type' is set. Without a matching entry the access is
 // permitted if no entry is active or if the hart is in machine mode.
 template <typename WORD_TYPE> bool pmp<WORD_TYPE>::pmp_check(const access_type type, const uint64_t addr, const unsigned len) {
    constexpr auto PMP_SHIFT = 2U;
    constexpr auto PMP_R = 0x1U;
    constexpr auto PMP_W = 0x2U;
    constexpr auto PMP_X = 0x4U;
    constexpr auto PMP_A = 0x18U;
    constexpr auto PMP_L = 0x80U;
    constexpr auto PMP_TOR = 0x1U;
    constexpr auto PMP_NA4 = 0x2U;
    reg_t base = 0;
    auto any_active = false;
    auto const cfg_reg_size = sizeof(reg_t);
    for(size_t i = 0; i < 16; i++) {
        reg_t tor = hart_if.csr[arch::pmpaddr0 + i] << PMP_SHIFT;
        // every pmpcfg register packs one 8-bit entry per byte, so entry i sits
        // (i % cfg_reg_size) bytes - not bits - above bit 0
        uint8_t cfg = hart_if.csr[arch::pmpcfg0 + (i / cfg_reg_size)] >> ((i % cfg_reg_size) * 8);
        if(cfg & PMP_A) {
            any_active = true;
            auto pmp_a = (cfg & PMP_A) >> 3;
            auto is_tor = pmp_a == PMP_TOR;
            auto is_na4 = pmp_a == PMP_NA4;
            // derive the address mask of a naturally aligned region from the trailing ones of pmpaddr
            reg_t mask = (hart_if.csr[arch::pmpaddr0 + i] << 1) | (!is_na4);
            mask = ~(mask & ~(mask + 1)) << PMP_SHIFT;
            // Check each 4-byte sector of the access
            auto any_match = false;
            auto all_match = true;
            for(reg_t offset = 0; offset < len; offset += 1 << PMP_SHIFT) {
                reg_t cur_addr = addr + offset;
                auto napot_match = ((cur_addr ^ tor) & mask) == 0;
                // NOTE(review): the lower bound is compared against cur_addr + len - 1 rather than
                // cur_addr - confirm this is intended
                auto tor_match = base <= (cur_addr + len - 1) && cur_addr < tor;
                auto match = is_tor ? tor_match : napot_match;
                any_match |= match;
                all_match &= match;
            }
            if(any_match) {
                // If the PMP matches only a strict subset of the access, fail it
                if(!all_match)
                    return false;
                return (hart_if.reg.PRIV == arch::PRIV_M && !(cfg & PMP_L)) || (type == access_type::READ && (cfg & PMP_R)) ||
                       (type == access_type::WRITE && (cfg & PMP_W)) || (type == access_type::FETCH && (cfg & PMP_X));
            }
        }
        // the address of this entry is the lower bound of a following top-of-range entry
        base = tor;
    }
    return !any_active || hart_if.reg.PRIV == arch::PRIV_M;
 }
 } // namespace mmio
 } // namespace iss
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -69,7 +69,8 @@ int main(int argc, char* argv[]) {
        ("logfile,l", po::value<std::string>(), "Sets default log file.")
        ("disass,d", po::value<std::string>()->implicit_value(""), "Enables disassembly")
        ("gdb-port,g", po::value<unsigned>()->default_value(0), "enable gdb server and specify port to use")
-        ("instructions,i", po::value<uint64_t>()->default_value(std::numeric_limits<uint64_t>::max()), "max. number of instructions to simulate")
+        ("ilimit,i", po::value<uint64_t>()->default_value(std::numeric_limits<uint64_t>::max()), "max. number of instructions to simulate")
        ("flimit", po::value<uint64_t>()->default_value(std::numeric_limits<uint64_t>::max()), "max. number of fetches to simulate")
        ("reset,r", po::value<std::string>(), "reset address")
        ("dump-ir", "dump the intermediate representation")
        ("elf,f", po::value<std::vector<std::string>>(), "ELF file(s) to load")
@@ -140,7 +141,10 @@ int main(int argc, char* argv[]) {
            std::tie(cpu, vm) = f.create(isa_opt, clim["gdb-port"].as<unsigned>(), &semihosting_cb);
        }
        if(!cpu) {
-            CPPLOG(ERR) << "Could not create cpu for isa " << isa_opt << " and backend " << clim["backend"].as<std::string>() << std::endl;
+            auto list = f.get_names();
            std::sort(std::begin(list), std::end(list));
            CPPLOG(ERR) << "Could not create cpu for isa " << isa_opt << " and backend " << clim["backend"].as<std::string>() << "\n"
                        << "Available implementations (core|platform|backend):\n  - " << util::join(list, "\n  - ") << std::endl;
            return 127;
        }
        if(!vm) {
@@ -202,21 +206,36 @@ int main(int argc, char* argv[]) {
        if(clim.count("elf"))
            for(std::string input : clim["elf"].as<std::vector<std::string>>()) {
                auto start_addr = vm->get_arch()->load_file(input);
-                if(start_addr.second) // FIXME: this always evaluates to true as load file always returns <sth, true>
+                if(start_addr.second)
                    start_address = start_addr.first;
                else {
                    LOG(ERR) << "Error occured while loading file " << input << std::endl;
                    return 1;
                }
            }
        for(std::string input : args) {
            auto start_addr = vm->get_arch()->load_file(input); // treat remaining arguments as elf files
-            if(start_addr.second) // FIXME: this always evaluates to true as load file always returns <sth, true>
+            if(start_addr.second)
                start_address = start_addr.first;
            else {
                LOG(ERR) << "Error occured while loading file " << input << std::endl;
                return 1;
            }
        }
        if(clim.count("reset")) {
            auto str = clim["reset"].as<std::string>();
            start_address = str.find("0x") == 0 ? std::stoull(str.substr(2), nullptr, 16) : std::stoull(str, nullptr, 10);
        }
        vm->reset(start_address);
-        auto cycles = clim["instructions"].as<uint64_t>();
+        auto limit = clim["ilimit"].as<uint64_t>();
-        res = vm->start(cycles, dump);
+        auto cond = iss::finish_cond_e::JUMP_TO_SELF;
        if(clim.count("flimit")) {
            cond = cond | iss::finish_cond_e::FCOUNT_LIMIT;
            limit = clim["flimit"].as<uint64_t>();
        } else {
            cond = cond | iss::finish_cond_e::ICOUNT_LIMIT;
        }
        res = vm->start(limit, dump, cond);
        auto instr_if = vm->get_arch()->get_instrumentation_if();
        // this assumes a single input file
--- a/src/sysc/core_complex.cpp
+++ b/src/sysc/core_complex.cpp
@@ -42,7 +42,6 @@
 #include <iss/plugin/loader.h>
 #endif
 #include "sc_core_adapter_if.h"
 #include <iss/arch/tgc_mapper.h>
 #include <scc/report.h>
 #include <util/ities.h>
 #include <iostream>
@@ -125,7 +124,7 @@ using vm_ptr = std::unique_ptr<iss::vm_if>;
 class core_wrapper {
 public:
-    core_wrapper(core_complex* owner)
+    core_wrapper(core_complex_if* owner)
    : owner(owner) {}
    void reset(uint64_t addr) { vm->reset(addr); }
@@ -181,7 +180,7 @@ public:
                                             "SystemC sub-commands: break <time>, print_time"});
    }
-    core_complex* const owner;
+    core_complex_if* const owner;
    vm_ptr vm{nullptr};
    sc_cpu_ptr cpu{nullptr};
    iss::debugger::target_adapter_if* tgt_adapter{nullptr};
@@ -197,9 +196,9 @@ struct core_trace {
    scv_tr_handle tr_handle;
 };
 SC_HAS_PROCESS(core_complex); // NOLINT
 #ifndef CWR_SYSTEMC
-core_complex::core_complex(sc_module_name const& name)
+template <unsigned int BUSWIDTH>
 core_complex<BUSWIDTH>::core_complex(sc_module_name const& name)
 : sc_module(name)
 , fetch_lut(tlm_dmi_ext())
 , read_lut(tlm_dmi_ext())
@@ -208,7 +207,7 @@ core_complex::core_complex(sc_module_name const& name)
 }
 #endif
-void core_complex::init() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::init() {
    trc = new core_trace();
    ibus.register_invalidate_direct_mem_ptr([=](uint64_t start, uint64_t end) -> void {
        auto lut_entry = fetch_lut.getEntry(start);
@@ -227,6 +226,7 @@ void core_complex::init() {
        }
    });
    SC_HAS_PROCESS(core_complex<BUSWIDTH>); // NOLINT
    SC_THREAD(run);
    SC_METHOD(rst_cb);
    sensitive << rst_i;
@@ -252,16 +252,16 @@ void core_complex::init() {
 #endif
 }
-core_complex::~core_complex() {
+template <unsigned int BUSWIDTH> core_complex<BUSWIDTH>::~core_complex() {
    delete cpu;
    delete trc;
    for(auto* p : plugin_list)
        delete p;
 }
-void core_complex::trace(sc_trace_file* trf) const {}
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::trace(sc_trace_file* trf) const {}
-void core_complex::before_end_of_elaboration() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::before_end_of_elaboration() {
    SCCDEBUG(SCMOD) << "instantiating iss::arch::tgf with " << GET_PROP_VALUE(backend) << " backend";
    // cpu = scc::make_unique<core_wrapper>(this);
    cpu = new core_wrapper(this);
@@ -302,7 +302,7 @@ void core_complex::before_end_of_elaboration() {
    }
 }
-void core_complex::start_of_simulation() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::start_of_simulation() {
    // quantum_keeper.reset();
    if(GET_PROP_VALUE(elf_file).size() > 0) {
        istringstream is(GET_PROP_VALUE(elf_file));
@@ -325,7 +325,7 @@ void core_complex::start_of_simulation() {
    }
 }
-bool core_complex::disass_output(uint64_t pc, const std::string instr_str) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::disass_output(uint64_t pc, const std::string instr_str) {
    if(trc->m_db == nullptr)
        return false;
    if(trc->tr_handle.is_active())
@@ -339,7 +339,7 @@ bool core_complex::disass_output(uint64_t pc, const std::string instr_str) {
    return true;
 }
-void core_complex::forward() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::forward() {
 #ifndef CWR_SYSTEMC
    set_clock_period(clk_i.read());
 #else
@@ -348,24 +348,24 @@ void core_complex::forward() {
 #endif
 }
-void core_complex::set_clock_period(sc_core::sc_time period) {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::set_clock_period(sc_core::sc_time period) {
    curr_clk = period;
    if(period == SC_ZERO_TIME)
        cpu->set_interrupt_execution(true);
 }
-void core_complex::rst_cb() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::rst_cb() {
    if(rst_i.read())
        cpu->set_interrupt_execution(true);
 }
-void core_complex::sw_irq_cb() { cpu->local_irq(3, sw_irq_i.read()); }
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::sw_irq_cb() { cpu->local_irq(3, sw_irq_i.read()); }
-void core_complex::timer_irq_cb() { cpu->local_irq(7, timer_irq_i.read()); }
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::timer_irq_cb() { cpu->local_irq(7, timer_irq_i.read()); }
-void core_complex::ext_irq_cb() { cpu->local_irq(11, ext_irq_i.read()); }
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::ext_irq_cb() { cpu->local_irq(11, ext_irq_i.read()); }
-void core_complex::local_irq_cb() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::local_irq_cb() {
    for(auto i = 0U; i < local_irq_i.size(); ++i) {
        if(local_irq_i[i].event()) {
            cpu->local_irq(16 + i, local_irq_i[i].read());
@@ -373,7 +373,7 @@ void core_complex::local_irq_cb() {
    }
 }
-void core_complex::run() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::run() {
    wait(SC_ZERO_TIME); // separate from elaboration phase
    do {
        wait(SC_ZERO_TIME);
@@ -387,11 +387,11 @@ void core_complex::run() {
        quantum_keeper.reset();
        cpu->set_interrupt_execution(false);
        cpu->start(dump_ir);
-    } while(cpu->get_interrupt_execution());
+    } while(!cpu->get_interrupt_execution());
    sc_stop();
 }
-bool core_complex::read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) {
    auto& dmi_lut = is_fetch ? fetch_lut : read_lut;
    auto lut_entry = dmi_lut.getEntry(addr);
    if(lut_entry.get_granted_access() != tlm::tlm_dmi::DMI_ACCESS_NONE && addr + length <= lut_entry.get_end_address() + 1) {
@@ -449,7 +449,7 @@ bool core_complex::read_mem(uint64_t addr, unsigned length, uint8_t* const data,
    }
 }
-bool core_complex::write_mem(uint64_t addr, unsigned length, const uint8_t* const data) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::write_mem(uint64_t addr, unsigned length, const uint8_t* const data) {
    auto lut_entry = write_lut.getEntry(addr);
    if(lut_entry.get_granted_access() != tlm::tlm_dmi::DMI_ACCESS_NONE && addr + length <= lut_entry.get_end_address() + 1) {
        auto offset = addr - lut_entry.get_start_address();
@@ -497,7 +497,7 @@ bool core_complex::write_mem(uint64_t addr, unsigned length, const uint8_t* cons
    }
 }
-bool core_complex::read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) {
    tlm::tlm_generic_payload gp;
    gp.set_command(tlm::TLM_READ_COMMAND);
    gp.set_address(addr);
@@ -507,7 +507,7 @@ bool core_complex::read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const d
    return dbus->transport_dbg(gp) == length;
 }
-bool core_complex::write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) {
    write_buf.resize(length);
    std::copy(data, data + length, write_buf.begin()); // need to copy as TLM does not guarantee data integrity
    tlm::tlm_generic_payload gp;
@@ -518,5 +518,10 @@ bool core_complex::write_mem_dbg(uint64_t addr, unsigned length, const uint8_t*
    gp.set_streaming_width(length);
    return dbus->transport_dbg(gp) == length;
 }
 template class core_complex<scc::LT>;
 template class core_complex<32>;
 template class core_complex<64>;
 } /* namespace tgfs */
 } /* namespace sysc */
--- a/src/sysc/core_complex.h
+++ b/src/sysc/core_complex.h
@@ -33,6 +33,7 @@
 #ifndef _SYSC_CORE_COMPLEX_H_
 #define _SYSC_CORE_COMPLEX_H_
 #include <scc/signal_opt_ports.h>
 #include <scc/tick2time.h>
 #include <scc/traceable.h>
 #include <scc/utilities.h>
@@ -40,10 +41,8 @@
 #include <tlm/scc/scv/tlm_rec_initiator_socket.h>
 #ifdef CWR_SYSTEMC
 #include <scmlinc/scml_property.h>
 #define SOCKET_WIDTH 32
 #else
 #include <cci_configuration>
 #define SOCKET_WIDTH scc::LT
 #endif
 #include <memory>
 #include <tlm>
@@ -68,12 +67,35 @@ public:
 namespace tgfs {
 class core_wrapper;
 struct core_trace;
 struct core_complex_if {
-class core_complex : public sc_core::sc_module, public scc::traceable {
+    virtual ~core_complex_if() = default;
    virtual bool read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) = 0;
    virtual bool write_mem(uint64_t addr, unsigned length, const uint8_t* const data) = 0;
    virtual bool read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) = 0;
    virtual bool write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) = 0;
    virtual bool disass_output(uint64_t pc, const std::string instr) = 0;
    virtual unsigned get_last_bus_cycles() = 0;
    //! Allow quantum keeper handling
    virtual void sync(uint64_t) = 0;
    virtual char const* hier_name() = 0;
    scc::sc_in_opt<uint64_t> mtime_i{"mtime_i"};
 };
 template <unsigned int BUSWIDTH = scc::LT> class core_complex : public sc_core::sc_module, public scc::traceable, public core_complex_if {
 public:
-    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<SOCKET_WIDTH>> ibus{"ibus"};
+    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<BUSWIDTH>> ibus{"ibus"};
-    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<SOCKET_WIDTH>> dbus{"dbus"};
+    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<BUSWIDTH>> dbus{"dbus"};
    sc_core::sc_in<bool> rst_i{"rst_i"};
@@ -88,8 +110,6 @@ public:
 #ifndef CWR_SYSTEMC
    sc_core::sc_in<sc_core::sc_time> clk_i{"clk_i"};
    sc_core::sc_port<tlm::tlm_peek_if<uint64_t>, 1, sc_core::SC_ZERO_OR_MORE_BOUND> mtime_o{"mtime_o"};
    cci::cci_param<std::string> elf_file{"elf_file", ""};
    cci::cci_param<bool> enable_disass{"enable_disass", false};
@@ -115,8 +135,6 @@ public:
 #else
    sc_core::sc_in<bool> clk_i{"clk_i"};
    sc_core::sc_in<uint64_t> mtime_i{"mtime_i"};
    scml_property<std::string> elf_file{"elf_file", ""};
    scml_property<bool> enable_disass{"enable_disass", false};
@@ -159,13 +177,13 @@ public:
    ~core_complex();
-    inline unsigned get_last_bus_cycles() {
+    unsigned get_last_bus_cycles() override {
        auto mem_incr = std::max(ibus_inc, dbus_inc);
        ibus_inc = dbus_inc = 0;
        return mem_incr > 1 ? mem_incr : 1;
    }
-    inline void sync(uint64_t cycle) {
+    void sync(uint64_t cycle) override {
        auto core_inc = curr_clk * (cycle - last_sync_cycle);
        quantum_keeper.inc(core_inc);
        if(quantum_keeper.need_sync()) {
@@ -175,20 +193,22 @@ public:
        last_sync_cycle = cycle;
    }
-    bool read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch);
+    bool read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) override;
-    bool write_mem(uint64_t addr, unsigned length, const uint8_t* const data);
+    bool write_mem(uint64_t addr, unsigned length, const uint8_t* const data) override;
-    bool read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data);
+    bool read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) override;
-    bool write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data);
+    bool write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) override;
    void trace(sc_core::sc_trace_file* trf) const override;
-    bool disass_output(uint64_t pc, const std::string instr);
+    bool disass_output(uint64_t pc, const std::string instr) override;
    void set_clock_period(sc_core::sc_time period);
    char const* hier_name() override { return name(); }
 protected:
    void before_end_of_elaboration() override;
    void start_of_simulation() override;
--- a/src/sysc/register_tgc_c.cpp
+++ b/src/sysc/register_tgc_c.cpp
@@ -46,12 +46,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|interp",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
@@ -62,12 +62,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|llvm",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
@@ -79,12 +79,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|tcc",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
@@ -96,12 +96,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|asmjit",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
--- a/src/sysc/sc_core_adapter.h
+++ b/src/sysc/sc_core_adapter.h
@@ -21,7 +21,7 @@ public:
    using reg_t = typename iss::arch::traits<typename PLAT::core>::reg_t;
    using phys_addr_t = typename iss::arch::traits<typename PLAT::core>::phys_addr_t;
    using heart_state_t = typename PLAT::hart_state_type;
-    sc_core_adapter(sysc::tgfs::core_complex* owner)
+    sc_core_adapter(sysc::tgfs::core_complex_if* owner)
    : owner(owner) {}
    iss::arch_if* get_arch_if() override { return this; }
@@ -54,9 +54,9 @@ public:
            std::stringstream s;
            s << "[p:" << lvl[this->reg.PRIV] << ";s:0x" << std::hex << std::setfill('0') << std::setw(sizeof(reg_t) * 2)
              << (reg_t)this->state.mstatus << std::dec << ";c:" << this->reg.icount + this->cycle_offset << "]";
-            SCCDEBUG(owner->name()) << "disass: "
+            SCCDEBUG(owner->hier_name()) << "disass: "
-                                    << "0x" << std::setw(16) << std::right << std::setfill('0') << std::hex << pc << "\t\t" << std::setw(40)
+                                         << "0x" << std::setw(16) << std::right << std::setfill('0') << std::hex << pc << "\t\t"
-                                    << std::setfill(' ') << std::left << instr << s.str();
+                                         << std::setw(40) << std::setfill(' ') << std::left << instr << s.str();
        }
    };
@@ -71,62 +71,66 @@ public:
    iss::status write_mem(phys_addr_t addr, unsigned length, const uint8_t* const data) override {
        if(addr.access && iss::access_type::DEBUG)
            return owner->write_mem_dbg(addr.val, length, data) ? iss::Ok : iss::Err;
-        else {
+        if(addr.val == this->tohost) {
-            auto tohost_upper = (sizeof(reg_t) == 4 && addr.val == (this->tohost + 4)) || (sizeof(reg_t) == 8 && addr.val == this->tohost);
+            reg_t cur_data = *reinterpret_cast<const reg_t*>(data);
-            auto tohost_lower = (sizeof(reg_t) == 4 && addr.val == this->tohost) || (sizeof(reg_t) == 64 && addr.val == this->tohost);
+            // Extract Device (bits 63:56)
-            if(tohost_lower || tohost_upper) {
+            uint8_t device = sizeof(reg_t) == 4 ? 0 : (cur_data >> 56) & 0xFF;
-                if(tohost_upper || (tohost_lower && to_host_wr_cnt > 0)) {
+            // Extract Command (bits 55:48)
-                    switch(hostvar >> 48) {
+            uint8_t command = sizeof(reg_t) == 4 ? 0 : (cur_data >> 48) & 0xFF;
-                    case 0:
+            // Extract payload (bits 47:0)
-                        if(hostvar != 0x1) {
+            uint64_t payload_addr = cur_data & 0xFFFFFFFFFFFFULL; // 48 bits
-                            SCCINFO(owner->name())
+            if(payload_addr & 1) {
-                                << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar << "), stopping simulation";
+                if(payload_addr != 0x1) {
-                        } else {
+                    SCCERR(owner->hier_name()) << "tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr
-                            SCCINFO(owner->name())
+                                               << "), stopping simulation";
-                                << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar << "), stopping simulation";
+                } else {
-                        }
+                    SCCINFO(owner->hier_name())
-                        this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                        << "tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr << "), stopping simulation";
                        this->interrupt_sim = hostvar;
 #ifndef WITH_TCC
                        throw(iss::simulation_stopped(hostvar));
 #endif
                        break;
                    default:
                        break;
                    }
                } else if(tohost_lower)
                    to_host_wr_cnt++;
                return iss::Ok;
            } else {
                auto res = owner->write_mem(addr.val, length, data) ? iss::Ok : iss::Err;
                // clear MTIP on mtimecmp write
                if(addr.val == 0x2004000) {
                    reg_t val;
                    this->read_csr(iss::arch::mip, val);
                    if(val & (1ULL << 7))
                        this->write_csr(iss::arch::mip, val & ~(1ULL << 7));
                }
-                return res;
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
                this->interrupt_sim = payload_addr;
 #ifndef WITH_TCC
                throw(iss::simulation_stopped(payload_addr));
 #endif
                return iss::Ok;
            }
            if(device == 0 && command == 0) {
                std::array<uint64_t, 8> loaded_payload;
                auto res = owner->read_mem(payload_addr, 8 * sizeof(uint64_t), reinterpret_cast<uint8_t*>(loaded_payload.data()), false)
                               ? iss::Ok
                               : iss::Err;
                if(res == iss::Err) {
                    SCCERR(owner->hier_name()) << "Syscall read went wrong";
                    return iss::Ok;
                }
                uint64_t syscall_num = loaded_payload.at(0);
                if(syscall_num == 64) // SYS_WRITE
                    return this->execute_sys_write(this, loaded_payload, PLAT::MEM);
                SCCERR(owner->hier_name()) << "tohost syscall with number 0x" << std::hex << syscall_num << std::dec << " (" << syscall_num
                                           << ") not implemented";
                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
                this->interrupt_sim = payload_addr;
                return iss::Ok;
            }
            SCCERR(owner->hier_name()) << "tohost functionality not implemented for device " << device << " and command " << command;
            this->reg.trap_state = std::numeric_limits<uint32_t>::max();
            this->interrupt_sim = payload_addr;
            return iss::Ok;
        }
        auto res = owner->write_mem(addr.val, length, data) ? iss::Ok : iss::Err;
        // clear MTIP on mtimecmp write
        if(addr.val == 0x2004000) {
            reg_t val;
            this->read_csr(iss::arch::mip, val);
            if(val & (1ULL << 7))
                this->write_csr(iss::arch::mip, val & ~(1ULL << 7));
        }
        return res;
    }
    iss::status read_csr(unsigned addr, reg_t& val) override {
 #ifndef CWR_SYSTEMC
        if((addr == iss::arch::time || addr == iss::arch::timeh) && owner->mtime_o.get_interface(0)) {
            uint64_t time_val;
            bool ret = owner->mtime_o->nb_peek(time_val);
            if(addr == iss::arch::time) {
                val = static_cast<reg_t>(time_val);
            } else if(addr == iss::arch::timeh) {
                if(sizeof(reg_t) != 4)
                    return iss::Err;
                val = static_cast<reg_t>(time_val >> 32);
            }
            return ret ? iss::Ok : iss::Err;
 #else
        if((addr == iss::arch::time || addr == iss::arch::timeh)) {
-            uint64_t time_val = owner->mtime_i.read();
+            uint64_t time_val = owner->mtime_i.get_interface() ? owner->mtime_i.read() : 0;
            if(addr == iss::arch::time) {
                val = static_cast<reg_t>(time_val);
            } else if(addr == iss::arch::timeh) {
@@ -135,14 +139,13 @@ public:
                val = static_cast<reg_t>(time_val >> 32);
            }
            return iss::Ok;
 #endif
        } else {
            return PLAT::read_csr(addr, val);
        }
    }
    void wait_until(uint64_t flags) override {
-        SCCDEBUG(owner->name()) << "Sleeping until interrupt";
+        SCCDEBUG(owner->hier_name()) << "Sleeping until interrupt";
        while(this->reg.pending_trap == 0 && (this->csr[iss::arch::mip] & this->csr[iss::arch::mie]) == 0) {
            sc_core::wait(wfi_evt);
        }
@@ -173,13 +176,12 @@ public:
            this->csr[iss::arch::mip] &= ~mask;
        this->check_interrupt();
        if(value)
-            SCCTRACE(owner->name()) << "Triggering interrupt " << id << " Pending trap: " << this->reg.pending_trap;
+            SCCTRACE(owner->hier_name()) << "Triggering interrupt " << id << " Pending trap: " << this->reg.pending_trap;
    }
 private:
-    sysc::tgfs::core_complex* const owner;
+    sysc::tgfs::core_complex_if* const owner{nullptr};
    sc_core::sc_event wfi_evt;
    uint64_t hostvar{std::numeric_limits<uint64_t>::max()};
    unsigned to_host_wr_cnt = 0;
    bool first{true};
 };
--- a/src/vm/asmjit/vm_tgc5c.cpp
+++ b/src/vm/asmjit/vm_tgc5c.cpp
--- a/src/vm/fp_functions.cpp
+++ b/src/vm/fp_functions.cpp
@@ -33,6 +33,7 @@
 ////////////////////////////////////////////////////////////////////////////////
 #include "fp_functions.h"
 #include <array>
 extern "C" {
 #include "internals.h"
@@ -43,9 +44,10 @@ extern "C" {
 #include <limits>
 using this_t = uint8_t*;
-const uint8_t rmm_map[] = {
+// this does not include any reserved rm or the DYN rm, as the DYN rm should be taken care of in the vm_impl
-    softfloat_round_near_even /*RNE*/,   softfloat_round_minMag /*RTZ*/, softfloat_round_min /*RDN*/, softfloat_round_max /*RUP?*/,
+const std::array<uint8_t, 5> rmm_map = {
-    softfloat_round_near_maxMag /*RMM*/, softfloat_round_max /*RTZ*/,    softfloat_round_max /*RTZ*/, softfloat_round_max /*RTZ*/,
+    softfloat_round_near_even /*RNE*/, softfloat_round_minMag /*RTZ*/, softfloat_round_min /*RDN*/, softfloat_round_max /*RUP?*/,
    softfloat_round_near_maxMag /*RMM*/
 };
 const uint32_t quiet_nan32 = 0x7fC00000;
@@ -56,7 +58,7 @@ uint32_t fget_flags() { return softfloat_exceptionFlags & 0x1f; }
 uint32_t fadd_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float32_t r = f32_add(v1f, v2f);
    return r.v;
@@ -64,7 +66,7 @@ uint32_t fadd_s(uint32_t v1, uint32_t v2, uint8_t mode) {
 uint32_t fsub_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float32_t r = f32_sub(v1f, v2f);
    return r.v;
@@ -72,7 +74,7 @@ uint32_t fsub_s(uint32_t v1, uint32_t v2, uint8_t mode) {
 uint32_t fmul_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float32_t r = f32_mul(v1f, v2f);
    return r.v;
@@ -80,7 +82,7 @@ uint32_t fmul_s(uint32_t v1, uint32_t v2, uint8_t mode) {
 uint32_t fdiv_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float32_t r = f32_div(v1f, v2f);
    return r.v;
@@ -88,7 +90,7 @@ uint32_t fdiv_s(uint32_t v1, uint32_t v2, uint8_t mode) {
 uint32_t fsqrt_s(uint32_t v1, uint8_t mode) {
    float32_t v1f{v1};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float32_t r = f32_sqrt(v1f);
    return r.v;
@@ -130,18 +132,18 @@ uint32_t fcvt_s(uint32_t v1, uint32_t op, uint8_t mode) {
    softfloat_exceptionFlags = 0;
    float32_t r;
    switch(op) {
-    case 0: { // w->s, fp to int32
+    case 0: { // FCVT__W__S
-        uint_fast32_t res = f32_to_i32(v1f, rmm_map[mode & 0x7], true);
+        uint_fast32_t res = f32_to_i32(v1f, rmm_map.at(mode), true);
        return (uint32_t)res;
    }
-    case 1: { // wu->s
+    case 1: { // FCVT__WU__S
-        uint_fast32_t res = f32_to_ui32(v1f, rmm_map[mode & 0x7], true);
+        uint_fast32_t res = f32_to_ui32(v1f, rmm_map.at(mode), true);
        return (uint32_t)res;
    }
-    case 2: // s->w
+    case 2: // FCVT__S__W
-        r = i32_to_f32(v1);
+        r = i32_to_f32((int32_t)v1);
        return r.v;
-    case 3: // s->wu
+    case 3: // FCVT__S__WU
        r = ui32_to_f32(v1);
        return r.v;
    }
@@ -149,12 +151,24 @@ uint32_t fcvt_s(uint32_t v1, uint32_t op, uint8_t mode) {
 }
 uint32_t fmadd_s(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t op, uint8_t mode) {
-    // op should be {softfloat_mulAdd_subProd(2), softfloat_mulAdd_subC(1)}
+    uint32_t F32_SIGN = 1UL << 31;
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    switch(op) {
    case 0: // FMADD_S
        break;
    case 1: // FMSUB_S
        v3 ^= F32_SIGN;
        break;
    case 2: // FNMADD_S
        v1 ^= F32_SIGN;
        v3 ^= F32_SIGN;
        break;
    case 3: // FNMSUB_S
        v1 ^= F32_SIGN;
        break;
    }
    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
-    float32_t res = softfloat_mulAddF32(v1, v2, v3, op & 0x1);
+    float32_t res = softfloat_mulAddF32(v1, v2, v3, 0);
    if(op > 1)
        res.v ^= 1ULL << 31;
    return res.v;
 }
@@ -189,8 +203,8 @@ uint32_t fclass_s(uint32_t v1) {
    uA.f = a;
    uiA = uA.ui;
-    uint_fast16_t infOrNaN = expF32UI(uiA) == 0xFF;
+    bool infOrNaN = expF32UI(uiA) == 0xFF;
-    uint_fast16_t subnormalOrZero = expF32UI(uiA) == 0;
+    bool subnormalOrZero = expF32UI(uiA) == 0;
    bool sign = signF32UI(uiA);
    bool fracZero = fracF32UI(uiA) == 0;
    bool isNaN = isNaNF32UI(uiA);
@@ -203,9 +217,13 @@ uint32_t fclass_s(uint32_t v1) {
 }
 uint32_t fconv_d2f(uint64_t v1, uint8_t mode) {
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    bool isNan = isNaNF64UI(v1);
-    bool nan = (v1 & defaultNaNF64UI) == defaultNaNF64UI;
+    bool isSNaN = softfloat_isSigNaNF64UI(v1);
-    if(nan) {
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    if(isNan) {
        if(isSNaN)
            softfloat_raiseFlags(softfloat_flag_invalid);
        return defaultNaNF32UI;
    } else {
        float32_t res = f64_to_f32(float64_t{v1});
@@ -214,11 +232,11 @@ uint32_t fconv_d2f(uint64_t v1, uint8_t mode) {
 }
 uint64_t fconv_f2d(uint32_t v1, uint8_t mode) {
-    bool nan = (v1 & defaultNaNF32UI) == defaultNaNF32UI;
+    bool infOrNaN = expF32UI(v1) == 0xFF;
-    if(nan) {
+    bool subnormalOrZero = expF32UI(v1) == 0;
    if(infOrNaN || subnormalOrZero) {
        return defaultNaNF64UI;
    } else {
        softfloat_roundingMode = rmm_map[mode & 0x7];
        float64_t res = f32_to_f64(float32_t{v1});
        return res.v;
    }
@@ -228,7 +246,7 @@ uint64_t fadd_d(uint64_t v1, uint64_t v2, uint8_t mode) {
    bool nan = (v1 & defaultNaNF32UI) == quiet_nan32;
    bool snan = softfloat_isSigNaNF32UI(v1);
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float64_t r = f64_add(v1f, v2f);
    return r.v;
@@ -236,7 +254,7 @@ uint64_t fadd_d(uint64_t v1, uint64_t v2, uint8_t mode) {
 uint64_t fsub_d(uint64_t v1, uint64_t v2, uint8_t mode) {
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float64_t r = f64_sub(v1f, v2f);
    return r.v;
@@ -244,7 +262,7 @@ uint64_t fsub_d(uint64_t v1, uint64_t v2, uint8_t mode) {
 uint64_t fmul_d(uint64_t v1, uint64_t v2, uint8_t mode) {
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float64_t r = f64_mul(v1f, v2f);
    return r.v;
@@ -252,7 +270,7 @@ uint64_t fmul_d(uint64_t v1, uint64_t v2, uint8_t mode) {
 uint64_t fdiv_d(uint64_t v1, uint64_t v2, uint8_t mode) {
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float64_t r = f64_div(v1f, v2f);
    return r.v;
@@ -260,7 +278,7 @@ uint64_t fdiv_d(uint64_t v1, uint64_t v2, uint8_t mode) {
 uint64_t fsqrt_d(uint64_t v1, uint8_t mode) {
    float64_t v1f{v1};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float64_t r = f64_sqrt(v1f);
    return r.v;
@@ -298,22 +316,23 @@ uint64_t fcmp_d(uint64_t v1, uint64_t v2, uint32_t op) {
 }
 uint64_t fcvt_d(uint64_t v1, uint32_t op, uint8_t mode) {
    float64_t v1f{v1};
    softfloat_exceptionFlags = 0;
    float64_t r;
    switch(op) {
-    case 0: { // l->d, fp to int32
+    case 0: { // l from d
-        int64_t res = f64_to_i64(v1f, rmm_map[mode & 0x7], true);
+        int64_t res = f64_to_i64(v1f, rmm_map.at(mode), true);
        return (uint64_t)res;
    }
-    case 1: { // lu->s
+    case 1: { // lu from d
-        uint64_t res = f64_to_ui64(v1f, rmm_map[mode & 0x7], true);
+        uint64_t res = f64_to_ui64(v1f, rmm_map.at(mode), true);
        return res;
    }
-    case 2: // s->l
+    case 2: // d from l
        r = i64_to_f64(v1);
        return r.v;
-    case 3: // s->lu
+    case 3: // d from lu
        r = ui64_to_f64(v1);
        return r.v;
    }
@@ -321,12 +340,24 @@ uint64_t fcvt_d(uint64_t v1, uint32_t op, uint8_t mode) {
 }
 uint64_t fmadd_d(uint64_t v1, uint64_t v2, uint64_t v3, uint32_t op, uint8_t mode) {
-    // op should be {softfloat_mulAdd_subProd(2), softfloat_mulAdd_subC(1)}
+    uint64_t F64_SIGN = 1ULL << 63;
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    switch(op) {
    case 0: // FMADD_D
        break;
    case 1: // FMSUB_D
        v3 ^= F64_SIGN;
        break;
    case 2: // FNMADD_D
        v1 ^= F64_SIGN;
        v3 ^= F64_SIGN;
        break;
    case 3: // FNMSUB_D
        v1 ^= F64_SIGN;
        break;
    }
    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
-    float64_t res = softfloat_mulAddF64(v1, v2, v3, op & 0x1);
+    float64_t res = softfloat_mulAddF64(v1, v2, v3, 0);
    if(op > 1)
        res.v ^= 1ULL << 63;
    return res.v;
 }
@@ -362,8 +393,8 @@ uint64_t fclass_d(uint64_t v1) {
    uA.f = a;
    uiA = uA.ui;
-    uint_fast16_t infOrNaN = expF64UI(uiA) == 0x7FF;
+    bool infOrNaN = expF64UI(uiA) == 0x7FF;
-    uint_fast16_t subnormalOrZero = expF64UI(uiA) == 0;
+    bool subnormalOrZero = expF64UI(uiA) == 0;
    bool sign = signF64UI(uiA);
    bool fracZero = fracF64UI(uiA) == 0;
    bool isNaN = isNaNF64UI(uiA);
@@ -381,9 +412,9 @@ uint64_t fcvt_32_64(uint32_t v1, uint32_t op, uint8_t mode) {
    float64_t r;
    switch(op) {
    case 0: // l->s, fp to int32
-        return f32_to_i64(v1f, rmm_map[mode & 0x7], true);
+        return f32_to_i64(v1f, rmm_map.at(mode), true);
    case 1: // wu->s
-        return f32_to_ui64(v1f, rmm_map[mode & 0x7], true);
+        return f32_to_ui64(v1f, rmm_map.at(mode), true);
    case 2: // s->w
        r = i32_to_f64(v1);
        return r.v;
@@ -399,11 +430,11 @@ uint32_t fcvt_64_32(uint64_t v1, uint32_t op, uint8_t mode) {
    float32_t r;
    switch(op) {
    case 0: { // wu->s
-        int32_t r = f64_to_i32(float64_t{v1}, rmm_map[mode & 0x7], true);
+        int32_t r = f64_to_i32(float64_t{v1}, rmm_map.at(mode), true);
        return r;
    }
    case 1: { // wu->s
-        uint32_t r = f64_to_ui32(float64_t{v1}, rmm_map[mode & 0x7], true);
+        uint32_t r = f64_to_ui32(float64_t{v1}, rmm_map.at(mode), true);
        return r;
    }
    case 2: // l->s, fp to int32
--- a/src/vm/interp/vm_tgc5c.cpp
+++ b/src/vm/interp/vm_tgc5c.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 20217-2024 MINRES Technologies GmbH
+ * Copyright (C) 2017-2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -31,6 +31,7 @@
 *******************************************************************************/
 // clang-format off
 #include <cstdint>
 #include <iss/arch/tgc5c.h>
 #include <iss/debugger/gdb_session.h>
 #include <iss/debugger/server.h>
@@ -43,6 +44,8 @@
 #include <exception>
 #include <vector>
 #include <sstream>
 #include <iss/instruction_decoder.h>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
@@ -93,7 +96,8 @@ protected:
    using compile_ret_t = virt_addr_t;
    using compile_func = compile_ret_t (this_class::*)(virt_addr_t &pc, code_word_t instr);
-    inline const char *name(size_t index){return index<traits::reg_aliases.size()?traits::reg_aliases[index]:"illegal";}
+    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
    virt_addr_t execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit) override;
@@ -102,7 +106,6 @@ protected:
    inline void raise(uint16_t trap_id, uint16_t cause){
        auto trap_val =  0x80ULL << 24 | (cause << 16) | trap_id;
        this->core.reg.trap_state = trap_val;
        this->template get_reg<uint32_t>(traits::NEXT_PC) = std::numeric_limits<uint32_t>::max();
    }
    inline void leave(unsigned lvl){
@@ -113,7 +116,12 @@ protected:
        this->core.wait_until(type);
    }
    inline void set_tval(uint64_t new_tval){
        tval = new_tval;
    }
    uint64_t fetch_count{0};
    uint64_t tval{0};
    using yield_t = boost::coroutines2::coroutine<void>::push_type;
    using coro_t = boost::coroutines2::coroutine<void>::pull_type;
@@ -142,20 +150,12 @@ private:
     * start opcode definitions
     ****************************************************************************/
    struct instruction_descriptor {
-        size_t length;
+        uint32_t length;
        uint32_t value;
        uint32_t mask;
        typename arch::traits<ARCH>::opcode_e op;
    };
    struct decoding_tree_node{
        std::vector<instruction_descriptor> instrs;
        std::vector<decoding_tree_node*> children;
        uint32_t submask = std::numeric_limits<uint32_t>::max();
        uint32_t value;
        decoding_tree_node(uint32_t value) : value(value){}
    };
    decoding_tree_node* root {nullptr};
    const std::array<instruction_descriptor, 87> instr_descr = {{
         /* entries are: size, valid value, valid mask, function ptr */
        {32, 0b00000000000000000000000000110111, 0b00000000000000000000000001111111, arch::traits<ARCH>::opcode_e::LUI},
@@ -247,6 +247,9 @@ private:
        {16, 0b0000000000000000, 0b1111111111111111, arch::traits<ARCH>::opcode_e::DII},
    }};
    //needs to be declared after instr_descr
    decoder instr_decoder;
    iss::status fetch_ins(virt_addr_t pc, uint8_t * data){
        if(this->core.has_mmu()) {
            auto phys_pc = this->core.virt2phys(pc);
@@ -260,73 +263,18 @@ private:
                    return iss::Err;
 //            }
        } else {
-            if (this->core.read(phys_addr_t(pc.access, pc.space, pc.val), 4, data) != iss::Ok)
+            if (this->core.read(iss::address_type::PHYSICAL, pc.access, pc.space, pc.val, 4, data) != iss::Ok)
                return iss::Err;
        }
        return iss::Ok;
    }
    void populate_decoding_tree(decoding_tree_node* root){
        //create submask
        for(auto instr: root->instrs){
            root->submask &= instr.mask;
        }
        //put each instr according to submask&encoding into children
        for(auto instr: root->instrs){
            bool foundMatch = false;
            for(auto child: root->children){
                //use value as identifying trait
                if(child->value == (instr.value&root->submask)){
                    child->instrs.push_back(instr);
                    foundMatch = true;
                }
            }
            if(!foundMatch){
                decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
                child->instrs.push_back(instr);
                root->children.push_back(child);
            }
        }
        root->instrs.clear();
        //call populate_decoding_tree for all children
        if(root->children.size() >1)
            for(auto child: root->children){
                populate_decoding_tree(child);      
            }
        else{
            //sort instrs by value of the mask, this works bc we want to have the least restrictive one last
            std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
            return instr1.mask > instr2.mask;
            }); 
        }
    }
    typename arch::traits<ARCH>::opcode_e  decode_instr(decoding_tree_node* node, code_word_t word){
        if(!node->children.size()){
            if(node->instrs.size() == 1) return node->instrs[0].op;
            for(auto instr : node->instrs){
                if((instr.mask&word) == instr.value) return instr.op;
            }
        }
        else{
            for(auto child : node->children){
                if (child->value == (node->submask&word)){
                    return decode_instr(child, word);
                }  
            }  
        }
        return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
    }
 };
 template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
    volatile CODE_WORD x = insn;
    insn = 2 * x;
 }
 template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
 // according to
 // https://stackoverflow.com/questions/8871204/count-number-of-1s-in-binary-representation
 #ifdef __GCC__
@@ -343,13 +291,16 @@ constexpr size_t bit_count(uint32_t u) {
 template <typename ARCH>
 vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
-: vm_base<ARCH>(core, core_id, cluster_id) {
+: vm_base<ARCH>(core, core_id, cluster_id)
-    root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
+, instr_decoder([this]() {
-    for(auto instr:instr_descr){
+        std::vector<generic_instruction_descriptor> g_instr_descr;
-        root->instrs.push_back(instr);
+        g_instr_descr.reserve(instr_descr.size());
-    }
+        for (uint32_t i = 0; i < instr_descr.size(); ++i) {
-    populate_decoding_tree(root);
+            generic_instruction_descriptor new_instr_descr {instr_descr[i].value, instr_descr[i].mask, i};
-}
+            g_instr_descr.push_back(new_instr_descr);
        }
        return std::move(g_instr_descr);
    }()) {}
 inline bool is_icount_limit_enabled(finish_cond_e cond){
    return (cond & finish_cond_e::ICOUNT_LIMIT) == finish_cond_e::ICOUNT_LIMIT;
@@ -379,16 +330,24 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
    while(!this->core.should_stop() &&
            !(is_icount_limit_enabled(cond) && icount >= count_limit) &&
            !(is_fcount_limit_enabled(cond) && fetch_count >= count_limit)){
-        fetch_count++;
+        if(this->debugging_enabled())
            this->tgt_adapter->check_continue(*PC);
        pc.val=*PC;
        if(fetch_ins(pc, data)!=iss::Ok){
-            this->do_sync(POST_SYNC, std::numeric_limits<unsigned>::max());
+            if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, std::numeric_limits<unsigned>::max());
-            pc.val = super::core.enter_trap(std::numeric_limits<uint64_t>::max(), pc.val, 0);
+            process_spawn_blocks();
            if(this->sync_exec && POST_SYNC) this->do_sync(PRE_SYNC, std::numeric_limits<unsigned>::max());
            pc.val = super::core.enter_trap(arch::traits<ARCH>::RV_CAUSE_FETCH_ACCESS<<16, pc.val, 0);
        } else {
            if (is_jump_to_self_enabled(cond) &&
                    (instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
-            auto inst_id = decode_instr(root, instr);
+            uint32_t inst_index = instr_decoder.decode_instr(instr);
            opcode_e inst_id = arch::traits<ARCH>::opcode_e::MAX_OPCODE;;
            if(inst_index <instr_descr.size())
                inst_id = instr_descr[inst_index].op;
            // pre execution stuff
-             this->core.reg.last_branch = 0;
+            this->core.reg.last_branch = 0;
            if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast<unsigned>(inst_id));
            try{
                switch(inst_id){
@@ -463,14 +422,16 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
-                                        if(imm % traits::INSTR_ALIGNMENT) {
+                                        uint32_t new_pc = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int32_t)sext<21>(imm) ));
                                        if(new_pc % traits::INSTR_ALIGNMENT) {
                                            set_tval(new_pc);
                                            raise(0, 0);
                                        }
                                        else {
                                            if(rd != 0) {
                                                *(X+rd) = (uint32_t)((uint64_t)(*PC ) + (uint64_t)(4 ));
                                            }
-                                            *NEXT_PC = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int32_t)sext<21>(imm) ));
+                                            *NEXT_PC = new_pc;
                                            this->core.reg.last_branch = 1;
                                        }
                                    }
@@ -500,6 +461,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        uint32_t addr_mask = (uint32_t)- 2;
                                        uint32_t new_pc = (uint32_t)(((uint64_t)(*(X+rs1) ) + (uint64_t)((int16_t)sext<12>(imm) )) & (int64_t)(addr_mask ));
                                        if(new_pc % traits::INSTR_ALIGNMENT) {
                                            set_tval(new_pc);
                                            raise(0, 0);
                                        }
                                        else {
@@ -534,11 +496,13 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        if(*(X+rs1) == *(X+rs2)) {
-                                            if((uint32_t)(imm ) % traits::INSTR_ALIGNMENT) {
+                                            uint32_t new_pc = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
                                            if(new_pc % traits::INSTR_ALIGNMENT) {
                                                set_tval(new_pc);
                                                raise(0, 0);
                                            }
                                            else {
-                                                *NEXT_PC = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
+                                                *NEXT_PC = new_pc;
                                                this->core.reg.last_branch = 1;
                                            }
                                        }
@@ -567,11 +531,13 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        if(*(X+rs1) != *(X+rs2)) {
-                                            if((uint32_t)(imm ) % traits::INSTR_ALIGNMENT) {
+                                            uint32_t new_pc = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
                                            if(new_pc % traits::INSTR_ALIGNMENT) {
                                                set_tval(new_pc);
                                                raise(0, 0);
                                            }
                                            else {
-                                                *NEXT_PC = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
+                                                *NEXT_PC = new_pc;
                                                this->core.reg.last_branch = 1;
                                            }
                                        }
@@ -600,11 +566,13 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        if((int32_t)*(X+rs1) < (int32_t)*(X+rs2)) {
-                                            if((uint32_t)(imm ) % traits::INSTR_ALIGNMENT) {
+                                            uint32_t new_pc = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
                                            if(new_pc % traits::INSTR_ALIGNMENT) {
                                                set_tval(new_pc);
                                                raise(0, 0);
                                            }
                                            else {
-                                                *NEXT_PC = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
+                                                *NEXT_PC = new_pc;
                                                this->core.reg.last_branch = 1;
                                            }
                                        }
@@ -633,11 +601,13 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        if((int32_t)*(X+rs1) >= (int32_t)*(X+rs2)) {
-                                            if((uint32_t)(imm ) % traits::INSTR_ALIGNMENT) {
+                                            uint32_t new_pc = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
                                            if(new_pc % traits::INSTR_ALIGNMENT) {
                                                set_tval(new_pc);
                                                raise(0, 0);
                                            }
                                            else {
-                                                *NEXT_PC = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
+                                                *NEXT_PC = new_pc;
                                                this->core.reg.last_branch = 1;
                                            }
                                        }
@@ -666,11 +636,13 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        if(*(X+rs1) < *(X+rs2)) {
-                                            if((uint32_t)(imm ) % traits::INSTR_ALIGNMENT) {
+                                            uint32_t new_pc = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
                                            if(new_pc % traits::INSTR_ALIGNMENT) {
                                                set_tval(new_pc);
                                                raise(0, 0);
                                            }
                                            else {
-                                                *NEXT_PC = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
+                                                *NEXT_PC = new_pc;
                                                this->core.reg.last_branch = 1;
                                            }
                                        }
@@ -699,11 +671,13 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        if(*(X+rs1) >= *(X+rs2)) {
-                                            if((uint32_t)(imm ) % traits::INSTR_ALIGNMENT) {
+                                            uint32_t new_pc = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
                                            if(new_pc % traits::INSTR_ALIGNMENT) {
                                                set_tval(new_pc);
                                                raise(0, 0);
                                            }
                                            else {
-                                                *NEXT_PC = (uint32_t)((uint64_t)(*PC ) + (uint64_t)((int16_t)sext<13>(imm) ));
+                                                *NEXT_PC = new_pc;
                                                this->core.reg.last_branch = 1;
                                            }
                                        }
@@ -732,9 +706,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        uint32_t load_address = (uint32_t)((uint64_t)(*(X+rs1) ) + (uint64_t)((int16_t)sext<12>(imm) ));
-                                        int8_t res_27 = super::template read_mem<int8_t>(traits::MEM, load_address);
+                                        int8_t res_1 = super::template read_mem<int8_t>(traits::MEM, load_address);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        int8_t res = (int8_t)res_27;
+                                        int8_t res = (int8_t)res_1;
                                        if(rd != 0) {
                                            *(X+rd) = (uint32_t)res;
                                        }
@@ -763,9 +737,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        uint32_t load_address = (uint32_t)((uint64_t)(*(X+rs1) ) + (uint64_t)((int16_t)sext<12>(imm) ));
-                                        int16_t res_28 = super::template read_mem<int16_t>(traits::MEM, load_address);
+                                        int16_t res_2 = super::template read_mem<int16_t>(traits::MEM, load_address);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        int16_t res = (int16_t)res_28;
+                                        int16_t res = (int16_t)res_2;
                                        if(rd != 0) {
                                            *(X+rd) = (uint32_t)res;
                                        }
@@ -794,9 +768,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        uint32_t load_address = (uint32_t)((uint64_t)(*(X+rs1) ) + (uint64_t)((int16_t)sext<12>(imm) ));
-                                        int32_t res_29 = super::template read_mem<int32_t>(traits::MEM, load_address);
+                                        int32_t res_3 = super::template read_mem<int32_t>(traits::MEM, load_address);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        int32_t res = (int32_t)res_29;
+                                        int32_t res = (int32_t)res_3;
                                        if(rd != 0) {
                                            *(X+rd) = (uint32_t)res;
                                        }
@@ -825,9 +799,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        uint32_t load_address = (uint32_t)((uint64_t)(*(X+rs1) ) + (uint64_t)((int16_t)sext<12>(imm) ));
-                                        uint8_t res_30 = super::template read_mem<uint8_t>(traits::MEM, load_address);
+                                        uint8_t res_4 = super::template read_mem<uint8_t>(traits::MEM, load_address);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint8_t res = res_30;
+                                        uint8_t res = res_4;
                                        if(rd != 0) {
                                            *(X+rd) = (uint32_t)res;
                                        }
@@ -856,9 +830,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        uint32_t load_address = (uint32_t)((uint64_t)(*(X+rs1) ) + (uint64_t)((int16_t)sext<12>(imm) ));
-                                        uint16_t res_31 = super::template read_mem<uint16_t>(traits::MEM, load_address);
+                                        uint16_t res_5 = super::template read_mem<uint16_t>(traits::MEM, load_address);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint16_t res = res_31;
+                                        uint16_t res = res_5;
                                        if(rd != 0) {
                                            *(X+rd) = (uint32_t)res;
                                        }
@@ -1485,7 +1459,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::ECALL: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "ecall");
+                        //No disass specified, using instruction name
                        std::string mnemonic = "ecall";
                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 4;
@@ -1498,7 +1474,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::EBREAK: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "ebreak");
+                        //No disass specified, using instruction name
                        std::string mnemonic = "ebreak";
                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 4;
@@ -1511,7 +1489,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::MRET: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "mret");
+                        //No disass specified, using instruction name
                        std::string mnemonic = "mret";
                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 4;
@@ -1524,7 +1504,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::WFI: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "wfi");
+                        //No disass specified, using instruction name
                        std::string mnemonic = "wfi";
                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 4;
@@ -1556,9 +1538,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    else {
                                        uint32_t xrs1 = *(X+rs1);
                                        if(rd != 0) {
-                                            uint32_t res_32 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                            uint32_t res_6 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                            if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                            uint32_t xrd = res_32;
+                                            uint32_t xrd = res_6;
                                            super::template write_mem<uint32_t>(traits::CSR, csr, xrs1);
                                            if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
                                            *(X+rd) = xrd;
@@ -1591,9 +1573,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
-                                        uint32_t res_33 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                        uint32_t res_7 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint32_t xrd = res_33;
+                                        uint32_t xrd = res_7;
                                        uint32_t xrs1 = *(X+rs1);
                                        if(rs1 != 0) {
                                            super::template write_mem<uint32_t>(traits::CSR, csr, xrd | xrs1);
@@ -1626,9 +1608,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
-                                        uint32_t res_34 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                        uint32_t res_8 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint32_t xrd = res_34;
+                                        uint32_t xrd = res_8;
                                        uint32_t xrs1 = *(X+rs1);
                                        if(rs1 != 0) {
                                            super::template write_mem<uint32_t>(traits::CSR, csr, xrd & ~ xrs1);
@@ -1661,9 +1643,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
-                                        uint32_t res_35 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                        uint32_t res_9 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint32_t xrd = res_35;
+                                        uint32_t xrd = res_9;
                                        super::template write_mem<uint32_t>(traits::CSR, csr, (uint32_t)zimm);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
                                        if(rd != 0) {
@@ -1693,9 +1675,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
-                                        uint32_t res_36 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                        uint32_t res_10 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint32_t xrd = res_36;
+                                        uint32_t xrd = res_10;
                                        if(zimm != 0) {
                                            super::template write_mem<uint32_t>(traits::CSR, csr, xrd | (uint32_t)zimm);
                                            if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
@@ -1727,9 +1709,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
-                                        uint32_t res_37 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                        uint32_t res_11 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint32_t xrd = res_37;
+                                        uint32_t xrd = res_11;
                                        if(zimm != 0) {
                                            super::template write_mem<uint32_t>(traits::CSR, csr, xrd & ~ ((uint32_t)zimm));
                                            if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
@@ -1748,7 +1730,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
                        auto mnemonic = fmt::format(
-                            "{mnemonic:10} {rs1}, {rd}, {imm}", fmt::arg("mnemonic", "fence.i"),
+                            "{mnemonic:10} {rs1}, {rd}, {imm}", fmt::arg("mnemonic", "fence_i"),
                            fmt::arg("rs1", name(rs1)), fmt::arg("rd", name(rd)), fmt::arg("imm", imm));
                        this->core.disass_output(pc.val, mnemonic);
                    }
@@ -2064,9 +2046,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    // execute instruction
                    {
                        uint32_t offs = (uint32_t)((uint64_t)(*(X+rs1 + 8) ) + (uint64_t)(uimm ));
-                        int32_t res_38 = super::template read_mem<int32_t>(traits::MEM, offs);
+                        int32_t res_12 = super::template read_mem<int32_t>(traits::MEM, offs);
                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                        *(X+rd + 8) = (uint32_t)(int32_t)res_38;
+                        *(X+rd + 8) = (uint32_t)(int32_t)res_12;
                    }
                    break;
                }// @suppress("No break at end of case")
@@ -2122,7 +2104,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    uint8_t nzimm = ((bit_sub<2,5>(instr)) | (bit_sub<12,1>(instr) << 5));
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "c.nop");
+                        //No disass specified, using instruction name
                        std::string mnemonic = "c.nop";
                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 2;
@@ -2228,7 +2212,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    uint8_t rd = ((bit_sub<7,5>(instr)));
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, ".reserved_clui");
+                        //No disass specified, using instruction name
                        std::string mnemonic = ".reserved_clui";
                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 2;
@@ -2486,9 +2472,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                        }
                        else {
                            uint32_t offs = (uint32_t)((uint64_t)(*(X+2) ) + (uint64_t)(uimm ));
-                            int32_t res_39 = super::template read_mem<int32_t>(traits::MEM, offs);
+                            int32_t res_13 = super::template read_mem<int32_t>(traits::MEM, offs);
                            if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                            *(X+rd) = (uint32_t)(int32_t)res_39;
+                            *(X+rd) = (uint32_t)(int32_t)res_13;
                        }
                    }
                    break;
@@ -2534,7 +2520,8 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    // execute instruction
                    {
                                    if(rs1 && rs1 < traits::RFS) {
-                                        *NEXT_PC = *(X+(uint32_t)(rs1 ) % traits::RFS) & (uint32_t)(~ 1 );
+                                        uint32_t addr_mask = (uint32_t)- 2;
                                        *NEXT_PC = *(X+(uint32_t)(rs1 ) % traits::RFS) & addr_mask;
                                        this->core.reg.last_branch = 1;
                                    }
                                    else {
@@ -2546,7 +2533,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::__reserved_cmv: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, ".reserved_cmv");
+                        //No disass specified, using instruction name
                        std::string mnemonic = ".reserved_cmv";
                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 2;
@@ -2600,9 +2589,10 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
                                        uint32_t addr_mask = (uint32_t)- 2;
                                        uint32_t new_pc = *(X+rs1);
                                        *(X+1) = (uint32_t)((uint64_t)(*PC ) + (uint64_t)(2 ));
-                                        *NEXT_PC = new_pc & (uint32_t)(~ 1 );
+                                        *NEXT_PC = new_pc & addr_mask;
                                        this->core.reg.last_branch = 1;
                                    }
                                }
@@ -2611,7 +2601,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::C__EBREAK: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "c.ebreak");
+                        //No disass specified, using instruction name
                        std::string mnemonic = "c.ebreak";
                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 2;
@@ -2650,7 +2642,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::DII: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "dii");
+                        //No disass specified, using instruction name
                        std::string mnemonic = "dii";
                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 2;
@@ -2673,16 +2667,18 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
            //    this->core.reg.trap_state =  this->core.reg.pending_trap;
            // trap check
            if(trap_state!=0){
-                super::core.enter_trap(trap_state, pc.val, instr);
+                //In case of Instruction address misaligned (cause = 0 and trapid = 0) need the targeted addr (in tval)
                auto mcause = (trap_state>>16) & 0xff; 
                super::core.enter_trap(trap_state, pc.val, mcause ? instr:tval);
            } else {
                icount++;
                instret++;
            }
-            cycle++;
+            *PC = *NEXT_PC;
            pc.val=*NEXT_PC;
            this->core.reg.PC = this->core.reg.NEXT_PC;
            this->core.reg.trap_state =  this->core.reg.pending_trap;
        }
        fetch_count++;
        cycle++;
    }
    return pc;
 }
@@ -2699,11 +2695,12 @@ std::unique_ptr<vm_if> create<arch::tgc5c>(arch::tgc5c *core, unsigned short por
 } // namespace iss
 #include <iss/arch/riscv_hart_m_p.h>
 #include <iss/arch/riscv_hart_msu_vp.h>
 #include <iss/arch/riscv_hart_mu_p.h>
 #include <iss/factory.h>
 namespace iss {
 namespace {
-volatile std::array<bool, 2> dummy = {
+volatile std::array<bool, 3> dummy = {
        core_factory::instance().register_creator("tgc5c|m_p|interp", [](unsigned port, void* init_data) -> std::tuple<cpu_ptr, vm_ptr>{
            auto* cpu = new iss::arch::riscv_hart_m_p<iss::arch::tgc5c>();
 		    auto vm = new interp::tgc5c::vm_impl<arch::tgc5c>(*cpu, false);
@@ -2723,6 +2720,16 @@ volatile std::array<bool, 2> dummy = {
                cpu->set_semihosting_callback(*cb);
            }
            return {cpu_ptr{cpu}, vm_ptr{vm}};
        }),
        core_factory::instance().register_creator("tgc5c|mus_vp|interp", [](unsigned port, void* init_data) -> std::tuple<cpu_ptr, vm_ptr>{
            auto* cpu = new iss::arch::riscv_hart_msu_vp<iss::arch::tgc5c>();
 		    auto vm = new interp::tgc5c::vm_impl<arch::tgc5c>(*cpu, false);
 		    if (port != 0) debugger::server<debugger::gdb_session>::run_server(vm, port);
            if(init_data){
                auto* cb = reinterpret_cast<semihosting_cb_t<arch::traits<arch::tgc5c>::reg_t>*>(init_data);
                cpu->set_semihosting_callback(*cb);
            }
            return {cpu_ptr{cpu}, vm_ptr{vm}};
        })
 };
 }
--- a/src/vm/llvm/vm_tgc5c.cpp
+++ b/src/vm/llvm/vm_tgc5c.cpp
--- a/src/vm/tcc/vm_tgc5c.cpp
+++ b/src/vm/tcc/vm_tgc5c.cpp
Author	SHA1	Message	Date
Eyck Jentzsch	502f3e8df9	fixes htif behavior and instrumentation interface	2025-03-14 19:43:20 +01:00
Hongyu Liu	88475bfa55	changes the io_buf	2025-03-14 12:14:20 +01:00
Eyck Jentzsch	23842742a6	factors clic & pmp into separate units	2025-03-13 12:13:41 +01:00
Eyck Jentzsch	a13b7ac6d3	separates functional memory into separate unit	2025-03-12 09:26:51 +01:00
Eyck Jentzsch	fb0f6255e9	replaces virtual functions with memory pointers (kind of)	2025-03-11 08:31:25 +01:00
Eyck Jentzsch	57d5ea92be	moves common functionality to base class	2025-03-10 16:00:26 +01:00
Eyck Jentzsch	383d762abc	applies clang-format and updates SystemC HTIF implementation	2025-03-06 12:10:12 +01:00
Eyck Jentzsch	03cbd305c6	replaces literal constant with symbolic definition	2025-02-28 19:34:07 +01:00
Eyck Jentzsch	9f5326c110	extends htif for 32bit systems	2025-02-13 13:39:47 +01:00
Eyck Jentzsch	f4718c6de3	Merge remote-tracking branch 'origin/feature/htif' into develop	2025-02-13 09:34:31 +01:00
Eyck Jentzsch	53de21eef9	adds generator changed output	2025-02-12 20:45:04 +01:00
Eyck-Alexander Jentzsch	d443c89c87	removes llvm from dbt-rise-tgc build system as it is handled in dbt-rise-core	2024-12-28 13:10:49 +01:00
Eyck-Alexander Jentzsch	9a2df32d57	updates templates	2024-12-28 13:07:07 +01:00
Eyck-Alexander Jentzsch	be0f783af8	adds cycle increment to tcc	2024-12-28 13:06:46 +01:00
Eyck-Alexander Jentzsch	1089800682	updates vm_impls and core.h to work with new vm_base	2024-12-28 08:24:09 +01:00
Eyck Jentzsch	a6a6f51f0b	adds clang-format fixes	2024-12-06 15:50:50 +01:00
Eyck-Alexander Jentzsch	21e1f791ad	corrects sysc integration template and corresponding file	2024-12-06 09:49:02 +01:00
Eyck-Alexander Jentzsch	be6f5791fa	adds update to cyclecount after each instr for asmjit	2024-11-26 20:26:18 +01:00
Eyck-Alexander Jentzsch	d907dc7f54	corrects tohost functionality and minor cleanup	2024-11-22 17:35:12 +01:00
Eyck-Alexander Jentzsch	75e81ce236	copies new tohost implemenation from hart_m_p	2024-11-14 16:51:26 +01:00
Eyck-Alexander Jentzsch	82a70efdb8	small reorder to make tohost output more readable	2024-11-14 16:51:26 +01:00
Eyck-Alexander Jentzsch	978c3db06e	minor improvements to readability	2024-11-14 16:51:26 +01:00
Eyck-Alexander Jentzsch	0e88664ff7	adds better tohost writing implementation, allowing the standard riscv-isa-test benchmarks to run	2024-11-14 16:51:26 +01:00
Eyck-Alexander Jentzsch	ac818f304d	increases verbosity incase elf loading goes wrong	2024-10-21 16:42:58 +02:00
Eyck-Alexander Jentzsch	ad60449073	updates generated cores	2024-09-27 20:04:58 +02:00
Eyck-Alexander Jentzsch	b45b3589fa	updates templates to immediately trap when gen_trap is called	2024-09-27 20:03:51 +02:00
Eyck-Alexander Jentzsch	1fb7e8fcea	improves logging output	2024-09-24 08:39:34 +02:00
Eyck-Alexander Jentzsch	5f9d0beafb	corrects softfloat to comply with RVD ACT	2024-09-23 22:22:57 +02:00
Eyck-Alexander Jentzsch	4c0d1c75aa	adds addr formatting to logging	2024-09-23 12:21:43 +02:00
Eyck-Alexander Jentzsch	2f3abf2f76	adds namespaces for ELFIO	2024-09-23 11:55:18 +02:00
Eyck Jentzsch	62768bf81e	applies clang format	2024-09-23 10:05:33 +02:00
Eyck Jentzsch	f6be8ec006	adds elfio test utility	2024-09-23 09:29:08 +02:00
Eyck Jentzsch	a8f56b6e27	removes code dupication by unifying elf file read	2024-09-23 09:28:27 +02:00
Eyck-Alexander Jentzsch	76ea0db25d	adds newest generated vm_impl	2024-08-17 23:19:51 +02:00
Eyck Jentzsch	ec1b820c18	fixes target xml generation	2024-08-17 19:36:53 +02:00
Eyck Jentzsch	64329cf0f6	fixes use of icount vs. cycle	2024-08-17 19:36:40 +02:00
Eyck Jentzsch	9de0aed84d	expands some error message	2024-08-17 16:55:49 +02:00
Eyck Jentzsch	bb4e2766d1	applies clang-format	2024-08-17 16:12:57 +02:00
Eyck Jentzsch	0996d15bd4	removes debug code	2024-08-17 12:48:48 +02:00
Eyck Jentzsch	6305efa7c2	implements proper target XML generation incl. CSRs	2024-08-17 12:40:40 +02:00
Eyck Jentzsch	de79adc50d	updates debugger hook to stop before fetching instructions this relates to https://github.com/Minres/DBT-RISE-RISCV/issues/8 : Debugger loses control when trap vector fetch fails and https://github.com/Minres/DBT-RISE-RISCV/issues/7 : Two debugger single-steps are required at reset vector	2024-08-17 12:39:54 +02:00
Eyck Jentzsch	0473aa5344	fixes SystemC wrapper wrt. templated core_complex	2024-08-17 12:34:17 +02:00
Eyck-Alexander Jentzsch	a45fcd28db	updates fn calling generation	2024-08-17 08:22:04 +02:00
Eyck-Alexander Jentzsch	0f15032210	removes gen_wait as wait can be called like any other extern function	2024-08-14 15:25:06 +02:00
Eyck-Alexander Jentzsch	efc11d87a5	updates template with fcsr check, adds extra braces on If Statements	2024-08-14 14:32:58 +02:00
Eyck-Alexander Jentzsch	4a19e27926	adds changes due to generator being more inline with others	2024-08-14 13:52:08 +02:00
Eyck-Alexander Jentzsch	c15cdb0955	expands return values of jit creating functions to inhibit endless trapping	2024-08-14 11:49:59 +02:00
Eyck-Alexander Jentzsch	6609d12582	adds flimit that gets properly evaluated in interp	2024-08-13 15:22:34 +02:00
Eyck-Alexander Jentzsch	b5341700aa	updates template and adds braces when using conditions	2024-08-13 08:55:14 +02:00
Eyck-Alexander Jentzsch	0b5062d21c	adds fp_functions here to remove dependencies in dbt-rise-core	2024-08-09 11:56:32 +02:00
Eyck-Alexander Jentzsch	fbca690b3b	replaces gen_wait, updates template to include fp_functions when necessary	2024-08-08 12:57:08 +02:00
Eyck-Alexander Jentzsch	235a7e6e24	updates template	2024-08-08 11:08:28 +02:00
Eyck-Alexander Jentzsch	62d21e1156	updates disass	2024-08-07 09:21:07 +02:00
Eyck-Alexander Jentzsch	9c51d6eade	improves interp, only calls decode once per instr	2024-08-07 09:20:11 +02:00
Eyck-Alexander Jentzsch	2878dca6b5	updates templates	2024-08-06 08:32:05 +02:00
Eyck Jentzsch	c28e8fd00c	removes left-overs	2024-08-04 18:57:20 +02:00
Eyck Jentzsch	b3cc9d2346	makes core_complex a template	2024-08-04 18:47:32 +02:00
Eyck Jentzsch	933f08494c	removes C++17 dependency from asmjit backend	2024-08-04 17:41:49 +02:00
Eyck Jentzsch	21f8eab432	adds regenerated tgc5c	2024-08-02 19:18:28 +02:00
Eyck Jentzsch	6ddb8da07f	fixes missing rename	2024-08-02 11:58:51 +02:00
Eyck Jentzsch	edf456c59f	fixes missing braces	2024-08-02 10:33:15 +02:00
Eyck Jentzsch	42efced1eb	fixes FCSR behavior if no floating point is implemented	2024-08-02 08:59:22 +02:00
Eyck Jentzsch	c376e34b2b	applies clang format	2024-08-01 11:02:10 +02:00
Eyck-Alexander Jentzsch	f579ec6e48	changes access to rounding mode to fail explicitly instead of unintended behavior	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	fd20e66f1f	changes softfloat API usage, all effected Instrs pass test suite	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	5d69b79232	reverts patches in softfloat	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	2edd68d1bd	refactors zeroProd branch to allow for better case handling	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	7ffa7667b6	fixes concerning FMADD_S, FMSUB_S, FNMADD_S, and FNSUB_S mostly about ensuring correct sign	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	93d89e07ca	removes wrong compile definition	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	17dcba4b90	updates softfloat to #b51ef8f of softfloat3 https://github.com/ucb-bar/berkeley-softfloat-3/	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	39d2518fdd	checkin: tgc5f builds and runs through	2024-07-31 12:30:41 +02:00
stas	a365110054	fix format	2024-07-30 13:34:23 +02:00
Eyck Jentzsch	d2efb23ff7	fixes cache behavior for fetches	2024-07-25 19:33:50 +02:00
Eyck-Alexander Jentzsch	04b7a09b19	updates date in templates	2024-07-25 17:25:12 +02:00
Eyck-Alexander Jentzsch	72b11beac5	moves decoder to dbt-rise-core	2024-07-25 10:13:38 +02:00
Eyck Jentzsch	e87b7d5fd0	applies clang-format	2024-07-24 14:48:50 +02:00
Eyck Jentzsch	5a2b96ef3e	adds logging categories for ISS	2024-07-24 12:30:07 +02:00
Eyck-Alexander Jentzsch	c6b99cd155	introduces new decoder to interp backend	2024-07-24 12:28:35 +02:00
Eyck-Alexander Jentzsch	b1306c3a47	improves instruction decoding by avoiding copying, replaces .size()	2024-07-24 08:54:37 +02:00
Eyck-Alexander Jentzsch	0d6bf924ed	changes jh.globals from map to vector	2024-07-23 15:45:51 +02:00
Eyck-Alexander Jentzsch	86de536c8f	changes jh globals to seperate riscv specifics	2024-07-23 14:35:31 +02:00
Eyck-Alexander Jentzsch	051dd5e2d3	updates templates for decoder in seperate class, adds again generated templates	2024-07-23 13:46:10 +02:00
Eyck-Alexander Jentzsch	e3942be776	Introduces decoder in a seperate class	2024-07-23 13:08:53 +02:00
Eyck-Alexander Jentzsch	6ee484a771	moves instruction decoder into own class	2024-07-23 11:30:33 +02:00
Eyck-Alexander Jentzsch	60808c8649	corrects template since util fns are no longer vm_base members	2024-07-23 11:29:56 +02:00
Eyck-Alexander Jentzsch	0432803d82	updates templates and vm impls for better LAST_BRANCH handling	2024-07-22 09:04:17 +02:00
Eyck-Alexander Jentzsch	4f5d9214ed	adds newly generated instr.yaml	2024-07-18 14:31:36 +02:00
Eyck-Alexander Jentzsch	d42d2ce533	corrects illegal instruction for llvm	2024-07-18 14:04:23 +02:00
Eyck-Alexander Jentzsch	236d12d7f5	integrates gen_bool for Conditions (was truncation) into llvm	2024-07-18 13:30:42 +02:00
Eyck-Alexander Jentzsch	e1b6cab890	removes setting of NEXT_PC to max when trapping in llvm and asmjit, adds default disass to llvm	2024-07-18 12:02:40 +02:00
Eyck-Alexander Jentzsch	8361f88718	removes setting of NEXT_PC to max if trap	2024-07-18 11:37:53 +02:00
Eyck-Alexander Jentzsch	2ec7ea4b41	removes leftover gen_sync in asmjit	2024-07-17 22:39:12 +02:00
Eyck-Alexander Jentzsch	b24965d321	corrects gen_sync update order, improves illegal instruction	2024-07-17 20:52:01 +02:00
Eyck-Alexander Jentzsch	244bf6d2f2	corrects gen_sync before trap check, improves illegal_instruction	2024-07-17 20:25:49 +02:00
Eyck-Alexander Jentzsch	1a4465a371	changes template: adds correct illegal instruction, reorders gen_sync to allow correct instr id eve when trapping, adds newly generated vm	2024-07-17 19:59:01 +02:00
Eyck-Alexander Jentzsch	fa82a50824	fixes typo in templates	2024-07-17 17:24:17 +02:00
Eyck-Alexander Jentzsch	6dc17857da	updates template	2024-07-17 15:36:08 +02:00
Eyck-Alexander Jentzsch	11a30caae8	integrates generator changes to canPrecompute	2024-07-17 15:14:13 +02:00
Eyck-Alexander Jentzsch	ac1a26a10c	integrates new tval changes into llvm	2024-07-17 14:17:02 +02:00
Eyck-Alexander Jentzsch	7a199e122d	integrates new tval changes into asmjit	2024-07-17 09:42:12 +02:00
Eyck-Alexander Jentzsch	d8c3d2e19c	integrates new tval changes into tcc	2024-07-16 17:35:23 +02:00
Eyck-Alexander Jentzsch	375755999a	integrates new tval changes	2024-07-16 15:32:35 +02:00
stas	9996fd4833	change cache line size to 64	2024-07-11 14:03:58 +02:00
Eyck-Alexander Jentzsch	149b3136d2	updates generated files	2024-07-10 12:55:36 +02:00
Eyck-Alexander Jentzsch	ac8f8b0539	updates vms with fixed Zc in tgc5c.core_desc	2024-07-10 12:51:59 +02:00
Eyck-Alexander Jentzsch	b2cbf90d0b	updates generated files	2024-07-10 12:51:59 +02:00
Eyck-Alexander Jentzsch	373145478e	updats file because of generator changes	2024-07-10 12:51:59 +02:00
Eyck-Alexander Jentzsch	55b0cea94f	changes vm_base util API	2024-07-10 12:51:59 +02:00
Eyck-Alexander Jentzsch	5b17599aa2	allows usage of std::variants	2024-07-10 12:51:59 +02:00
Eyck-Alexander Jentzsch	4cfb15c7cd	Asmjit and interp working	2024-07-10 12:51:31 +02:00
Eyck Jentzsch	63da7f8d57	applies clang-format	2024-07-09 13:57:11 +02:00
Eyck Jentzsch	fb4012fbd1	moves likely annotation	2024-07-09 13:52:10 +02:00
Eyck Jentzsch	24449f1c0f	fixes some elf load issue	2024-07-05 12:18:36 +02:00
Eyck Jentzsch	fd303c8343	fixes asmjit deprecation warning	2024-07-05 07:51:37 +02:00
`@@ -221,4 +221,3 @@ float32_t`
	`return uZ.f;`	`return uZ.f;`

	`}`	`}`