fixes htif behavior and instrumentation interface

changes the io_buf
factors clic & pmp into separate units
2025-03-14 19:43:20 +01:00 · 2025-03-14 12:14:20 +01:00 · 2025-03-13 12:13:41 +01:00 · 2025-03-12 09:26:51 +01:00 · 2025-03-11 08:31:25 +01:00 · 2025-03-10 16:00:26 +01:00
526 changed files with 59970 additions and 55624 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,10 @@ add_subdirectory(softfloat)
 set(LIB_SOURCES
    src/iss/plugin/instruction_count.cpp
    src/iss/arch/tgc5c.cpp
+    src/iss/mmio/memory_if.cpp
    src/vm/interp/vm_tgc5c.cpp
    src/vm/fp_functions.cpp
+    src/iss/debugger/csr_names.cpp
    src/iss/semihosting/semihosting.cpp
 )

@@ -108,16 +110,6 @@ if(TARGET yaml-cpp::yaml-cpp)
    target_link_libraries(${PROJECT_NAME} PUBLIC yaml-cpp::yaml-cpp)
 endif()

-if(WITH_LLVM)
-    find_package(LLVM)
-    target_compile_definitions(${PROJECT_NAME} PUBLIC ${LLVM_DEFINITIONS})
-    target_include_directories(${PROJECT_NAME} PUBLIC ${LLVM_INCLUDE_DIRS})
-
-    if(BUILD_SHARED_LIBS)
-        target_link_libraries(${PROJECT_NAME} PUBLIC ${LLVM_LIBRARIES})
-    endif()
-endif()
-
 set_target_properties(${PROJECT_NAME} PROPERTIES
    VERSION ${PROJECT_VERSION}
    FRAMEWORK FALSE
@@ -261,3 +253,9 @@ if(TARGET scc-sysc)
        INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} # headers
    )
 endif()
+
+project(elfio-test) # from here on ${PROJECT_NAME} expands to elfio-test
+find_package(Boost COMPONENTS program_options thread REQUIRED) # NOTE(review): Boost is located but not linked to the target below - confirm it is needed
+# Standalone executable built from src/elfio.cpp and linked against elfio.
+add_executable(${PROJECT_NAME} src/elfio.cpp)
+target_link_libraries(${PROJECT_NAME} PRIVATE elfio::elfio) # an executable has no consumers, so nothing needs to propagate
--- a/gen_input/templates/CORENAME.cpp.gtl
+++ b/gen_input/templates/CORENAME.cpp.gtl
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017 - 2020 MINRES Technologies GmbH
+ * Copyright (C) 2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -47,10 +47,10 @@ def getRegisterSizes(){

 using namespace iss::arch;

-constexpr std::array<const char*, ${registers.size}>    iss::arch::traits<iss::arch::${coreDef.name.toLowerCase()}>::reg_names;
-constexpr std::array<const char*, ${registers.size}>    iss::arch::traits<iss::arch::${coreDef.name.toLowerCase()}>::reg_aliases;
-constexpr std::array<const uint32_t, ${getRegisterSizes().size}> iss::arch::traits<iss::arch::${coreDef.name.toLowerCase()}>::reg_bit_widths;
-constexpr std::array<const uint32_t, ${getRegisterSizes().size}> iss::arch::traits<iss::arch::${coreDef.name.toLowerCase()}>::reg_byte_offsets;
+constexpr std::array<const char*, ${registers.size()}>    iss::arch::traits<iss::arch::${coreDef.name.toLowerCase()}>::reg_names;
+constexpr std::array<const char*, ${registers.size()}>    iss::arch::traits<iss::arch::${coreDef.name.toLowerCase()}>::reg_aliases;
+constexpr std::array<const uint32_t, ${getRegisterSizes().size()}> iss::arch::traits<iss::arch::${coreDef.name.toLowerCase()}>::reg_bit_widths;
+constexpr std::array<const uint32_t, ${getRegisterSizes().size()}> iss::arch::traits<iss::arch::${coreDef.name.toLowerCase()}>::reg_byte_offsets;

 ${coreDef.name.toLowerCase()}::${coreDef.name.toLowerCase()}()  = default;

--- a/gen_input/templates/CORENAME.h.gtl
+++ b/gen_input/templates/CORENAME.h.gtl
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017 - 2021 MINRES Technologies GmbH
+ * Copyright (C) 2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -75,10 +75,10 @@ template <> struct traits<${coreDef.name.toLowerCase()}> {

    constexpr static char const* const core_type = "${coreDef.name}";
    
-    static constexpr std::array<const char*, ${registers.size}> reg_names{
+    static constexpr std::array<const char*, ${registers.size()}> reg_names{
        {"${registers.collect{it.name.toLowerCase()}.join('", "')}"}};
 
-    static constexpr std::array<const char*, ${registers.size}> reg_aliases{
+    static constexpr std::array<const char*, ${registers.size()}> reg_aliases{
        {"${registers.collect{it.alias.toLowerCase()}.join('", "')}"}};

    enum constants {${constants.collect{c -> c.name+"="+getCString(c.value)}.join(', ')}};
@@ -99,10 +99,10 @@ template <> struct traits<${coreDef.name.toLowerCase()}> {

    using phys_addr_t = iss::typed_addr_t<iss::address_type::PHYSICAL>;

-    static constexpr std::array<const uint32_t, ${getRegisterSizes().size}> reg_bit_widths{
+    static constexpr std::array<const uint32_t, ${getRegisterSizes().size()}> reg_bit_widths{
        {${getRegisterSizes().join(',')}}};

-    static constexpr std::array<const uint32_t, ${getRegisterOffsets().size}> reg_byte_offsets{
+    static constexpr std::array<const uint32_t, ${getRegisterOffsets().size()}> reg_byte_offsets{
        {${getRegisterOffsets().join(',')}}};

    static const uint64_t addr_mask = (reg_t(1) << (XLEN - 1)) | ((reg_t(1) << (XLEN - 1)) - 1);
@@ -131,8 +131,6 @@ struct ${coreDef.name.toLowerCase()}: public arch_if {

    uint8_t* get_regs_base_ptr() override;

-    inline uint64_t get_icount() { return reg.icount; }
-
    inline bool should_stop() { return interrupt_sim; }

    inline uint64_t stop_code() { return interrupt_sim; }
@@ -141,8 +139,6 @@ struct ${coreDef.name.toLowerCase()}: public arch_if {

    virtual iss::sync_type needed_sync() const { return iss::NO_SYNC; }

-    inline uint32_t get_last_branch() { return reg.last_branch; }
-

 #pragma pack(push, 1)
    struct ${coreDef.name}_regs {<%
--- a/gen_input/templates/CORENAME_sysc.cpp.gtl
+++ b/gen_input/templates/CORENAME_sysc.cpp.gtl
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2023 MINRES Technologies GmbH
+ * Copyright (C) 2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -45,17 +45,17 @@ namespace interp {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
@@ -66,17 +66,17 @@ namespace llvm {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
@@ -88,17 +88,17 @@ namespace tcc {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
@@ -110,17 +110,17 @@ namespace asmjit {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
--- a/gen_input/templates/asmjit/CORENAME.cpp.gtl
+++ b/gen_input/templates/asmjit/CORENAME.cpp.gtl
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017, 2023 MINRES Technologies GmbH
+ * Copyright (C) 2017-2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -37,7 +37,10 @@
 #include <iss/asmjit/vm_base.h>
 #include <asmjit/asmjit.h>
 #include <util/logging.h>
-
+#include <iss/instruction_decoder.h>
+<%def fcsr = registers.find {it.name=='FCSR'}
+if(fcsr != null) {%>
+#include <vm/fp_functions.h><%}%>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
 #endif
@@ -80,28 +83,32 @@ public:

 protected:
    using super::get_ptr_for;
-using super::get_reg;
    using super::get_reg_for;
+    using super::get_reg_for_Gp;
    using super::load_reg_from_mem;
+    using super::load_reg_from_mem_Gp;
    using super::write_reg_to_mem;
-    using super::gen_ext;
    using super::gen_read_mem;
    using super::gen_write_mem;
-    using super::gen_wait;
    using super::gen_leave;
-    using super::gen_operation;
+    using super::gen_sync;
   
    using this_class = vm_impl<ARCH>;
    using compile_func = continuation_e (this_class::*)(virt_addr_t&, code_word_t, jit_holder&);

-    continuation_e gen_single_inst_behavior(virt_addr_t&, unsigned int &, jit_holder&) override;
+    continuation_e gen_single_inst_behavior(virt_addr_t&, jit_holder&) override;
+    enum globals_e {TVAL = 0, GLOBALS_SIZE};
    void gen_block_prologue(jit_holder& jh) override;
    void gen_block_epilogue(jit_holder& jh) override;
    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
-
+<%if(fcsr != null) {%>
+    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}   
+<%}%>
    void gen_instr_prologue(jit_holder& jh);
    void gen_instr_epilogue(jit_holder& jh);
    inline void gen_raise(jit_holder& jh, uint16_t trap_id, uint16_t cause);
+    template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type> void gen_set_tval(jit_holder& jh, T new_tval) ;
+    void gen_set_tval(jit_holder& jh, x86_reg_t _new_tval) ;

    template<unsigned W, typename U, typename S = typename std::make_signed<U>::type>
    inline S sext(U from) {
@@ -109,32 +116,29 @@ using super::get_reg;
        auto sign_mask = 1ULL<<(W-1);
        return (from & mask) | ((from & sign_mask) ? ~mask : 0);
    }
+<%functions.each{ it.eachLine { %>
+    ${it}<%}%>
+<%}%>
 private:
    /****************************************************************************
     * start opcode definitions
     ****************************************************************************/
    struct instruction_descriptor {
-        size_t length;
+        uint32_t length;
        uint32_t value;
        uint32_t mask;
        compile_func op;
    };
-    struct decoding_tree_node{
-        std::vector<instruction_descriptor> instrs;
-        std::vector<decoding_tree_node*> children;
-        uint32_t submask = std::numeric_limits<uint32_t>::max();
-        uint32_t value;
-        decoding_tree_node(uint32_t value) : value(value){}
-    };

-    decoding_tree_node* root {nullptr};
-
-    const std::array<instruction_descriptor, ${instructions.size}> instr_descr = {{
+    const std::array<instruction_descriptor, ${instructions.size()}> instr_descr = {{
         /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
        /* instruction ${instr.instruction.name}, encoding '${instr.encoding}' */
        {${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%>
    }};

+    //needs to be declared after instr_descr
+    decoder instr_decoder;
+
    /* instruction definitions */<%instructions.eachWithIndex{instr, idx -> %>
    /* instruction ${idx}: ${instr.name} */
    continuation_e __${generator.functionName(instr.name)}(virt_addr_t& pc, code_word_t instr, jit_holder& jh){
@@ -147,7 +151,7 @@ private:
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
            jh.disass_collection.push_back(mnemonic_ptr);
-            jh.cc.invoke(&call_print_disass, &print_disass, FuncSignatureT<void, void *, uint64_t, char *>());
+            jh.cc.invoke(&call_print_disass, &print_disass, FuncSignature::build<void, void *, uint64_t, char *>());
            call_print_disass->setArg(0, jh.arch_if_ptr);
            call_print_disass->setArg(1, pc.val);
            call_print_disass->setArg(2, mnemonic_ptr);
@@ -155,87 +159,49 @@ private:
        }
        x86::Compiler& cc = jh.cc;
        cc.comment(fmt::format("${instr.name}_{:#x}:",pc.val).c_str());
-        this->gen_sync(jh, PRE_SYNC, ${idx});
-        cc.mov(jh.pc, pc.val);
+        gen_sync(jh, PRE_SYNC, ${idx});
+        mov(cc, jh.pc, pc.val);
+        gen_set_tval(jh, instr);
        pc = pc+${instr.length/8};
-        cc.mov(jh.next_pc, pc.val);
+        mov(cc, jh.next_pc, pc.val);

        gen_instr_prologue(jh);
        cc.comment("//behavior:");
        /*generate behavior*/
        <%instr.behavior.eachLine{%>${it}
        <%}%>
+        gen_sync(jh, POST_SYNC, ${idx});
        gen_instr_epilogue(jh);
-        this->gen_sync(jh, POST_SYNC, ${idx});
    	return returnValue;        
    }
    <%}%>
    /****************************************************************************
     * end opcode definitions
     ****************************************************************************/
-    continuation_e illegal_intruction(virt_addr_t &pc, code_word_t instr, jit_holder& jh ) {
+    continuation_e illegal_instruction(virt_addr_t &pc, code_word_t instr, jit_holder& jh ) {
        x86::Compiler& cc = jh.cc;
-        cc.comment(fmt::format("illegal_intruction{:#x}:",pc.val).c_str());
-        this->gen_sync(jh, PRE_SYNC, instr_descr.size());
+        if(this->disass_enabled){          
+            auto mnemonic = std::string("illegal_instruction");
+            InvokeNode* call_print_disass;
+            char* mnemonic_ptr = strdup(mnemonic.c_str());
+            jh.disass_collection.push_back(mnemonic_ptr);
+            jh.cc.invoke(&call_print_disass, &print_disass, FuncSignature::build<void, void *, uint64_t, char *>());
+            call_print_disass->setArg(0, jh.arch_if_ptr);
+            call_print_disass->setArg(1, pc.val);
+            call_print_disass->setArg(2, mnemonic_ptr);
+        }
+        cc.comment(fmt::format("illegal_instruction{:#x}:",pc.val).c_str());
+        gen_sync(jh, PRE_SYNC, instr_descr.size());
+        mov(cc, jh.pc, pc.val);
+        gen_set_tval(jh, instr);
        pc = pc + ((instr & 3) == 3 ? 4 : 2);
+        mov(cc, jh.next_pc, pc.val);
        gen_instr_prologue(jh);
        cc.comment("//behavior:");
+        gen_raise(jh, 0, 2);
+        gen_sync(jh, POST_SYNC, instr_descr.size());
        gen_instr_epilogue(jh);
-        this->gen_sync(jh, POST_SYNC, instr_descr.size());
-        return BRANCH;
-    }
-     
-    //decoding functionality
-
-    void populate_decoding_tree(decoding_tree_node* root){
-        //create submask
-        for(auto instr: root->instrs){
-            root->submask &= instr.mask;
-        }
-        //put each instr according to submask&encoding into children
-        for(auto instr: root->instrs){
-            bool foundMatch = false;
-            for(auto child: root->children){
-                //use value as identifying trait
-                if(child->value == (instr.value&root->submask)){
-                    child->instrs.push_back(instr);
-                    foundMatch = true;
-                }
-            }
-            if(!foundMatch){
-                decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
-                child->instrs.push_back(instr);
-                root->children.push_back(child);
-            }
-        }
-        root->instrs.clear();
-        //call populate_decoding_tree for all children
-        if(root->children.size() >1)
-            for(auto child: root->children){
-                populate_decoding_tree(child);      
-            }
-        else{
-            //sort instrs by value of the mask, this works bc we want to have the least restrictive one last
-            std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
-            return instr1.mask > instr2.mask;
-            }); 
-        }
-    }
-    compile_func decode_instr(decoding_tree_node* node, code_word_t word){
-        if(!node->children.size()){
-            if(node->instrs.size() == 1) return node->instrs[0].op;
-            for(auto instr : node->instrs){
-                if((instr.mask&word) == instr.value) return instr.op;
-            }
-        }
-        else{
-            for(auto child : node->children){
-                if (child->value == (node->submask&word)){
-                    return decode_instr(child, word);
-                }  
-            }  
-        }
-        return nullptr;
+        return ILLEGAL_INSTR;
    }
 };

@@ -243,16 +209,19 @@ template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }

 template <typename ARCH>
 vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
-: vm_base<ARCH>(core, core_id, cluster_id) {
-    root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
-    for(auto instr: instr_descr){
-        root->instrs.push_back(instr);
-    }
-    populate_decoding_tree(root);
-}
+: vm_base<ARCH>(core, core_id, cluster_id)
+, instr_decoder([this]() { // immediately-invoked lambda: converts instr_descr into the decoder's input table
+        std::vector<generic_instruction_descriptor> g_instr_descr;
+        g_instr_descr.reserve(instr_descr.size()); // one entry per instruction, so a single allocation suffices
+        for (uint32_t i = 0; i < instr_descr.size(); ++i) {
+            generic_instruction_descriptor new_instr_descr {instr_descr[i].value, instr_descr[i].mask, i}; // i is the index back into instr_descr
+            g_instr_descr.push_back(new_instr_descr);
+        }
+        return g_instr_descr; // plain return of the local: eligible for copy elision, implicitly moved otherwise
+    }()) {}

 template <typename ARCH>
-continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, jit_holder& jh) {
+continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, jit_holder& jh) {
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
    phys_addr_t paddr(pc);
@@ -261,13 +230,15 @@ continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned
        paddr = this->core.virt2phys(pc);
    auto res = this->core.read(paddr, 4, data);
    if (res != iss::Ok)
-        throw trap_access(TRAP_ID, pc.val);
+        return ILLEGAL_FETCH;
    if (instr == 0x0000006f || (instr&0xffff)==0xa001)
-        throw simulation_stopped(0); // 'J 0' or 'C.J 0'
-    ++inst_cnt;
-    auto f = decode_instr(root, instr);
+        return JUMP_TO_SELF;
+    uint32_t inst_index = instr_decoder.decode_instr(instr);
+    compile_func f = nullptr;
+    if(inst_index < instr_descr.size())
+        f = instr_descr[inst_index].op;
    if (f == nullptr) 
-        f = &this_class::illegal_intruction;
+        f = &this_class::illegal_instruction;
    return (this->*f)(pc, instr, jh);
 }
 template <typename ARCH>
@@ -275,11 +246,10 @@ void vm_impl<ARCH>::gen_instr_prologue(jit_holder& jh) {
    auto& cc = jh.cc;

    cc.comment("//gen_instr_prologue");
-    cc.inc(get_ptr_for(jh, traits::ICOUNT));

-    x86::Gp current_trap_state = get_reg_for(jh, traits::TRAP_STATE);
-    cc.mov(current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));
-    cc.mov(get_ptr_for(jh, traits::PENDING_TRAP), current_trap_state);
+    x86_reg_t current_trap_state = get_reg_for(cc, traits::TRAP_STATE);
+    mov(cc, current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));
+    mov(cc, get_ptr_for(jh, traits::PENDING_TRAP), current_trap_state);

 }
 template <typename ARCH>
@@ -287,16 +257,19 @@ void vm_impl<ARCH>::gen_instr_epilogue(jit_holder& jh) {
    auto& cc = jh.cc;

    cc.comment("//gen_instr_epilogue");
-    x86::Gp current_trap_state = get_reg_for(jh, traits::TRAP_STATE);
-    cc.mov(current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));
-    cc.cmp(current_trap_state, 0);
+    x86_reg_t current_trap_state = get_reg_for(cc, traits::TRAP_STATE);
+    mov(cc, current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));
+    cmp(cc, current_trap_state, 0);
    cc.jne(jh.trap_entry);
+    cc.inc(get_ptr_for(jh, traits::ICOUNT));
+    cc.inc(get_ptr_for(jh, traits::CYCLE));
 }
 template <typename ARCH>
 void vm_impl<ARCH>::gen_block_prologue(jit_holder& jh){
-
-    jh.pc = load_reg_from_mem(jh, traits::PC);
-    jh.next_pc = load_reg_from_mem(jh, traits::NEXT_PC);
+    jh.pc = load_reg_from_mem_Gp(jh, traits::PC); // per-block register holding PC
+    jh.next_pc = load_reg_from_mem_Gp(jh, traits::NEXT_PC); // per-block register holding NEXT_PC
+    jh.globals.resize(GLOBALS_SIZE); // one slot per globals_e entry
+    jh.globals[TVAL] = get_reg_Gp(jh.cc, 64, false); // 64-bit register for the trap value; written by gen_set_tval, passed to enter_trap in the block epilogue
 }
 template <typename ARCH>
 void vm_impl<ARCH>::gen_block_epilogue(jit_holder& jh){
@@ -306,39 +279,52 @@ void vm_impl<ARCH>::gen_block_epilogue(jit_holder& jh){

    cc.bind(jh.trap_entry);
    this->write_back(jh);
-    this->gen_sync(jh, POST_SYNC, -1);

-    x86::Gp current_trap_state = get_reg_for(jh, traits::TRAP_STATE);
-    cc.mov(current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));
+    x86::Gp current_trap_state = get_reg_for_Gp(cc, traits::TRAP_STATE);
+    mov(cc, current_trap_state, get_ptr_for(jh, traits::TRAP_STATE));

-    x86::Gp current_pc = get_reg_for(jh, traits::PC);
-    cc.mov(current_pc, get_ptr_for(jh, traits::PC));
+    x86::Gp current_pc = get_reg_for_Gp(cc, traits::PC);
+    mov(cc, current_pc, get_ptr_for(jh, traits::PC));

-    x86::Gp instr = cc.newInt32("instr");
-    cc.mov(instr, 0); // FIXME:this is not correct
    cc.comment("//enter trap call;");
    InvokeNode* call_enter_trap;
-    cc.invoke(&call_enter_trap, &enter_trap, FuncSignatureT<uint64_t, void*, uint64_t, uint64_t, uint64_t>());
+    cc.invoke(&call_enter_trap, &enter_trap, FuncSignature::build<uint64_t, void*, uint64_t, uint64_t, uint64_t>());
    call_enter_trap->setArg(0, jh.arch_if_ptr);
    call_enter_trap->setArg(1, current_trap_state);
    call_enter_trap->setArg(2, current_pc);
-    call_enter_trap->setArg(3, instr);
+    call_enter_trap->setArg(3, jh.globals[TVAL]);

-    x86::Gp current_next_pc = get_reg_for(jh, traits::NEXT_PC);
-    cc.mov(current_next_pc, get_ptr_for(jh, traits::NEXT_PC));
-    cc.mov(jh.next_pc, current_next_pc);
+    x86_reg_t current_next_pc = get_reg_for(cc, traits::NEXT_PC);
+    mov(cc, current_next_pc, get_ptr_for(jh, traits::NEXT_PC));
+    mov(cc, jh.next_pc, current_next_pc);

-    cc.mov(get_ptr_for(jh, traits::LAST_BRANCH), std::numeric_limits<uint32_t>::max());
+    mov(cc, get_ptr_for(jh, traits::LAST_BRANCH), static_cast<int>(UNKNOWN_JUMP));
    cc.ret(jh.next_pc);
 }
 template <typename ARCH>
 inline void vm_impl<ARCH>::gen_raise(jit_holder& jh, uint16_t trap_id, uint16_t cause) {
     auto& cc = jh.cc;
     cc.comment("//gen_raise");
-    auto tmp1 = get_reg_for(jh, traits::TRAP_STATE);
-    cc.mov(tmp1, 0x80ULL << 24 | (cause << 16) | trap_id);
-    cc.mov(get_ptr_for(jh, traits::TRAP_STATE), tmp1);
-    cc.mov(jh.next_pc, std::numeric_limits<uint32_t>::max());
+    auto tmp1 = get_reg_for(cc, traits::TRAP_STATE);
+    mov(cc, tmp1, 0x80ULL << 24 | (static_cast<unsigned long long>(cause) << 16) | trap_id); // widen cause before shifting: as a promoted int, cause >= 0x8000 would overflow and sign-extend
+    mov(cc, get_ptr_for(jh, traits::TRAP_STATE), tmp1); // store the encoded trap word into TRAP_STATE
+    cc.jmp(jh.trap_entry); // continue at the block's trap entry label
+}
+template <typename ARCH> // overload for integral values (T is constrained by enable_if at the declaration)
+template <typename T, typename>
+void vm_impl<ARCH>::gen_set_tval(jit_holder& jh, T new_tval) {
+        mov(jh.cc, jh.globals[TVAL], new_tval); // emit a move of new_tval into the TVAL global register
+    }
+template <typename ARCH> // overload taking the value as an x86_reg_t variant
+void vm_impl<ARCH>::gen_set_tval(jit_holder& jh, x86_reg_t _new_tval) {
+    if(nonstd::holds_alternative<x86::Gp>(_new_tval)) { // only the x86::Gp alternative is handled
+        x86::Gp new_tval = nonstd::get<x86::Gp>(_new_tval);
+        if(new_tval.size() < 8) // narrower than 8 bytes: extend to 64 bits before the move
+            new_tval = gen_ext_Gp(jh.cc, new_tval, 64, false); // NOTE(review): 'false' presumably selects zero-extension - confirm against gen_ext_Gp
+        mov(jh.cc, jh.globals[TVAL], new_tval); // emit a move into the TVAL global register
+    } else {
+        throw std::runtime_error("Variant not supported in gen_set_tval"); // any other alternative is rejected while generating code
+    }
 }

 } // namespace tgc5c
@@ -363,7 +349,7 @@ volatile std::array<bool, 2> dummy = {
 		    auto vm = new asmjit::${coreDef.name.toLowerCase()}::vm_impl<arch::${coreDef.name.toLowerCase()}>(*cpu, false);
 		    if (port != 0) debugger::server<debugger::gdb_session>::run_server(vm, port);
            if(init_data){
-                auto* cb = reinterpret_cast<std::function<void(arch_if*, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t)>*>(init_data);
+                auto* cb = reinterpret_cast<semihosting_cb_t<arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t>*>(init_data);
                cpu->set_semihosting_callback(*cb);
            }
            return {cpu_ptr{cpu}, vm_ptr{vm}};
@@ -373,7 +359,7 @@ volatile std::array<bool, 2> dummy = {
 		    auto vm = new asmjit::${coreDef.name.toLowerCase()}::vm_impl<arch::${coreDef.name.toLowerCase()}>(*cpu, false);
 		    if (port != 0) debugger::server<debugger::gdb_session>::run_server(vm, port);
            if(init_data){
-                auto* cb = reinterpret_cast<std::function<void(arch_if*, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t)>*>(init_data);
+                auto* cb = reinterpret_cast<semihosting_cb_t<arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t>*>(init_data);
                cpu->set_semihosting_callback(*cb);
            }
            return {cpu_ptr{cpu}, vm_ptr{vm}};
--- a/gen_input/templates/interp/CORENAME.cpp.gtl
+++ b/gen_input/templates/interp/CORENAME.cpp.gtl
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2021 MINRES Technologies GmbH
+ * Copyright (C) 2017-2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@ def nativeTypeSize(int size){
 }
 %>
 // clang-format off
+#include <cstdint>
 #include <iss/arch/${coreDef.name.toLowerCase()}.h>
 #include <iss/debugger/gdb_session.h>
 #include <iss/debugger/server.h>
@@ -47,6 +48,8 @@ def nativeTypeSize(int size){
 #include <exception>
 #include <vector>
 #include <sstream>
+#include <iss/instruction_decoder.h>
+

 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
@@ -97,7 +100,12 @@ protected:
    using compile_ret_t = virt_addr_t;
    using compile_func = compile_ret_t (this_class::*)(virt_addr_t &pc, code_word_t instr);

-    inline const char *name(size_t index){return index<traits::reg_aliases.size()?traits::reg_aliases[index]:"illegal";}
+    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
+<%
+def fcsr = registers.find {it.name=='FCSR'}
+if(fcsr != null) {%>
+    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}     
+<%}%>

    virt_addr_t execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit) override;

@@ -106,7 +114,6 @@ protected:
    inline void raise(uint16_t trap_id, uint16_t cause){
        auto trap_val =  0x80ULL << 24 | (cause << 16) | trap_id;
        this->core.reg.trap_state = trap_val;
-        this->template get_reg<uint${addrDataWidth}_t>(traits::NEXT_PC) = std::numeric_limits<uint${addrDataWidth}_t>::max();
    }

    inline void leave(unsigned lvl){
@@ -117,6 +124,13 @@ protected:
        this->core.wait_until(type);
    }

+    inline void set_tval(uint64_t new_tval){ // stores new_tval in the tval member declared below
+        tval = new_tval;
+    }
+
+    uint64_t fetch_count{0}; // starts at 0; compared against count_limit in execute_inst when FCOUNT_LIMIT is set
+    uint64_t tval{0}; // handed to enter_trap by execute_inst when the trap cause field is 0
+
    using yield_t = boost::coroutines2::coroutine<void>::push_type;
    using coro_t = boost::coroutines2::coroutine<void>::pull_type;
    std::vector<coro_t> spawn_blocks;
@@ -146,25 +160,20 @@ private:
     * start opcode definitions
     ****************************************************************************/
    struct instruction_descriptor {
-        size_t length;
+        uint32_t length;
        uint32_t value;
        uint32_t mask;
        typename arch::traits<ARCH>::opcode_e op;
    };
-    struct decoding_tree_node{
-        std::vector<instruction_descriptor> instrs;
-        std::vector<decoding_tree_node*> children;
-        uint32_t submask = std::numeric_limits<uint32_t>::max();
-        uint32_t value;
-        decoding_tree_node(uint32_t value) : value(value){}
-    };

-    decoding_tree_node* root {nullptr};
-    const std::array<instruction_descriptor, ${instructions.size}> instr_descr = {{
+    const std::array<instruction_descriptor, ${instructions.size()}> instr_descr = {{
         /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
        {${instr.length}, ${instr.encoding}, ${instr.mask}, arch::traits<ARCH>::opcode_e::${instr.instruction.name}},<%}%>
    }};

+    //needs to be declared after instr_descr
+    decoder instr_decoder;
+
    iss::status fetch_ins(virt_addr_t pc, uint8_t * data){
        if(this->core.has_mmu()) {
            auto phys_pc = this->core.virt2phys(pc);
@@ -184,66 +193,12 @@ private:
        }
        return iss::Ok;
    }
-    
-    void populate_decoding_tree(decoding_tree_node* root){
-        //create submask
-        for(auto instr: root->instrs){
-            root->submask &= instr.mask;
-        }
-        //put each instr according to submask&encoding into children
-        for(auto instr: root->instrs){
-            bool foundMatch = false;
-            for(auto child: root->children){
-                //use value as identifying trait
-                if(child->value == (instr.value&root->submask)){
-                    child->instrs.push_back(instr);
-                    foundMatch = true;
-                }
-            }
-            if(!foundMatch){
-                decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
-                child->instrs.push_back(instr);
-                root->children.push_back(child);
-            }
-        }
-        root->instrs.clear();
-        //call populate_decoding_tree for all children
-        if(root->children.size() >1)
-            for(auto child: root->children){
-                populate_decoding_tree(child);      
-            }
-        else{
-            //sort instrs by value of the mask, this works bc we want to have the least restrictive one last
-            std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
-            return instr1.mask > instr2.mask;
-            }); 
-        }
-    }
-    typename arch::traits<ARCH>::opcode_e  decode_instr(decoding_tree_node* node, code_word_t word){
-        if(!node->children.size()){
-            if(node->instrs.size() == 1) return node->instrs[0].op;
-            for(auto instr : node->instrs){
-                if((instr.mask&word) == instr.value) return instr.op;
-            }
-        }
-        else{
-            for(auto child : node->children){
-                if (child->value == (node->submask&word)){
-                    return decode_instr(child, word);
-                }  
-            }  
-        }
-        return arch::traits<ARCH>::opcode_e::MAX_OPCODE;
-    }
 };

 template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
    volatile CODE_WORD x = insn;
    insn = 2 * x;
 }
-
-template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
-
 // according to
 // https://stackoverflow.com/questions/8871204/count-number-of-1s-in-binary-representation
 #ifdef __GCC__
@@ -260,16 +215,23 @@ constexpr size_t bit_count(uint32_t u) {

 template <typename ARCH>
 vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
-: vm_base<ARCH>(core, core_id, cluster_id) {
-    root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
-    for(auto instr:instr_descr){
-        root->instrs.push_back(instr);
-    }
-    populate_decoding_tree(root);
+: vm_base<ARCH>(core, core_id, cluster_id)
+, instr_decoder([this]() { // immediately-invoked lambda: converts instr_descr into the list handed to the decoder
+        std::vector<generic_instruction_descriptor> g_instr_descr;
+        g_instr_descr.reserve(instr_descr.size());
+        for (uint32_t i = 0; i < instr_descr.size(); ++i) {
+            generic_instruction_descriptor new_instr_descr {instr_descr[i].value, instr_descr[i].mask, i}; // i = position in instr_descr
+            g_instr_descr.push_back(new_instr_descr);
+        }
+        return g_instr_descr; // plain return of the local permits NRVO/implicit move; std::move here would inhibit copy elision
+    }()) {}
+
+inline bool is_icount_limit_enabled(finish_cond_e cond){
+    return (cond & finish_cond_e::ICOUNT_LIMIT) == finish_cond_e::ICOUNT_LIMIT;
 }

-inline bool is_count_limit_enabled(finish_cond_e cond){
-    return (cond & finish_cond_e::COUNT_LIMIT) == finish_cond_e::COUNT_LIMIT;
+inline bool is_fcount_limit_enabled(finish_cond_e cond){
+    return (cond & finish_cond_e::FCOUNT_LIMIT) == finish_cond_e::FCOUNT_LIMIT;
 }

 inline bool is_jump_to_self_enabled(finish_cond_e cond){
@@ -277,7 +239,7 @@ inline bool is_jump_to_self_enabled(finish_cond_e cond){
 }

 template <typename ARCH>
-typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit){
+typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t count_limit){
    auto pc=start;
    auto* PC = reinterpret_cast<uint${addrDataWidth}_t*>(this->regs_base_ptr+arch::traits<ARCH>::reg_byte_offsets[arch::traits<ARCH>::PC]);
    auto* NEXT_PC = reinterpret_cast<uint${addrDataWidth}_t*>(this->regs_base_ptr+arch::traits<ARCH>::reg_byte_offsets[arch::traits<ARCH>::NEXT_PC]);
@@ -290,16 +252,26 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
    auto *const data = reinterpret_cast<uint8_t*>(&instr);

    while(!this->core.should_stop() &&
-            !(is_count_limit_enabled(cond) && icount >= icount_limit)){
+            !(is_icount_limit_enabled(cond) && icount >= count_limit) &&
+            !(is_fcount_limit_enabled(cond) && fetch_count >= count_limit)){
+        if(this->debugging_enabled())
+            this->tgt_adapter->check_continue(*PC);
+        pc.val=*PC;
        if(fetch_ins(pc, data)!=iss::Ok){
-            this->do_sync(POST_SYNC, std::numeric_limits<unsigned>::max());
-            pc.val = super::core.enter_trap(std::numeric_limits<uint64_t>::max(), pc.val, 0);
+            if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, std::numeric_limits<unsigned>::max());
+            process_spawn_blocks();
+            if(this->sync_exec && POST_SYNC) this->do_sync(POST_SYNC, std::numeric_limits<unsigned>::max()); // post-sync must be signalled as POST_SYNC, not PRE_SYNC
+            pc.val = super::core.enter_trap(arch::traits<ARCH>::RV_CAUSE_FETCH_ACCESS<<16, pc.val, 0);
        } else {
            if (is_jump_to_self_enabled(cond) &&
                    (instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
-            auto inst_id = decode_instr(root, instr);
+            uint32_t inst_index = instr_decoder.decode_instr(instr);
+            opcode_e inst_id = arch::traits<ARCH>::opcode_e::MAX_OPCODE; // kept when the decoder returns an index outside instr_descr
+            if(inst_index <instr_descr.size())
+                inst_id = instr_descr[inst_index].op;
+
            // pre execution stuff
-             this->core.reg.last_branch = 0;
+            this->core.reg.last_branch = 0;
            if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, static_cast<unsigned>(inst_id));
            try{
                switch(inst_id){<%instructions.eachWithIndex{instr, idx -> %>
@@ -308,6 +280,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    <%}%>if(this->disass_enabled){
                        /* generate console output when executing the command */<%instr.disass.eachLine{%>
                        ${it}<%}%>
+                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers<%instr.usedVariables.each{ k,v->
                    if(v.isArray) {%>
@@ -332,16 +305,18 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
            //    this->core.reg.trap_state =  this->core.reg.pending_trap;
            // trap check
            if(trap_state!=0){
-                super::core.enter_trap(trap_state, pc.val, instr);
+                // For an instruction-address-misaligned trap (cause = 0 and trap id = 0) enter_trap needs the targeted address (held in tval) instead of the instruction word
+                auto mcause = (trap_state>>16) & 0xff; 
+                super::core.enter_trap(trap_state, pc.val, mcause ? instr:tval);
            } else {
                icount++;
                instret++;
            }
-            cycle++;
-            pc.val=*NEXT_PC;
-            this->core.reg.PC = this->core.reg.NEXT_PC;
+            *PC = *NEXT_PC;
            this->core.reg.trap_state =  this->core.reg.pending_trap;
        }
+        fetch_count++;
+        cycle++;
    }
    return pc;
 }
@@ -368,7 +343,7 @@ volatile std::array<bool, 2> dummy = {
 		    auto vm = new interp::${coreDef.name.toLowerCase()}::vm_impl<arch::${coreDef.name.toLowerCase()}>(*cpu, false);
 		    if (port != 0) debugger::server<debugger::gdb_session>::run_server(vm, port);
            if(init_data){
-                auto* cb = reinterpret_cast<std::function<void(arch_if*, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t)>*>(init_data);
+                auto* cb = reinterpret_cast<semihosting_cb_t<arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t>*>(init_data);
                cpu->set_semihosting_callback(*cb);
            }
            return {cpu_ptr{cpu}, vm_ptr{vm}};
@@ -378,7 +353,7 @@ volatile std::array<bool, 2> dummy = {
 		    auto vm = new interp::${coreDef.name.toLowerCase()}::vm_impl<arch::${coreDef.name.toLowerCase()}>(*cpu, false);
 		    if (port != 0) debugger::server<debugger::gdb_session>::run_server(vm, port);
            if(init_data){
-                auto* cb = reinterpret_cast<std::function<void(arch_if*, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t)>*>(init_data);
+                auto* cb = reinterpret_cast<semihosting_cb_t<arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t>*>(init_data);
                cpu->set_semihosting_callback(*cb);
            }
            return {cpu_ptr{cpu}, vm_ptr{vm}};
--- a/gen_input/templates/llvm/CORENAME.cpp.gtl
+++ b/gen_input/templates/llvm/CORENAME.cpp.gtl
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017, 2018 MINRES Technologies GmbH
+ * Copyright (C) 2017-2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -36,7 +36,10 @@
 #include <iss/iss.h>
 #include <iss/llvm/vm_base.h>
 #include <util/logging.h>
-
+#include <iss/instruction_decoder.h>
+<%def fcsr = registers.find {it.name=='FCSR'}
+if(fcsr != null) {%>
+#include <vm/fp_functions.h><%}%>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
 #endif
@@ -82,7 +85,9 @@ protected:
    using vm_base<ARCH>::get_reg_ptr;

    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
-
+<%if(fcsr != null) {%>
+    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}   
+<%}%>
    template <typename T> inline ConstantInt *size(T type) {
        return ConstantInt::get(getContext(), APInt(32, type->getType()->getScalarSizeInBits()));
    }
@@ -96,13 +101,16 @@ protected:
        return super::gen_cond_assign(cond, this->gen_ext(trueVal, size), this->gen_ext(falseVal, size));
    }

-    std::tuple<continuation_e, BasicBlock *> gen_single_inst_behavior(virt_addr_t &, unsigned int &, BasicBlock *) override;
+    std::tuple<continuation_e, BasicBlock *> gen_single_inst_behavior(virt_addr_t &, BasicBlock *) override;

    void gen_leave_behavior(BasicBlock *leave_blk) override;
    void gen_raise_trap(uint16_t trap_id, uint16_t cause);
    void gen_leave_trap(unsigned lvl);
    void gen_wait(unsigned type);
+    void set_tval(uint64_t new_tval);
+    void set_tval(Value* new_tval);
    void gen_trap_behavior(BasicBlock *) override;
+    void gen_instr_prologue();
    void gen_instr_epilogue(BasicBlock *bb);

    inline Value *gen_reg_load(unsigned i, unsigned level = 0) {
@@ -127,33 +135,29 @@ protected:
        auto sign_mask = 1ULL<<(W-1);
        return (from & mask) | ((from & sign_mask) ? ~mask : 0);
    }
-
+<%functions.each{ it.eachLine { %>
+    ${it}<%}%>
+<%}%>
 private:
    /****************************************************************************
     * start opcode definitions
     ****************************************************************************/
    struct instruction_descriptor {
-        size_t length;
+        uint32_t length;
        uint32_t value;
        uint32_t mask;
        compile_func op;
    };
-    struct decoding_tree_node{
-        std::vector<instruction_descriptor> instrs;
-        std::vector<decoding_tree_node*> children;
-        uint32_t submask = std::numeric_limits<uint32_t>::max();
-        uint32_t value;
-        decoding_tree_node(uint32_t value) : value(value){}
-    };

-    decoding_tree_node* root {nullptr};
-
-    const std::array<instruction_descriptor, ${instructions.size}> instr_descr = {{
+    const std::array<instruction_descriptor, ${instructions.size()}> instr_descr = {{
         /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
        /* instruction ${instr.instruction.name}, encoding '${instr.encoding}' */
        {${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%>
    }};

+    //needs to be declared after instr_descr
+    decoder instr_decoder;
+
    /* instruction definitions */<%instructions.eachWithIndex{instr, idx -> %>
    /* instruction ${idx}: ${instr.name} */
    std::tuple<continuation_e, BasicBlock*> __${generator.functionName(instr.name)}(virt_addr_t& pc, code_word_t instr, BasicBlock* bb){
@@ -162,18 +166,27 @@ private:
        <%}%>if(this->disass_enabled){
            /* generate console output when executing the command */<%instr.disass.eachLine{%>
            ${it}<%}%>
+            std::vector<Value*> args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder.CreateGlobalStringPtr(mnemonic),
+            };
+            this->builder.CreateCall(this->mod->getFunction("print_disass"), args);
        }
        bb->setName(fmt::format("${instr.name}_0x{:X}",pc.val));
        this->gen_sync(PRE_SYNC,${idx});
-        auto cur_pc_val = this->gen_const(32,pc.val);
+        
+        this->gen_set_pc(pc, traits::PC);
+        this->set_tval(instr);
        pc=pc+ ${instr.length/8};
        this->gen_set_pc(pc, traits::NEXT_PC);
        
+        this->gen_instr_prologue();
        /*generate behavior*/
        <%instr.behavior.eachLine{%>${it}
        <%}%>
+        this->gen_sync(POST_SYNC, ${idx});
        this->gen_instr_epilogue(bb);
-    	this->gen_sync(POST_SYNC, ${idx});
        this->builder.CreateBr(bb);
    	return returnValue;        
    }
@@ -181,8 +194,17 @@ private:
    /****************************************************************************
     * end opcode definitions
     ****************************************************************************/
-    std::tuple<continuation_e, BasicBlock *> illegal_intruction(virt_addr_t &pc, code_word_t instr, BasicBlock *bb) {
-		this->gen_sync(iss::PRE_SYNC, instr_descr.size());
+    std::tuple<continuation_e, BasicBlock *> illegal_instruction(virt_addr_t &pc, code_word_t instr, BasicBlock *bb) {
+        if(this->disass_enabled){
+            auto mnemonic = std::string("illegal_instruction");
+            std::vector<Value*> args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder.CreateGlobalStringPtr(mnemonic),
+            };
+            this->builder.CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        this->gen_sync(iss::PRE_SYNC, instr_descr.size());
        this->builder.CreateStore(this->builder.CreateLoad(this->get_typeptr(traits::NEXT_PC), get_reg_ptr(traits::NEXT_PC), true),
                                   get_reg_ptr(traits::PC), true);
        this->builder.CreateStore(
@@ -190,62 +212,13 @@ private:
                                     this->gen_const(64U, 1)),
            get_reg_ptr(traits::ICOUNT), true);
        pc = pc + ((instr & 3) == 3 ? 4 : 2);
+        this->set_tval(instr);
        this->gen_raise_trap(0, 2);     // illegal instruction trap
 		this->gen_sync(iss::POST_SYNC, instr_descr.size());
-        this->gen_instr_epilogue(this->leave_blk);
-        return std::make_tuple(BRANCH, nullptr);
-    }    
-    //decoding functionality
-
-    void populate_decoding_tree(decoding_tree_node* root){
-        //create submask
-        for(auto instr: root->instrs){
-            root->submask &= instr.mask;
-        }
-        //put each instr according to submask&encoding into children
-        for(auto instr: root->instrs){
-            bool foundMatch = false;
-            for(auto child: root->children){
-                //use value as identifying trait
-                if(child->value == (instr.value&root->submask)){
-                    child->instrs.push_back(instr);
-                    foundMatch = true;
-                }
-            }
-            if(!foundMatch){
-                decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
-                child->instrs.push_back(instr);
-                root->children.push_back(child);
-            }
-        }
-        root->instrs.clear();
-        //call populate_decoding_tree for all children
-        if(root->children.size() >1)
-            for(auto child: root->children){
-                populate_decoding_tree(child);      
-            }
-        else{
-            //sort instrs by value of the mask, this works bc we want to have the least restrictive one last
-            std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
-            return instr1.mask > instr2.mask;
-            }); 
-        }
-    }
-    compile_func decode_instr(decoding_tree_node* node, code_word_t word){
-        if(!node->children.size()){
-            if(node->instrs.size() == 1) return node->instrs[0].op;
-            for(auto instr : node->instrs){
-                if((instr.mask&word) == instr.value) return instr.op;
-            }
-        }
-        else{
-            for(auto child : node->children){
-                if (child->value == (node->submask&word)){
-                    return decode_instr(child, word);
-                }  
-            }  
-        }
-        return nullptr;
+        bb = this->leave_blk;
+        this->gen_instr_epilogue(bb);
+        this->builder.CreateBr(bb);
+        return std::make_tuple(ILLEGAL_INSTR, nullptr);
    }    
 };

@@ -258,17 +231,20 @@ template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }

 template <typename ARCH>
 vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
-: vm_base<ARCH>(core, core_id, cluster_id) {
-    root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
-    for(auto instr:instr_descr){
-        root->instrs.push_back(instr);
-    }
-    populate_decoding_tree(root);
-}
+: vm_base<ARCH>(core, core_id, cluster_id)
+, instr_decoder([this]() { // immediately-invoked lambda: converts instr_descr into the list handed to the decoder
+        std::vector<generic_instruction_descriptor> g_instr_descr;
+        g_instr_descr.reserve(instr_descr.size());
+        for (uint32_t i = 0; i < instr_descr.size(); ++i) {
+            generic_instruction_descriptor new_instr_descr {instr_descr[i].value, instr_descr[i].mask, i}; // i = position in instr_descr
+            g_instr_descr.push_back(new_instr_descr);
+        }
+        return g_instr_descr; // plain return of the local permits NRVO/implicit move; std::move here would inhibit copy elision
+    }()) {}

 template <typename ARCH>
 std::tuple<continuation_e, BasicBlock *>
-vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, BasicBlock *this_block) {
+vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, BasicBlock *this_block) {
    // we fetch at max 4 byte, alignment is 2
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
@@ -277,23 +253,19 @@ vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt,
    auto *const data = (uint8_t *)&instr;
    if(this->core.has_mmu())
        paddr = this->core.virt2phys(pc);
-    //TODO: re-add page handling
-//    if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
-//        auto res = this->core.read(paddr, 2, data);
-//        if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
-//        if ((instr & 0x3) == 0x3) { // this is a 32bit instruction
-//            res = this->core.read(this->core.v2p(pc + 2), 2, data + 2);
-//        }
-//    } else {
-        auto res = this->core.read(paddr, 4, data);
-        if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
-//    }
-    if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
-    // curr pc on stack
-    ++inst_cnt;
-    auto f = decode_instr(root, instr);
+    auto res = this->core.read(paddr, 4, data); // always reads 4 bytes at paddr into instr
+    if (res != iss::Ok) // a failed read is reported through the return value; nothing is thrown here
+        return std::make_tuple(ILLEGAL_FETCH, nullptr);
+    if (instr == 0x0000006f || (instr&0xffff)==0xa001){ // 'J 0' or 'C.J 0'
+        this->builder.CreateBr(this->leave_blk);
+        return std::make_tuple(JUMP_TO_SELF, nullptr);
+        }
+    uint32_t inst_index = instr_decoder.decode_instr(instr);
+    compile_func f = nullptr; // stays nullptr when inst_index is outside instr_descr
+    if(inst_index < instr_descr.size())
+        f = instr_descr[inst_index].op;
    if (f == nullptr) {
-        f = &this_class::illegal_intruction;
+        f = &this_class::illegal_instruction;
    }
    return (this->*f)(pc, instr, this_block);
 }
@@ -308,16 +280,14 @@ template <typename ARCH>
 void vm_impl<ARCH>::gen_raise_trap(uint16_t trap_id, uint16_t cause) {
    auto *TRAP_val = this->gen_const(32, 0x80 << 24 | (cause << 16) | trap_id);
    this->builder.CreateStore(TRAP_val, get_reg_ptr(traits::TRAP_STATE), true);
-    this->builder.CreateStore(this->gen_const(32U, std::numeric_limits<uint32_t>::max()), get_reg_ptr(traits::LAST_BRANCH), false);
+    this->builder.CreateBr(this->trap_blk);
 }

 template <typename ARCH>
 void vm_impl<ARCH>::gen_leave_trap(unsigned lvl) {
    std::vector<Value *> args{ this->core_ptr, ConstantInt::get(getContext(), APInt(64, lvl)) };
    this->builder.CreateCall(this->mod->getFunction("leave_trap"), args);
-    auto *PC_val = this->gen_read_mem(traits::CSR, (lvl << 8) + 0x41, traits::XLEN / 8);
-    this->builder.CreateStore(PC_val, get_reg_ptr(traits::NEXT_PC), false);
-    this->builder.CreateStore(this->gen_const(32U, std::numeric_limits<uint32_t>::max()), get_reg_ptr(traits::LAST_BRANCH), false);
+    this->builder.CreateStore(this->gen_const(32U, static_cast<int>(UNKNOWN_JUMP)), get_reg_ptr(traits::LAST_BRANCH), false);
 }

 template <typename ARCH>
@@ -326,19 +296,37 @@ void vm_impl<ARCH>::gen_wait(unsigned type) {
    this->builder.CreateCall(this->mod->getFunction("wait"), args);
 }

+template <typename ARCH>
+inline void vm_impl<ARCH>::set_tval(uint64_t tval) {
+    auto tmp_tval = this->gen_const(64, tval);
+    this->set_tval(tmp_tval);
+}
+template <typename ARCH>
+inline void vm_impl<ARCH>::set_tval(Value* new_tval) {
+    this->builder.CreateStore(this->gen_ext(new_tval, 64, false), this->tval);
+}
 template <typename ARCH> 
 void vm_impl<ARCH>::gen_trap_behavior(BasicBlock *trap_blk) {
    this->builder.SetInsertPoint(trap_blk);
-    this->gen_sync(POST_SYNC, -1); //TODO get right InstrId
    auto *trap_state_val = this->builder.CreateLoad(this->get_typeptr(traits::TRAP_STATE), get_reg_ptr(traits::TRAP_STATE), true);
-    this->builder.CreateStore(this->gen_const(32U, std::numeric_limits<uint32_t>::max()),
-                              get_reg_ptr(traits::LAST_BRANCH), false);
-    std::vector<Value *> args{this->core_ptr, this->adj_to64(trap_state_val),
-                              this->adj_to64(this->builder.CreateLoad(this->get_typeptr(traits::PC), get_reg_ptr(traits::PC), false))};
+    auto *cur_pc_val = this->builder.CreateLoad(this->get_typeptr(traits::PC), get_reg_ptr(traits::PC), true);
+    std::vector<Value *> args{this->core_ptr,
+                                this->adj_to64(trap_state_val),
+                                this->adj_to64(cur_pc_val),
+                              this->adj_to64(this->builder.CreateLoad(this->get_type(64),this->tval))};
    this->builder.CreateCall(this->mod->getFunction("enter_trap"), args);
+    this->builder.CreateStore(this->gen_const(32U, static_cast<int>(UNKNOWN_JUMP)), get_reg_ptr(traits::LAST_BRANCH), false);
+
    auto *trap_addr_val = this->builder.CreateLoad(this->get_typeptr(traits::NEXT_PC), get_reg_ptr(traits::NEXT_PC), false);
    this->builder.CreateRet(trap_addr_val);
 }
+template <typename ARCH>
+void vm_impl<ARCH>::gen_instr_prologue() { // emits a load of PENDING_TRAP followed by a store of that value into TRAP_STATE
+    auto* trap_val =
+        this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::PENDING_TRAP), get_reg_ptr(arch::traits<ARCH>::PENDING_TRAP));
+    this->builder.CreateStore(trap_val, get_reg_ptr(arch::traits<ARCH>::TRAP_STATE), false);
+}
+            

 template <typename ARCH>
 void vm_impl<ARCH>::gen_instr_epilogue(BasicBlock *bb) {
@@ -349,6 +337,14 @@ void vm_impl<ARCH>::gen_instr_epilogue(BasicBlock *bb) {
                              ConstantInt::get(getContext(), APInt(v->getType()->getIntegerBitWidth(), 0))),
                          target_bb, this->trap_blk, 1);
    this->builder.SetInsertPoint(target_bb);
+    // update icount
+    auto* icount_val = this->builder.CreateAdd(
+        this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::ICOUNT), get_reg_ptr(arch::traits<ARCH>::ICOUNT)), this->gen_const(64U, 1));
+    this->builder.CreateStore(icount_val, get_reg_ptr(arch::traits<ARCH>::ICOUNT), false);
+    //increment cyclecount
+    auto* cycle_val = this->builder.CreateAdd(
+        this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::CYCLE), get_reg_ptr(arch::traits<ARCH>::CYCLE)), this->gen_const(64U, 1));
+    this->builder.CreateStore(cycle_val, get_reg_ptr(arch::traits<ARCH>::CYCLE), false);
 }

 } // namespace ${coreDef.name.toLowerCase()}
--- a/gen_input/templates/tcc/CORENAME.cpp.gtl
+++ b/gen_input/templates/tcc/CORENAME.cpp.gtl
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2020 MINRES Technologies GmbH
+ * Copyright (C) 2020-2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -37,7 +37,10 @@
 #include <iss/tcc/vm_base.h>
 #include <util/logging.h>
 #include <sstream>
-
+#include <iss/instruction_decoder.h>
+<%def fcsr = registers.find {it.name=='FCSR'}
+if(fcsr != null) {%>
+#include <vm/fp_functions.h><%}%>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
 #endif
@@ -80,16 +83,21 @@ protected:
    using vm_base<ARCH>::get_reg_ptr;

    using this_class = vm_impl<ARCH>;
-    using compile_ret_t = std::tuple<continuation_e>;
+    using compile_ret_t = continuation_e;
    using compile_func = compile_ret_t (this_class::*)(virt_addr_t &pc, code_word_t instr, tu_builder&);

    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
+<%
+if(fcsr != null) {%>
+    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}   
+<%}%>
+    void add_prologue(tu_builder& tu) override;

    void setup_module(std::string m) override {
        super::setup_module(m);
    }

-    compile_ret_t gen_single_inst_behavior(virt_addr_t &, unsigned int &, tu_builder&) override;
+    compile_ret_t gen_single_inst_behavior(virt_addr_t &, tu_builder&) override;

    void gen_trap_behavior(tu_builder& tu) override;

@@ -97,7 +105,9 @@ protected:

    void gen_leave_trap(tu_builder& tu, unsigned lvl);

-    void gen_wait(tu_builder& tu, unsigned type);
+    inline void gen_set_tval(tu_builder& tu, uint64_t new_tval);
+
+    inline void gen_set_tval(tu_builder& tu, value new_tval);

    inline void gen_trap_check(tu_builder& tu) {
        tu("if(*trap_state!=0) goto trap_entry;");
@@ -128,32 +138,29 @@ protected:
        return (from & mask) | ((from & sign_mask) ? ~mask : 0);
    }

+<%functions.each{ it.eachLine { %>
+    ${it}<%}%>
+<%}%>
 private:
    /****************************************************************************
     * start opcode definitions
     ****************************************************************************/
    struct instruction_descriptor {
-        size_t length;
+        uint32_t length;
        uint32_t value;
        uint32_t mask;
        compile_func op;
    };
-    struct decoding_tree_node{
-        std::vector<instruction_descriptor> instrs;
-        std::vector<decoding_tree_node*> children;
-        uint32_t submask = std::numeric_limits<uint32_t>::max();
-        uint32_t value;
-        decoding_tree_node(uint32_t value) : value(value){}
-    };

-    decoding_tree_node* root {nullptr};
-
-    const std::array<instruction_descriptor, ${instructions.size}> instr_descr = {{
+    const std::array<instruction_descriptor, ${instructions.size()}> instr_descr = {{
         /* entries are: size, valid value, valid mask, function ptr */<%instructions.each{instr -> %>
        /* instruction ${instr.instruction.name}, encoding '${instr.encoding}' */
        {${instr.length}, ${instr.encoding}, ${instr.mask}, &this_class::__${generator.functionName(instr.name)}},<%}%>
    }};

+    //needs to be declared after instr_descr
+    decoder instr_decoder;
+
    /* instruction definitions */<%instructions.eachWithIndex{instr, idx -> %>
    /* instruction ${idx}: ${instr.name} */
    compile_ret_t __${generator.functionName(instr.name)}(virt_addr_t& pc, code_word_t instr, tu_builder& tu){
@@ -164,82 +171,37 @@ private:
        <%}%>if(this->disass_enabled){
            /* generate console output when executing the command */<%instr.disass.eachLine{%>
            ${it}<%}%>
+            tu("print_disass(core_ptr, {:#x}, \"{}\");", pc.val, mnemonic);
        }
        auto cur_pc_val = tu.constant(pc.val, traits::reg_bit_widths[traits::PC]);
        pc=pc+ ${instr.length/8};
        gen_set_pc(tu, pc, traits::NEXT_PC);
+        tu("(*cycle)++;");
        tu.open_scope();
+        this->gen_set_tval(tu, instr);
        <%instr.behavior.eachLine{%>${it}
        <%}%>
        tu.close_scope();
-        gen_trap_check(tu);        
        vm_base<ARCH>::gen_sync(tu, POST_SYNC,${idx});
+        gen_trap_check(tu);        
        return returnValue;
    }
    <%}%>
    /****************************************************************************
     * end opcode definitions
     ****************************************************************************/
-    compile_ret_t illegal_intruction(virt_addr_t &pc, code_word_t instr, tu_builder& tu) {
+    compile_ret_t illegal_instruction(virt_addr_t &pc, code_word_t instr, tu_builder& tu) {
        vm_impl::gen_sync(tu, iss::PRE_SYNC, instr_descr.size());
+        if(this->disass_enabled){
+            /* generate console output when executing the command */
+            tu("print_disass(core_ptr, {:#x}, \"{}\");", pc.val, std::string("illegal_instruction"));
+        }
        pc = pc + ((instr & 3) == 3 ? 4 : 2);
-        gen_raise_trap(tu, 0, 2);     // illegal instruction trap
+        gen_raise_trap(tu, 0, static_cast<int32_t>(traits:: RV_CAUSE_ILLEGAL_INSTRUCTION));
+        this->gen_set_tval(tu, instr);
        vm_impl::gen_sync(tu, iss::POST_SYNC, instr_descr.size());
        vm_impl::gen_trap_check(tu);
-        return BRANCH;
-    }
-    
-    //decoding functionality
-
-    void populate_decoding_tree(decoding_tree_node* root){
-        //create submask
-        for(auto instr: root->instrs){
-            root->submask &= instr.mask;
-        }
-        //put each instr according to submask&encoding into children
-        for(auto instr: root->instrs){
-            bool foundMatch = false;
-            for(auto child: root->children){
-                //use value as identifying trait
-                if(child->value == (instr.value&root->submask)){
-                    child->instrs.push_back(instr);
-                    foundMatch = true;
-                }
-            }
-            if(!foundMatch){
-                decoding_tree_node* child = new decoding_tree_node(instr.value&root->submask);
-                child->instrs.push_back(instr);
-                root->children.push_back(child);
-            }
-        }
-        root->instrs.clear();
-        //call populate_decoding_tree for all children
-        if(root->children.size() >1)
-            for(auto child: root->children){
-                populate_decoding_tree(child);      
-            }
-        else{
-            //sort instrs by value of the mask, this works bc we want to have the least restrictive one last
-            std::sort(root->children[0]->instrs.begin(), root->children[0]->instrs.end(), [](const instruction_descriptor& instr1, const instruction_descriptor& instr2) {
-            return instr1.mask > instr2.mask;
-            }); 
-        }
-    }
-    compile_func decode_instr(decoding_tree_node* node, code_word_t word){
-        if(!node->children.size()){
-            if(node->instrs.size() == 1) return node->instrs[0].op;
-            for(auto instr : node->instrs){
-                if((instr.mask&word) == instr.value) return instr.op;
-            }
-        }
-        else{
-            for(auto child : node->children){
-                if (child->value == (node->submask&word)){
-                    return decode_instr(child, word);
-                }  
-            }  
-        }
-        return nullptr;
+        return ILLEGAL_INSTR;
    }
 };

@@ -252,65 +214,100 @@ template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }

 template <typename ARCH>
 vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
-: vm_base<ARCH>(core, core_id, cluster_id) {
-    root = new decoding_tree_node(std::numeric_limits<uint32_t>::max());
-    for(auto instr:instr_descr){
-        root->instrs.push_back(instr);
-    }
-    populate_decoding_tree(root);
-}
+: vm_base<ARCH>(core, core_id, cluster_id)
+// The decoder is fed one (value, mask, index) triple per entry of instr_descr; the index
+// is the entry's position in instr_descr, so a decode result can be used to look up the
+// compile function in that table.
+, instr_decoder([this]() {
+        std::vector<generic_instruction_descriptor> g_instr_descr;
+        g_instr_descr.reserve(instr_descr.size());
+        for (uint32_t i = 0; i < instr_descr.size(); ++i) {
+            generic_instruction_descriptor new_instr_descr {instr_descr[i].value, instr_descr[i].mask, i};
+            g_instr_descr.push_back(new_instr_descr);
+        }
+        // return the local by name: wrapping it in std::move would only inhibit copy elision
+        return g_instr_descr;
+    }()) {}

 template <typename ARCH>
-std::tuple<continuation_e>
-vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) {
+continuation_e
+vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, tu_builder& tu) {
+    // Translates the instruction at pc: reads 4 bytes from the (possibly MMU-translated)
+    // address, returns ILLEGAL_FETCH when the read fails and JUMP_TO_SELF for the
+    // 'J 0' / 'C.J 0' encodings, otherwise decodes the word and invokes the matching
+    // compile function (illegal_instruction when the decoder yields no usable descriptor).
     // we fetch at max 4 byte, alignment is 2
     enum {TRAP_ID=1<<16};
     code_word_t instr = 0;
     phys_addr_t paddr(pc);
     if(this->core.has_mmu())
         paddr = this->core.virt2phys(pc);
-    //TODO: re-add page handling
-//    if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
-//        auto res = this->core.read(paddr, 2, data);
-//        if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
-//        if ((insn & 0x3) == 0x3) { // this is a 32bit instruction
-//            res = this->core.read(this->core.v2p(pc + 2), 2, data + 2);
-//        }
-//    } else {
-        auto res = this->core.read(paddr, 4, reinterpret_cast<uint8_t*>(&instr));
-        if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
-//    }
-    if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
-    // curr pc on stack
-    ++inst_cnt;
-    auto f = decode_instr(root, instr);
+    auto res = this->core.read(paddr, 4, reinterpret_cast<uint8_t*>(&instr));
+    if (res != iss::Ok)
+        return ILLEGAL_FETCH;
+    // 0x0000006f is 'J 0', a low half-word of 0xa001 is 'C.J 0'
+    if (instr == 0x0000006f || (instr&0xffff)==0xa001) 
+        return JUMP_TO_SELF;
+    uint32_t inst_index = instr_decoder.decode_instr(instr);
+    compile_func f = nullptr;
+    // an index outside instr_descr leaves f null and selects the illegal-instruction handler
+    if(inst_index < instr_descr.size())
+        f = instr_descr[inst_index].op;
     if (f == nullptr) {
-        f = &this_class::illegal_intruction;
+        f = &this_class::illegal_instruction;
     }
     return (this->*f)(pc, instr, tu);
 }

 template <typename ARCH> void vm_impl<ARCH>::gen_raise_trap(tu_builder& tu, uint16_t trap_id, uint16_t cause) {
+    // Emits an assignment to *trap_state: bit 31 set, cause shifted into bits 16 and up,
+    // trap_id in the low bits. NEXT_PC is no longer overwritten here.
     tu("  *trap_state = {:#x};", 0x80 << 24 | (cause << 16) | trap_id);
-    tu.store(traits::NEXT_PC, tu.constant(std::numeric_limits<uint32_t>::max(), 32));
 }

 template <typename ARCH> void vm_impl<ARCH>::gen_leave_trap(tu_builder& tu, unsigned lvl) {
+    // Emits a call to leave_trap for level lvl, then loads NEXT_PC from CSR address
+    // (lvl << 8) + 0x41 (presumably the exception PC register of that level -- confirm)
+    // and marks the last branch as UNKNOWN_JUMP.
     tu("leave_trap(core_ptr, {});", lvl);
     tu.store(traits::NEXT_PC, tu.read_mem(traits::CSR, (lvl << 8) + 0x41, traits::XLEN));
-    tu.store(traits::LAST_BRANCH, tu.constant(std::numeric_limits<uint32_t>::max(), 32));
+    tu.store(traits::LAST_BRANCH, tu.constant(static_cast<int>(UNKNOWN_JUMP), 32));
 }

-template <typename ARCH> void vm_impl<ARCH>::gen_wait(tu_builder& tu, unsigned type) {
+template <typename ARCH> void vm_impl<ARCH>::gen_set_tval(tu_builder& tu, uint64_t new_tval) {
+    // Emits an assignment of the given constant to tval in the generated code. The
+    // arguments go straight to the builder, like the other emitters in this file, so the
+    // finished text is never handed over as a format string.
+    tu("tval = {};", new_tval);
+}
+template <typename ARCH> void vm_impl<ARCH>::gen_set_tval(tu_builder& tu, value new_tval) {
+    // Same assignment for a generated value: its textual form (new_tval.str) is inserted.
+    tu("tval = {};", new_tval.str);
 }

 template <typename ARCH> void vm_impl<ARCH>::gen_trap_behavior(tu_builder& tu) {
+    // Emits the trap_entry label that gen_trap_check jumps to: after the sync call the
+    // generated code invokes enter_trap with the trap state, pc and tval, marks the last
+    // branch as UNKNOWN_JUMP and returns *next_pc.
     tu("trap_entry:");
     this->gen_sync(tu, POST_SYNC, -1);    
-    tu("enter_trap(core_ptr, *trap_state, *pc, 0);");
-    tu.store(traits::LAST_BRANCH, tu.constant(std::numeric_limits<uint32_t>::max(),32));
+    tu("enter_trap(core_ptr, *trap_state, *pc, tval);");
+    tu.store(traits::LAST_BRANCH, tu.constant(static_cast<int>(UNKNOWN_JUMP),32));
     tu("return *next_pc;");
 }
+template <typename ARCH> void vm_impl<ARCH>::add_prologue(tu_builder& tu){
+    // Assembles the prologue text for a translation unit and hands it to tu.add_prologue():
+    // the results of add_reg_ptr for trap_state, pending_trap and cycle, plus (only when
+    // the core description has an fcsr) function-pointer declarations for the floating
+    // point helper functions.
+    std::ostringstream os;
+    os << tu.add_reg_ptr("trap_state", arch::traits<ARCH>::TRAP_STATE, this->regs_base_ptr);
+    os << tu.add_reg_ptr("pending_trap", arch::traits<ARCH>::PENDING_TRAP, this->regs_base_ptr);
+    os << tu.add_reg_ptr("cycle", arch::traits<ARCH>::CYCLE, this->regs_base_ptr);
+<%if(fcsr != null) {%>
+    // each helper's host address is streamed as a plain integer and becomes the
+    // initializer of the function pointer of the same name in the generated source
+    os << "uint32_t (*fget_flags)()=" << (uintptr_t)&fget_flags << ";\\n";
+    os << "uint32_t (*fadd_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fadd_s << ";\\n";
+    os << "uint32_t (*fsub_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fsub_s << ";\\n";
+    os << "uint32_t (*fmul_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fmul_s << ";\\n";
+    os << "uint32_t (*fdiv_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fdiv_s << ";\\n";
+    os << "uint32_t (*fsqrt_s)(uint32_t v1, uint8_t mode)=" << (uintptr_t)&fsqrt_s << ";\\n";
+    os << "uint32_t (*fcmp_s)(uint32_t v1, uint32_t v2, uint32_t op)=" << (uintptr_t)&fcmp_s << ";\\n";
+    os << "uint32_t (*fcvt_s)(uint32_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_s << ";\\n";
+    os << "uint32_t (*fmadd_s)(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t op, uint8_t mode)=" << (uintptr_t)&fmadd_s << ";\\n";
+    os << "uint32_t (*fsel_s)(uint32_t v1, uint32_t v2, uint32_t op)=" << (uintptr_t)&fsel_s << ";\\n";
+    os << "uint32_t (*fclass_s)( uint32_t v1 )=" << (uintptr_t)&fclass_s << ";\\n";
+    os << "uint32_t (*fconv_d2f)(uint64_t v1, uint8_t mode)=" << (uintptr_t)&fconv_d2f << ";\\n";
+    os << "uint64_t (*fconv_f2d)(uint32_t v1, uint8_t mode)=" << (uintptr_t)&fconv_f2d << ";\\n";
+    os << "uint64_t (*fadd_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fadd_d << ";\\n";
+    os << "uint64_t (*fsub_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fsub_d << ";\\n";
+    os << "uint64_t (*fmul_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fmul_d << ";\\n";
+    os << "uint64_t (*fdiv_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fdiv_d << ";\\n";
+    os << "uint64_t (*fsqrt_d)(uint64_t v1, uint8_t mode)=" << (uintptr_t)&fsqrt_d << ";\\n";
+    os << "uint64_t (*fcmp_d)(uint64_t v1, uint64_t v2, uint32_t op)=" << (uintptr_t)&fcmp_d << ";\\n";
+    os << "uint64_t (*fcvt_d)(uint64_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_d << ";\\n";
+    os << "uint64_t (*fmadd_d)(uint64_t v1, uint64_t v2, uint64_t v3, uint32_t op, uint8_t mode)=" << (uintptr_t)&fmadd_d << ";\\n";
+    os << "uint64_t (*fsel_d)(uint64_t v1, uint64_t v2, uint32_t op)=" << (uintptr_t)&fsel_d << ";\\n";
+    os << "uint64_t (*fclass_d)(uint64_t v1  )=" << (uintptr_t)&fclass_d << ";\\n";
+    os << "uint64_t (*fcvt_32_64)(uint32_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_32_64 << ";\\n";
+    os << "uint32_t (*fcvt_64_32)(uint64_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_64_32 << ";\\n";
+    os << "uint32_t (*unbox_s)(uint64_t v)=" << (uintptr_t)&unbox_s << ";\\n";
+    <%}%>
+    tu.add_prologue(os.str());
+}

 } // namespace ${coreDef.name.toLowerCase()}

@@ -334,7 +331,7 @@ volatile std::array<bool, 2> dummy = {
 		    auto vm = new tcc::${coreDef.name.toLowerCase()}::vm_impl<arch::${coreDef.name.toLowerCase()}>(*cpu, false);
 		    if (port != 0) debugger::server<debugger::gdb_session>::run_server(vm, port);
            if(init_data){
-                auto* cb = reinterpret_cast<std::function<void(arch_if*, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t)>*>(init_data);
+                auto* cb = reinterpret_cast<semihosting_cb_t<arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t>*>(init_data);
                cpu->set_semihosting_callback(*cb);
            }
            return {cpu_ptr{cpu}, vm_ptr{vm}};
@@ -344,7 +341,7 @@ volatile std::array<bool, 2> dummy = {
 		    auto vm = new tcc::${coreDef.name.toLowerCase()}::vm_impl<arch::${coreDef.name.toLowerCase()}>(*cpu, false);
 		    if (port != 0) debugger::server<debugger::gdb_session>::run_server(vm, port);
            if(init_data){
-                auto* cb = reinterpret_cast<std::function<void(arch_if*, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t, arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t)>*>(init_data);
+                auto* cb = reinterpret_cast<semihosting_cb_t<arch::traits<arch::${coreDef.name.toLowerCase()}>::reg_t>*>(init_data);
                cpu->set_semihosting_callback(*cb);
            }
            return {cpu_ptr{cpu}, vm_ptr{vm}};
--- a/softfloat/.gitignore
+++ b/softfloat/.gitignore
@@ -0,0 +1,2 @@
+build/*/*.o
+build/*/*.a
--- a/softfloat/README.md
+++ b/softfloat/README.md
@@ -0,0 +1,24 @@
+
+Package Overview for Berkeley SoftFloat Release 3e
+==================================================
+
+John R. Hauser<br>
+2018 January 20
+
+
+Berkeley SoftFloat is a software implementation of binary floating-point
+that conforms to the IEEE Standard for Floating-Point Arithmetic.  SoftFloat
+is distributed in the form of C source code.  Building the SoftFloat sources
+generates a library file (typically `softfloat.a` or `libsoftfloat.a`)
+containing the floating-point subroutines.
+
+
+The SoftFloat package is documented in the following files in the `doc`
+subdirectory:
+
+* [SoftFloat.html](http://www.jhauser.us/arithmetic/SoftFloat-3/doc/SoftFloat.html) Documentation for using the SoftFloat functions.
+* [SoftFloat-source.html](http://www.jhauser.us/arithmetic/SoftFloat-3/doc/SoftFloat-source.html) Documentation for building SoftFloat.
+* [SoftFloat-history.html](http://www.jhauser.us/arithmetic/SoftFloat-3/doc/SoftFloat-history.html) History of the major changes to SoftFloat.
+
+Other files in the package comprise the source code for SoftFloat.
+
--- a/softfloat/build/Linux-386-GCC/platform.h
+++ b/softfloat/build/Linux-386-GCC/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,6 +47,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #include "opts-GCC.h"
+
--- a/softfloat/build/Linux-386-SSE2-GCC/platform.h
+++ b/softfloat/build/Linux-386-SSE2-GCC/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,6 +47,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #include "opts-GCC.h"
+
--- a/softfloat/build/Linux-ARM-VFPv2-GCC/platform.h
+++ b/softfloat/build/Linux-ARM-VFPv2-GCC/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,6 +47,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #include "opts-GCC.h"
+
--- a/softfloat/build/Linux-RISCV64-GCC/Makefile
+++ b/softfloat/build/Linux-RISCV64-GCC/Makefile
@@ -0,0 +1,399 @@
+
+#=============================================================================
+#
+# This Makefile is part of the SoftFloat IEEE Floating-Point Arithmetic
+# Package, Release 3e, by John R. Hauser.
+#
+# Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+# University of California.  All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  1. Redistributions of source code must retain the above copyright notice,
+#     this list of conditions, and the following disclaimer.
+#
+#  2. Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions, and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#
+#  3. Neither the name of the University nor the names of its contributors
+#     may be used to endorse or promote products derived from this software
+#     without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+# DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#=============================================================================
+
+SOURCE_DIR ?= ../../source
+SPECIALIZE_TYPE ?= RISCV
+MARCH ?= rv64gcv_zfh_zfhmin
+MABI ?= lp64d
+
+SOFTFLOAT_OPTS ?= \
+  -DSOFTFLOAT_ROUND_ODD -DINLINE_LEVEL=5 -DSOFTFLOAT_FAST_DIV32TO16 \
+  -DSOFTFLOAT_FAST_DIV64TO32
+
+DELETE = rm -f
+C_INCLUDES = -I. -I$(SOURCE_DIR)/$(SPECIALIZE_TYPE) -I$(SOURCE_DIR)/include
+COMPILE_C = \
+  riscv64-unknown-linux-gnu-gcc -c -march=$(MARCH) -mabi=$(MABI) -Werror-implicit-function-declaration -DSOFTFLOAT_FAST_INT64 \
+    $(SOFTFLOAT_OPTS) $(C_INCLUDES) -O2 -o $@
+MAKELIB = ar crs $@
+
+OBJ = .o
+LIB = .a
+
+OTHER_HEADERS = $(SOURCE_DIR)/include/opts-GCC.h
+
+.PHONY: all
+all: softfloat$(LIB)
+
+OBJS_PRIMITIVES = \
+  s_eq128$(OBJ) \
+  s_le128$(OBJ) \
+  s_lt128$(OBJ) \
+  s_shortShiftLeft128$(OBJ) \
+  s_shortShiftRight128$(OBJ) \
+  s_shortShiftRightJam64$(OBJ) \
+  s_shortShiftRightJam64Extra$(OBJ) \
+  s_shortShiftRightJam128$(OBJ) \
+  s_shortShiftRightJam128Extra$(OBJ) \
+  s_shiftRightJam32$(OBJ) \
+  s_shiftRightJam64$(OBJ) \
+  s_shiftRightJam64Extra$(OBJ) \
+  s_shiftRightJam128$(OBJ) \
+  s_shiftRightJam128Extra$(OBJ) \
+  s_shiftRightJam256M$(OBJ) \
+  s_countLeadingZeros8$(OBJ) \
+  s_countLeadingZeros16$(OBJ) \
+  s_countLeadingZeros32$(OBJ) \
+  s_countLeadingZeros64$(OBJ) \
+  s_add128$(OBJ) \
+  s_add256M$(OBJ) \
+  s_sub128$(OBJ) \
+  s_sub256M$(OBJ) \
+  s_mul64ByShifted32To128$(OBJ) \
+  s_mul64To128$(OBJ) \
+  s_mul128By32$(OBJ) \
+  s_mul128To256M$(OBJ) \
+  s_approxRecip_1Ks$(OBJ) \
+  s_approxRecip32_1$(OBJ) \
+  s_approxRecipSqrt_1Ks$(OBJ) \
+  s_approxRecipSqrt32_1$(OBJ) \
+
+OBJS_SPECIALIZE = \
+  softfloat_raiseFlags$(OBJ) \
+  s_f16UIToCommonNaN$(OBJ) \
+  s_commonNaNToF16UI$(OBJ) \
+  s_propagateNaNF16UI$(OBJ) \
+  s_bf16UIToCommonNaN$(OBJ) \
+  s_commonNaNToBF16UI$(OBJ) \
+  s_f32UIToCommonNaN$(OBJ) \
+  s_commonNaNToF32UI$(OBJ) \
+  s_propagateNaNF32UI$(OBJ) \
+  s_f64UIToCommonNaN$(OBJ) \
+  s_commonNaNToF64UI$(OBJ) \
+  s_propagateNaNF64UI$(OBJ) \
+  extF80M_isSignalingNaN$(OBJ) \
+  s_extF80UIToCommonNaN$(OBJ) \
+  s_commonNaNToExtF80UI$(OBJ) \
+  s_propagateNaNExtF80UI$(OBJ) \
+  f128M_isSignalingNaN$(OBJ) \
+  s_f128UIToCommonNaN$(OBJ) \
+  s_commonNaNToF128UI$(OBJ) \
+  s_propagateNaNF128UI$(OBJ) \
+
+OBJS_OTHERS = \
+  s_roundToUI32$(OBJ) \
+  s_roundToUI64$(OBJ) \
+  s_roundToI32$(OBJ) \
+  s_roundToI64$(OBJ) \
+  s_normSubnormalBF16Sig$(OBJ) \
+  s_roundPackToBF16$(OBJ) \
+  s_normSubnormalF16Sig$(OBJ) \
+  s_roundPackToF16$(OBJ) \
+  s_normRoundPackToF16$(OBJ) \
+  s_addMagsF16$(OBJ) \
+  s_subMagsF16$(OBJ) \
+  s_mulAddF16$(OBJ) \
+  s_normSubnormalF32Sig$(OBJ) \
+  s_roundPackToF32$(OBJ) \
+  s_normRoundPackToF32$(OBJ) \
+  s_addMagsF32$(OBJ) \
+  s_subMagsF32$(OBJ) \
+  s_mulAddF32$(OBJ) \
+  s_normSubnormalF64Sig$(OBJ) \
+  s_roundPackToF64$(OBJ) \
+  s_normRoundPackToF64$(OBJ) \
+  s_addMagsF64$(OBJ) \
+  s_subMagsF64$(OBJ) \
+  s_mulAddF64$(OBJ) \
+  s_normSubnormalExtF80Sig$(OBJ) \
+  s_roundPackToExtF80$(OBJ) \
+  s_normRoundPackToExtF80$(OBJ) \
+  s_addMagsExtF80$(OBJ) \
+  s_subMagsExtF80$(OBJ) \
+  s_normSubnormalF128Sig$(OBJ) \
+  s_roundPackToF128$(OBJ) \
+  s_normRoundPackToF128$(OBJ) \
+  s_addMagsF128$(OBJ) \
+  s_subMagsF128$(OBJ) \
+  s_mulAddF128$(OBJ) \
+  softfloat_state$(OBJ) \
+  ui32_to_f16$(OBJ) \
+  ui32_to_f32$(OBJ) \
+  ui32_to_f64$(OBJ) \
+  ui32_to_extF80$(OBJ) \
+  ui32_to_extF80M$(OBJ) \
+  ui32_to_f128$(OBJ) \
+  ui32_to_f128M$(OBJ) \
+  ui64_to_f16$(OBJ) \
+  ui64_to_f32$(OBJ) \
+  ui64_to_f64$(OBJ) \
+  ui64_to_extF80$(OBJ) \
+  ui64_to_extF80M$(OBJ) \
+  ui64_to_f128$(OBJ) \
+  ui64_to_f128M$(OBJ) \
+  i32_to_f16$(OBJ) \
+  i32_to_f32$(OBJ) \
+  i32_to_f64$(OBJ) \
+  i32_to_extF80$(OBJ) \
+  i32_to_extF80M$(OBJ) \
+  i32_to_f128$(OBJ) \
+  i32_to_f128M$(OBJ) \
+  i64_to_f16$(OBJ) \
+  i64_to_f32$(OBJ) \
+  i64_to_f64$(OBJ) \
+  i64_to_extF80$(OBJ) \
+  i64_to_extF80M$(OBJ) \
+  i64_to_f128$(OBJ) \
+  i64_to_f128M$(OBJ) \
+  bf16_isSignalingNaN$(OBJ) \
+  bf16_to_f32$(OBJ) \
+  f16_to_ui32$(OBJ) \
+  f16_to_ui64$(OBJ) \
+  f16_to_i32$(OBJ) \
+  f16_to_i64$(OBJ) \
+  f16_to_ui32_r_minMag$(OBJ) \
+  f16_to_ui64_r_minMag$(OBJ) \
+  f16_to_i32_r_minMag$(OBJ) \
+  f16_to_i64_r_minMag$(OBJ) \
+  f16_to_f32$(OBJ) \
+  f16_to_f64$(OBJ) \
+  f16_to_extF80$(OBJ) \
+  f16_to_extF80M$(OBJ) \
+  f16_to_f128$(OBJ) \
+  f16_to_f128M$(OBJ) \
+  f16_roundToInt$(OBJ) \
+  f16_add$(OBJ) \
+  f16_sub$(OBJ) \
+  f16_mul$(OBJ) \
+  f16_mulAdd$(OBJ) \
+  f16_div$(OBJ) \
+  f16_rem$(OBJ) \
+  f16_sqrt$(OBJ) \
+  f16_eq$(OBJ) \
+  f16_le$(OBJ) \
+  f16_lt$(OBJ) \
+  f16_eq_signaling$(OBJ) \
+  f16_le_quiet$(OBJ) \
+  f16_lt_quiet$(OBJ) \
+  f16_isSignalingNaN$(OBJ) \
+  f32_to_ui32$(OBJ) \
+  f32_to_ui64$(OBJ) \
+  f32_to_i32$(OBJ) \
+  f32_to_i64$(OBJ) \
+  f32_to_ui32_r_minMag$(OBJ) \
+  f32_to_ui64_r_minMag$(OBJ) \
+  f32_to_i32_r_minMag$(OBJ) \
+  f32_to_i64_r_minMag$(OBJ) \
+  f32_to_bf16$(OBJ) \
+  f32_to_f16$(OBJ) \
+  f32_to_f64$(OBJ) \
+  f32_to_extF80$(OBJ) \
+  f32_to_extF80M$(OBJ) \
+  f32_to_f128$(OBJ) \
+  f32_to_f128M$(OBJ) \
+  f32_roundToInt$(OBJ) \
+  f32_add$(OBJ) \
+  f32_sub$(OBJ) \
+  f32_mul$(OBJ) \
+  f32_mulAdd$(OBJ) \
+  f32_div$(OBJ) \
+  f32_rem$(OBJ) \
+  f32_sqrt$(OBJ) \
+  f32_eq$(OBJ) \
+  f32_le$(OBJ) \
+  f32_lt$(OBJ) \
+  f32_eq_signaling$(OBJ) \
+  f32_le_quiet$(OBJ) \
+  f32_lt_quiet$(OBJ) \
+  f32_isSignalingNaN$(OBJ) \
+  f64_to_ui32$(OBJ) \
+  f64_to_ui64$(OBJ) \
+  f64_to_i32$(OBJ) \
+  f64_to_i64$(OBJ) \
+  f64_to_ui32_r_minMag$(OBJ) \
+  f64_to_ui64_r_minMag$(OBJ) \
+  f64_to_i32_r_minMag$(OBJ) \
+  f64_to_i64_r_minMag$(OBJ) \
+  f64_to_f16$(OBJ) \
+  f64_to_f32$(OBJ) \
+  f64_to_extF80$(OBJ) \
+  f64_to_extF80M$(OBJ) \
+  f64_to_f128$(OBJ) \
+  f64_to_f128M$(OBJ) \
+  f64_roundToInt$(OBJ) \
+  f64_add$(OBJ) \
+  f64_sub$(OBJ) \
+  f64_mul$(OBJ) \
+  f64_mulAdd$(OBJ) \
+  f64_div$(OBJ) \
+  f64_rem$(OBJ) \
+  f64_sqrt$(OBJ) \
+  f64_eq$(OBJ) \
+  f64_le$(OBJ) \
+  f64_lt$(OBJ) \
+  f64_eq_signaling$(OBJ) \
+  f64_le_quiet$(OBJ) \
+  f64_lt_quiet$(OBJ) \
+  f64_isSignalingNaN$(OBJ) \
+  extF80_to_ui32$(OBJ) \
+  extF80_to_ui64$(OBJ) \
+  extF80_to_i32$(OBJ) \
+  extF80_to_i64$(OBJ) \
+  extF80_to_ui32_r_minMag$(OBJ) \
+  extF80_to_ui64_r_minMag$(OBJ) \
+  extF80_to_i32_r_minMag$(OBJ) \
+  extF80_to_i64_r_minMag$(OBJ) \
+  extF80_to_f16$(OBJ) \
+  extF80_to_f32$(OBJ) \
+  extF80_to_f64$(OBJ) \
+  extF80_to_f128$(OBJ) \
+  extF80_roundToInt$(OBJ) \
+  extF80_add$(OBJ) \
+  extF80_sub$(OBJ) \
+  extF80_mul$(OBJ) \
+  extF80_div$(OBJ) \
+  extF80_rem$(OBJ) \
+  extF80_sqrt$(OBJ) \
+  extF80_eq$(OBJ) \
+  extF80_le$(OBJ) \
+  extF80_lt$(OBJ) \
+  extF80_eq_signaling$(OBJ) \
+  extF80_le_quiet$(OBJ) \
+  extF80_lt_quiet$(OBJ) \
+  extF80_isSignalingNaN$(OBJ) \
+  extF80M_to_ui32$(OBJ) \
+  extF80M_to_ui64$(OBJ) \
+  extF80M_to_i32$(OBJ) \
+  extF80M_to_i64$(OBJ) \
+  extF80M_to_ui32_r_minMag$(OBJ) \
+  extF80M_to_ui64_r_minMag$(OBJ) \
+  extF80M_to_i32_r_minMag$(OBJ) \
+  extF80M_to_i64_r_minMag$(OBJ) \
+  extF80M_to_f16$(OBJ) \
+  extF80M_to_f32$(OBJ) \
+  extF80M_to_f64$(OBJ) \
+  extF80M_to_f128M$(OBJ) \
+  extF80M_roundToInt$(OBJ) \
+  extF80M_add$(OBJ) \
+  extF80M_sub$(OBJ) \
+  extF80M_mul$(OBJ) \
+  extF80M_div$(OBJ) \
+  extF80M_rem$(OBJ) \
+  extF80M_sqrt$(OBJ) \
+  extF80M_eq$(OBJ) \
+  extF80M_le$(OBJ) \
+  extF80M_lt$(OBJ) \
+  extF80M_eq_signaling$(OBJ) \
+  extF80M_le_quiet$(OBJ) \
+  extF80M_lt_quiet$(OBJ) \
+  f128_to_ui32$(OBJ) \
+  f128_to_ui64$(OBJ) \
+  f128_to_i32$(OBJ) \
+  f128_to_i64$(OBJ) \
+  f128_to_ui32_r_minMag$(OBJ) \
+  f128_to_ui64_r_minMag$(OBJ) \
+  f128_to_i32_r_minMag$(OBJ) \
+  f128_to_i64_r_minMag$(OBJ) \
+  f128_to_f16$(OBJ) \
+  f128_to_f32$(OBJ) \
+  f128_to_extF80$(OBJ) \
+  f128_to_f64$(OBJ) \
+  f128_roundToInt$(OBJ) \
+  f128_add$(OBJ) \
+  f128_sub$(OBJ) \
+  f128_mul$(OBJ) \
+  f128_mulAdd$(OBJ) \
+  f128_div$(OBJ) \
+  f128_rem$(OBJ) \
+  f128_sqrt$(OBJ) \
+  f128_eq$(OBJ) \
+  f128_le$(OBJ) \
+  f128_lt$(OBJ) \
+  f128_eq_signaling$(OBJ) \
+  f128_le_quiet$(OBJ) \
+  f128_lt_quiet$(OBJ) \
+  f128_isSignalingNaN$(OBJ) \
+  f128M_to_ui32$(OBJ) \
+  f128M_to_ui64$(OBJ) \
+  f128M_to_i32$(OBJ) \
+  f128M_to_i64$(OBJ) \
+  f128M_to_ui32_r_minMag$(OBJ) \
+  f128M_to_ui64_r_minMag$(OBJ) \
+  f128M_to_i32_r_minMag$(OBJ) \
+  f128M_to_i64_r_minMag$(OBJ) \
+  f128M_to_f16$(OBJ) \
+  f128M_to_f32$(OBJ) \
+  f128M_to_extF80M$(OBJ) \
+  f128M_to_f64$(OBJ) \
+  f128M_roundToInt$(OBJ) \
+  f128M_add$(OBJ) \
+  f128M_sub$(OBJ) \
+  f128M_mul$(OBJ) \
+  f128M_mulAdd$(OBJ) \
+  f128M_div$(OBJ) \
+  f128M_rem$(OBJ) \
+  f128M_sqrt$(OBJ) \
+  f128M_eq$(OBJ) \
+  f128M_le$(OBJ) \
+  f128M_lt$(OBJ) \
+  f128M_eq_signaling$(OBJ) \
+  f128M_le_quiet$(OBJ) \
+  f128M_lt_quiet$(OBJ) \
+
+OBJS_ALL = $(OBJS_PRIMITIVES) $(OBJS_SPECIALIZE) $(OBJS_OTHERS)
+
+$(OBJS_ALL): \
+  $(OTHER_HEADERS) platform.h $(SOURCE_DIR)/include/primitiveTypes.h \
+  $(SOURCE_DIR)/include/primitives.h
+$(OBJS_SPECIALIZE) $(OBJS_OTHERS): \
+  $(SOURCE_DIR)/include/softfloat_types.h $(SOURCE_DIR)/include/internals.h \
+  $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/specialize.h \
+  $(SOURCE_DIR)/include/softfloat.h
+
+$(OBJS_PRIMITIVES) $(OBJS_OTHERS): %$(OBJ): $(SOURCE_DIR)/%.c
+	$(COMPILE_C) $(SOURCE_DIR)/$*.c
+
+$(OBJS_SPECIALIZE): %$(OBJ): $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/%.c
+	$(COMPILE_C) $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/$*.c
+
+softfloat$(LIB): $(OBJS_ALL)
+	$(DELETE) $@
+	$(MAKELIB) $^
+
+.PHONY: clean
+clean:
+	$(DELETE) $(OBJS_ALL) softfloat$(LIB)
+
--- a/softfloat/build/Linux-RISCV64-GCC/platform.h
+++ b/softfloat/build/Linux-RISCV64-GCC/platform.h
@@ -0,0 +1,54 @@
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+#define LITTLEENDIAN 1
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+#ifdef __GNUC_STDC_INLINE__
+#define INLINE inline
+#else
+#define INLINE extern inline
+#endif
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+#define SOFTFLOAT_BUILTIN_CLZ 1
+#define SOFTFLOAT_INTRINSIC_INT128 1
+#include "opts-GCC.h"
+
--- a/softfloat/build/Linux-x86_64-GCC/Makefile
+++ b/softfloat/build/Linux-x86_64-GCC/Makefile
@@ -94,6 +94,8 @@ OBJS_SPECIALIZE = \
  s_f16UIToCommonNaN$(OBJ) \
  s_commonNaNToF16UI$(OBJ) \
  s_propagateNaNF16UI$(OBJ) \
+  s_bf16UIToCommonNaN$(OBJ) \
+  s_commonNaNToBF16UI$(OBJ) \
  s_f32UIToCommonNaN$(OBJ) \
  s_commonNaNToF32UI$(OBJ) \
  s_propagateNaNF32UI$(OBJ) \
@@ -114,6 +116,8 @@ OBJS_OTHERS = \
  s_roundToUI64$(OBJ) \
  s_roundToI32$(OBJ) \
  s_roundToI64$(OBJ) \
+  s_normSubnormalBF16Sig$(OBJ) \
+  s_roundPackToBF16$(OBJ) \
  s_normSubnormalF16Sig$(OBJ) \
  s_roundPackToF16$(OBJ) \
  s_normRoundPackToF16$(OBJ) \
@@ -172,6 +176,8 @@ OBJS_OTHERS = \
  i64_to_extF80M$(OBJ) \
  i64_to_f128$(OBJ) \
  i64_to_f128M$(OBJ) \
+  bf16_isSignalingNaN$(OBJ) \
+  bf16_to_f32$(OBJ) \
  f16_to_ui32$(OBJ) \
  f16_to_ui64$(OBJ) \
  f16_to_i32$(OBJ) \
@@ -209,6 +215,7 @@ OBJS_OTHERS = \
  f32_to_ui64_r_minMag$(OBJ) \
  f32_to_i32_r_minMag$(OBJ) \
  f32_to_i64_r_minMag$(OBJ) \
+  f32_to_bf16$(OBJ) \
  f32_to_f16$(OBJ) \
  f32_to_f64$(OBJ) \
  f32_to_extF80$(OBJ) \
--- a/softfloat/build/Win32-MinGW/platform.h
+++ b/softfloat/build/Win32-MinGW/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,6 +47,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #include "opts-GCC.h"
+
--- a/softfloat/build/Win32-SSE2-MinGW/platform.h
+++ b/softfloat/build/Win32-SSE2-MinGW/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,6 +47,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #include "opts-GCC.h"
+
--- a/softfloat/build/Win64-MinGW-w64/platform.h
+++ b/softfloat/build/Win64-MinGW-w64/platform.h
@@ -35,11 +35,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 =============================================================================*/

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define LITTLEENDIAN 1

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #ifdef __GNUC_STDC_INLINE__
 #define INLINE inline
 #else
@@ -47,7 +47,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
+*----------------------------------------------------------------------------*/
 #define SOFTFLOAT_BUILTIN_CLZ 1
 #define SOFTFLOAT_INTRINSIC_INT128 1
 #include "opts-GCC.h"
+
--- a/softfloat/build/template-FAST_INT64/Makefile
+++ b/softfloat/build/template-FAST_INT64/Makefile
@@ -115,6 +115,8 @@ OBJS_OTHERS = \
  s_roundToUI64$(OBJ) \
  s_roundToI32$(OBJ) \
  s_roundToI64$(OBJ) \
+  s_normSubnormalBF16Sig$(OBJ) \
+  s_roundPackToBF16$(OBJ) \
  s_normSubnormalF16Sig$(OBJ) \
  s_roundPackToF16$(OBJ) \
  s_normRoundPackToF16$(OBJ) \
@@ -173,6 +175,8 @@ OBJS_OTHERS = \
  i64_to_extF80M$(OBJ) \
  i64_to_f128$(OBJ) \
  i64_to_f128M$(OBJ) \
+  bf16_isSignalingNaN$(OBJ) \
+  bf16_to_f32$(OBJ) \
  f16_to_ui32$(OBJ) \
  f16_to_ui64$(OBJ) \
  f16_to_i32$(OBJ) \
@@ -210,6 +214,7 @@ OBJS_OTHERS = \
  f32_to_ui64_r_minMag$(OBJ) \
  f32_to_i32_r_minMag$(OBJ) \
  f32_to_i64_r_minMag$(OBJ) \
+  f32_to_bf16$(OBJ) \
  f32_to_f16$(OBJ) \
  f32_to_f64$(OBJ) \
  f32_to_extF80$(OBJ) \
--- a/softfloat/build/template-FAST_INT64/platform.h
+++ b/softfloat/build/template-FAST_INT64/platform.h
@@ -37,13 +37,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 // Edit lines marked with `==>'.  See "SoftFloat-source.html".

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
-== > #define LITTLEENDIAN 1
+*----------------------------------------------------------------------------*/
+==> #define LITTLEENDIAN 1

-    /*----------------------------------------------------------------------------
-     *----------------------------------------------------------------------------*/
-    == > #define INLINE inline
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+==> #define INLINE inline
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+==> #define THREAD_LOCAL _Thread_local

-    /*----------------------------------------------------------------------------
-     *----------------------------------------------------------------------------*/
-    == > #define THREAD_LOCAL _Thread_local
--- a/softfloat/build/template-not-FAST_INT64/platform.h
+++ b/softfloat/build/template-not-FAST_INT64/platform.h
@@ -37,13 +37,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 // Edit lines marked with `==>'.  See "SoftFloat-source.html".

 /*----------------------------------------------------------------------------
- *----------------------------------------------------------------------------*/
-== > #define LITTLEENDIAN 1
+*----------------------------------------------------------------------------*/
+==> #define LITTLEENDIAN 1

-    /*----------------------------------------------------------------------------
-     *----------------------------------------------------------------------------*/
-    == > #define INLINE inline
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+==> #define INLINE inline
+
+/*----------------------------------------------------------------------------
+*----------------------------------------------------------------------------*/
+==> #define THREAD_LOCAL _Thread_local

-    /*----------------------------------------------------------------------------
-     *----------------------------------------------------------------------------*/
-    == > #define THREAD_LOCAL _Thread_local
--- a/softfloat/doc/SoftFloat.html
+++ b/softfloat/doc/SoftFloat.html
@@ -508,7 +508,7 @@ significant extra cost.
 On computers where the word size is <NOBR>64 bits</NOBR> or larger, both
 function versions (<CODE>f128M_add</CODE> and <CODE>f128_add</CODE>) are
 provided, because the cost of passing by value is then more reasonable.
-Applications that must be portable accross both classes of computers must use
+Applications that must be portable across both classes of computers must use
 the pointer-based functions, as these are always implemented.
 However, if it is known that SoftFloat includes the by-value functions for all
 platforms of interest, programmers can use whichever version they prefer.
--- a/softfloat/source/8086-SSE/s_bf16UIToCommonNaN.c
+++ b/softfloat/source/8086-SSE/s_bf16UIToCommonNaN.c
@@ -0,0 +1,59 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Assuming `uiA' has the bit pattern of a BF16 NaN, converts this NaN to the
+| common NaN form, and stores the resulting common NaN at the location pointed
+| to by `zPtr'.  If the NaN is a signaling NaN, the invalid exception is
+| raised.  The 7 fraction bits of `uiA' are left-aligned in `v64' (64 - 7).
+*----------------------------------------------------------------------------*/
+void softfloat_bf16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr )
+{
+
+    if ( softfloat_isSigNaNBF16UI( uiA ) ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+    }
+    zPtr->sign = uiA>>15;
+    zPtr->v64  = (uint_fast64_t) uiA<<57;
+    zPtr->v0   = 0;
+
+}
+
--- a/softfloat/source/8086-SSE/s_commonNaNToBF16UI.c
+++ b/softfloat/source/8086-SSE/s_commonNaNToBF16UI.c
@@ -0,0 +1,51 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "specialize.h"
+
+/*----------------------------------------------------------------------------
+| Converts the common NaN pointed to by `aPtr' into a quiet BF16 NaN, and
+| returns the bit pattern of this value as an unsigned integer.
+*----------------------------------------------------------------------------*/
+uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr )
+{
+    /* BF16 keeps 7 fraction bits: take only the top 7 bits of `v64'. */
+    return (uint_fast16_t) aPtr->sign<<15 | 0x7FC0 | aPtr->v64>>57;
+
+}
+
--- a/softfloat/source/8086-SSE/specialize.h
+++ b/softfloat/source/8086-SSE/specialize.h
@@ -116,6 +116,27 @@ uint_fast16_t softfloat_commonNaNToF16UI(const struct commonNaN* aPtr);
 *----------------------------------------------------------------------------*/
 uint_fast16_t softfloat_propagateNaNF16UI(uint_fast16_t uiA, uint_fast16_t uiB);

+/*----------------------------------------------------------------------------
+| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
+| BF16 signaling NaN: bits 14..7 all set, bit 6 clear, and bits 5..0 nonzero.
+| Note:  This macro evaluates its argument more than once.
+*----------------------------------------------------------------------------*/
+#define softfloat_isSigNaNBF16UI(uiA) ((((uiA)&0x7FC0) == 0x7F80) && ((uiA)&0x003F))
+
+/*----------------------------------------------------------------------------
+| Assuming 'uiA' has the bit pattern of a 16-bit brain floating-point (BF16)
+| NaN, converts this NaN to the common NaN form, and stores the resulting
+| common NaN at the location pointed to by 'zPtr'.  If the NaN is a signaling
+| NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+void softfloat_bf16UIToCommonNaN(uint_fast16_t uiA, struct commonNaN* zPtr);
+
+/*----------------------------------------------------------------------------
+| Converts the common NaN pointed to by 'aPtr' into a BF16 NaN, and returns
+| the bit pattern of this value as an unsigned integer.
+*----------------------------------------------------------------------------*/
+uint_fast16_t softfloat_commonNaNToBF16UI(const struct commonNaN* aPtr);
+
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 32-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
--- a/softfloat/source/RISCV/s_bf16UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_bf16UIToCommonNaN.c
@@ -0,0 +1,5 @@
+
+/*----------------------------------------------------------------------------
+| This file intentionally contains no code.
+*----------------------------------------------------------------------------*/
+
--- a/softfloat/source/RISCV/s_commonNaNToBF16UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToBF16UI.c
@@ -0,0 +1,5 @@
+
+/*----------------------------------------------------------------------------
+| This file intentionally contains no code.
+*----------------------------------------------------------------------------*/
+
--- a/softfloat/source/RISCV/s_commonNaNToExtF80M.c
+++ b/softfloat/source/RISCV/s_commonNaNToExtF80M.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.

-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,9 +34,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =============================================================================*/

-#include <stdint.h>
 #include "platform.h"
-#include "internals.h"
+#include "softfloat_types.h"
+
+#define softfloat_commonNaNToExtF80M softfloat_commonNaNToExtF80M
 #include "specialize.h"

 /*----------------------------------------------------------------------------
@@ -49,8 +50,8 @@ void
     const struct commonNaN *aPtr, struct extFloat80M *zSPtr )
 {

-    zSPtr->signExp = packToExtF80UI64( aPtr->sign, 0x7FFF );
-    zSPtr->signif = UINT64_C( 0xC000000000000000 ) | aPtr->v64>>1;
+    zSPtr->signExp = defaultNaNExtF80UI64;
+    zSPtr->signif  = defaultNaNExtF80UI0;

 }

--- a/softfloat/source/RISCV/s_commonNaNToExtF80UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToExtF80UI.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.

-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,9 +34,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =============================================================================*/

-#include <stdint.h>
 #include "platform.h"
-#include "primitives.h"
+#include "primitiveTypes.h"
+
+#define softfloat_commonNaNToExtF80UI softfloat_commonNaNToExtF80UI
 #include "specialize.h"

 /*----------------------------------------------------------------------------
@@ -48,8 +49,8 @@ struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr )
 {
    struct uint128 uiZ;

-    uiZ.v64 = (uint_fast16_t) aPtr->sign<<15 | 0x7FFF;
-    uiZ.v0 = UINT64_C( 0xC000000000000000 ) | aPtr->v64>>1;
+    uiZ.v64 = defaultNaNExtF80UI64;
+    uiZ.v0  = defaultNaNExtF80UI0;
    return uiZ;

 }
--- a/softfloat/source/RISCV/s_commonNaNToF128M.c
+++ b/softfloat/source/RISCV/s_commonNaNToF128M.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.

-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -36,7 +36,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 #include <stdint.h>
 #include "platform.h"
-#include "primitives.h"
+#include "primitiveTypes.h"
+
+#define softfloat_commonNaNToF128M softfloat_commonNaNToF128M
 #include "specialize.h"

 /*----------------------------------------------------------------------------
@@ -49,8 +51,10 @@ void
 softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr )
 {

-    softfloat_shortShiftRight128M( (const uint32_t *) &aPtr->v0, 16, zWPtr );
-    zWPtr[indexWordHi( 4 )] |= (uint32_t) aPtr->sign<<31 | 0x7FFF8000;
+    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
+    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
+    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
+    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;

 }

--- a/softfloat/source/RISCV/s_commonNaNToF128UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToF128UI.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.

-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,9 +34,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =============================================================================*/

-#include <stdint.h>
 #include "platform.h"
-#include "primitives.h"
+#include "primitiveTypes.h"
+
+#define softfloat_commonNaNToF128UI softfloat_commonNaNToF128UI
 #include "specialize.h"

 /*----------------------------------------------------------------------------
@@ -47,8 +48,8 @@ struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN *aPtr )
 {
    struct uint128 uiZ;

-    uiZ = softfloat_shortShiftRight128( aPtr->v64, aPtr->v0, 16 );
-    uiZ.v64 |= (uint_fast64_t) aPtr->sign<<63 | UINT64_C( 0x7FFF800000000000 );
+    uiZ.v64 = defaultNaNF128UI64;
+    uiZ.v0  = defaultNaNF128UI0;
    return uiZ;

 }
--- a/softfloat/source/RISCV/s_commonNaNToF16UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToF16UI.c
@@ -1,51 +1,5 @@

-/*============================================================================
-
-This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3e, by John R. Hauser.
-
-Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
-California.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
-#include <stdint.h>
-#include "platform.h"
-#include "specialize.h"
-
 /*----------------------------------------------------------------------------
-| Converts the common NaN pointed to by `aPtr' into a 16-bit floating-point
-| NaN, and returns the bit pattern of this value as an unsigned integer.
+| This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
-uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr )
-{
-
-    return (uint_fast16_t) aPtr->sign<<15 | 0x7E00 | aPtr->v64>>54;
-
-}

--- a/softfloat/source/RISCV/s_commonNaNToF32UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToF32UI.c
@@ -1,51 +1,5 @@

-/*============================================================================
-
-This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3e, by John R. Hauser.
-
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
-#include <stdint.h>
-#include "platform.h"
-#include "specialize.h"
-
 /*----------------------------------------------------------------------------
-| Converts the common NaN pointed to by `aPtr' into a 32-bit floating-point
-| NaN, and returns the bit pattern of this value as an unsigned integer.
+| This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
-uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr )
-{
-
-    return (uint_fast32_t) aPtr->sign<<31 | 0x7FC00000 | aPtr->v64>>41;
-
-}

--- a/softfloat/source/RISCV/s_commonNaNToF64UI.c
+++ b/softfloat/source/RISCV/s_commonNaNToF64UI.c
@@ -1,53 +1,5 @@

-/*============================================================================
-
-This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3e, by John R. Hauser.
-
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
-#include <stdint.h>
-#include "platform.h"
-#include "specialize.h"
-
 /*----------------------------------------------------------------------------
-| Converts the common NaN pointed to by `aPtr' into a 64-bit floating-point
-| NaN, and returns the bit pattern of this value as an unsigned integer.
+| This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
-uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr )
-{
-
-    return
-        (uint_fast64_t) aPtr->sign<<63 | UINT64_C( 0x7FF8000000000000 )
-            | aPtr->v64>>12;
-
-}

--- a/softfloat/source/RISCV/s_extF80MToCommonNaN.c
+++ b/softfloat/source/RISCV/s_extF80MToCommonNaN.c
@@ -1,62 +1,5 @@

-/*============================================================================
-
-This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3e, by John R. Hauser.
-
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
-#include <stdint.h>
-#include "platform.h"
-#include "internals.h"
-#include "specialize.h"
-#include "softfloat.h"
-
 /*----------------------------------------------------------------------------
-| Assuming the 80-bit extended floating-point value pointed to by `aSPtr' is
-| a NaN, converts this NaN to the common NaN form, and stores the resulting
-| common NaN at the location pointed to by `zPtr'.  If the NaN is a signaling
-| NaN, the invalid exception is raised.
+| This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
-void
- softfloat_extF80MToCommonNaN(
-     const struct extFloat80M *aSPtr, struct commonNaN *zPtr )
-{
-
-    if ( extF80M_isSignalingNaN( (const extFloat80_t *) aSPtr ) ) {
-        softfloat_raiseFlags( softfloat_flag_invalid );
-    }
-    zPtr->sign = signExtF80UI64( aSPtr->signExp );
-    zPtr->v64 = aSPtr->signif<<1;
-    zPtr->v0  = 0;
-
-}

--- a/softfloat/source/RISCV/s_extF80UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_extF80UIToCommonNaN.c
@@ -1,62 +1,5 @@

-/*============================================================================
-
-This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3e, by John R. Hauser.
-
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
-#include <stdint.h>
-#include "platform.h"
-#include "specialize.h"
-#include "softfloat.h"
-
 /*----------------------------------------------------------------------------
-| Assuming the unsigned integer formed from concatenating `uiA64' and `uiA0'
-| has the bit pattern of an 80-bit extended floating-point NaN, converts
-| this NaN to the common NaN form, and stores the resulting common NaN at the
-| location pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid
-| exception is raised.
+| This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
-void
- softfloat_extF80UIToCommonNaN(
-     uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr )
-{
-
-    if ( softfloat_isSigNaNExtF80UI( uiA64, uiA0 ) ) {
-        softfloat_raiseFlags( softfloat_flag_invalid );
-    }
-    zPtr->sign = uiA64>>15;
-    zPtr->v64  = uiA0<<1;
-    zPtr->v0   = 0;
-
-}

--- a/softfloat/source/RISCV/s_f128MToCommonNaN.c
+++ b/softfloat/source/RISCV/s_f128MToCommonNaN.c
@@ -1,62 +1,5 @@

-/*============================================================================
-
-This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3e, by John R. Hauser.
-
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
-#include <stdint.h>
-#include "platform.h"
-#include "primitives.h"
-#include "specialize.h"
-#include "softfloat.h"
-
 /*----------------------------------------------------------------------------
-| Assuming the 128-bit floating-point value pointed to by `aWPtr' is a NaN,
-| converts this NaN to the common NaN form, and stores the resulting common
-| NaN at the location pointed to by `zPtr'.  If the NaN is a signaling NaN,
-| the invalid exception is raised.  Argument `aWPtr' points to an array of
-| four 32-bit elements that concatenate in the platform's normal endian order
-| to form a 128-bit floating-point value.
+| This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
-void
- softfloat_f128MToCommonNaN( const uint32_t *aWPtr, struct commonNaN *zPtr )
-{
-
-    if ( f128M_isSignalingNaN( (const float128_t *) aWPtr ) ) {
-        softfloat_raiseFlags( softfloat_flag_invalid );
-    }
-    zPtr->sign = aWPtr[indexWordHi( 4 )]>>31;
-    softfloat_shortShiftLeft128M( aWPtr, 16, (uint32_t *) &zPtr->v0 );
-
-}

--- a/softfloat/source/RISCV/s_f128UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_f128UIToCommonNaN.c
@@ -1,65 +1,5 @@

-/*============================================================================
-
-This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3e, by John R. Hauser.
-
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
-#include <stdint.h>
-#include "platform.h"
-#include "primitives.h"
-#include "specialize.h"
-#include "softfloat.h"
-
 /*----------------------------------------------------------------------------
-| Assuming the unsigned integer formed from concatenating `uiA64' and `uiA0'
-| has the bit pattern of a 128-bit floating-point NaN, converts this NaN to
-| the common NaN form, and stores the resulting common NaN at the location
-| pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid exception
-| is raised.
+| This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
-void
- softfloat_f128UIToCommonNaN(
-     uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr )
-{
-    struct uint128 NaNSig;
-
-    if ( softfloat_isSigNaNF128UI( uiA64, uiA0 ) ) {
-        softfloat_raiseFlags( softfloat_flag_invalid );
-    }
-    NaNSig = softfloat_shortShiftLeft128( uiA64, uiA0, 16 );
-    zPtr->sign = uiA64>>63;
-    zPtr->v64  = NaNSig.v64;
-    zPtr->v0   = NaNSig.v0;
-
-}

--- a/softfloat/source/RISCV/s_f16UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_f16UIToCommonNaN.c
@@ -1,59 +1,5 @@

-/*============================================================================
-
-This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3e, by John R. Hauser.
-
-Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
-California.  All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
-#include <stdint.h>
-#include "platform.h"
-#include "specialize.h"
-#include "softfloat.h"
-
 /*----------------------------------------------------------------------------
-| Assuming `uiA' has the bit pattern of a 16-bit floating-point NaN, converts
-| this NaN to the common NaN form, and stores the resulting common NaN at the
-| location pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid
-| exception is raised.
+| This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
-void softfloat_f16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr )
-{
-
-    if ( softfloat_isSigNaNF16UI( uiA ) ) {
-        softfloat_raiseFlags( softfloat_flag_invalid );
-    }
-    zPtr->sign = uiA>>15;
-    zPtr->v64  = (uint_fast64_t) uiA<<54;
-    zPtr->v0   = 0;
-
-}

--- a/softfloat/source/RISCV/s_f32UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_f32UIToCommonNaN.c
@@ -1,59 +1,5 @@

-/*============================================================================
-
-This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3e, by John R. Hauser.
-
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
-#include <stdint.h>
-#include "platform.h"
-#include "specialize.h"
-#include "softfloat.h"
-
 /*----------------------------------------------------------------------------
-| Assuming `uiA' has the bit pattern of a 32-bit floating-point NaN, converts
-| this NaN to the common NaN form, and stores the resulting common NaN at the
-| location pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid
-| exception is raised.
+| This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
-void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr )
-{
-
-    if ( softfloat_isSigNaNF32UI( uiA ) ) {
-        softfloat_raiseFlags( softfloat_flag_invalid );
-    }
-    zPtr->sign = uiA>>31;
-    zPtr->v64  = (uint_fast64_t) uiA<<41;
-    zPtr->v0   = 0;
-
-}

--- a/softfloat/source/RISCV/s_f64UIToCommonNaN.c
+++ b/softfloat/source/RISCV/s_f64UIToCommonNaN.c
@@ -1,59 +1,5 @@

-/*============================================================================
-
-This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
-Package, Release 3e, by John R. Hauser.
-
-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions, and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions, and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- 3. Neither the name of the University nor the names of its contributors may
-    be used to endorse or promote products derived from this software without
-    specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
-DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-=============================================================================*/
-
-#include <stdint.h>
-#include "platform.h"
-#include "specialize.h"
-#include "softfloat.h"
-
 /*----------------------------------------------------------------------------
-| Assuming `uiA' has the bit pattern of a 64-bit floating-point NaN, converts
-| this NaN to the common NaN form, and stores the resulting common NaN at the
-| location pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid
-| exception is raised.
+| This file intentionally contains no code.
 *----------------------------------------------------------------------------*/
-void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr )
-{
-
-    if ( softfloat_isSigNaNF64UI( uiA ) ) {
-        softfloat_raiseFlags( softfloat_flag_invalid );
-    }
-    zPtr->sign = uiA>>63;
-    zPtr->v64  = uiA<<12;
-    zPtr->v0   = 0;
-
-}

--- a/softfloat/source/RISCV/s_propagateNaNExtF80M.c
+++ b/softfloat/source/RISCV/s_propagateNaNExtF80M.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.

-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,10 +34,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =============================================================================*/

-#include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
+#include "primitiveTypes.h"
 #include "specialize.h"
 #include "softfloat.h"

@@ -54,54 +53,22 @@ void
     struct extFloat80M *zSPtr
 )
 {
-    bool isSigNaNA;
-    const struct extFloat80M *sPtr;
-    bool isSigNaNB;
-    uint_fast16_t uiB64;
-    uint64_t uiB0;
-    uint_fast16_t uiA64;
-    uint64_t uiA0;
-    uint_fast16_t uiMagA64, uiMagB64;
+    uint_fast16_t ui64;
+    uint_fast64_t ui0;

-    isSigNaNA = extF80M_isSignalingNaN( (const extFloat80_t *) aSPtr );
-    sPtr = aSPtr;
-    if ( ! bSPtr ) {
-        if ( isSigNaNA ) softfloat_raiseFlags( softfloat_flag_invalid );
-        goto copy;
-    }
-    isSigNaNB = extF80M_isSignalingNaN( (const extFloat80_t *) bSPtr );
-    if ( isSigNaNA | isSigNaNB ) {
+    ui64 = aSPtr->signExp;
+    ui0  = aSPtr->signif;
+    if (
+        softfloat_isSigNaNExtF80UI( ui64, ui0 )
+            || (bSPtr
+                    && (ui64 = bSPtr->signExp,
+                        ui0  = bSPtr->signif,
+                        softfloat_isSigNaNExtF80UI( ui64, ui0 )))
+    ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
-        if ( isSigNaNA ) {
-            uiB64 = bSPtr->signExp;
-            if ( isSigNaNB ) goto returnLargerUIMag;
-            uiB0 = bSPtr->signif;
-            if ( isNaNExtF80UI( uiB64, uiB0 ) ) goto copyB;
-            goto copy;
-        } else {
-            uiA64 = aSPtr->signExp;
-            uiA0 = aSPtr->signif;
-            if ( isNaNExtF80UI( uiA64, uiA0 ) ) goto copy;
-            goto copyB;
-        }
    }
-    uiB64 = bSPtr->signExp;
- returnLargerUIMag:
-    uiA64 = aSPtr->signExp;
-    uiMagA64 = uiA64 & 0x7FFF;
-    uiMagB64 = uiB64 & 0x7FFF;
-    if ( uiMagA64 < uiMagB64 ) goto copyB;
-    if ( uiMagB64 < uiMagA64 ) goto copy;
-    uiA0 = aSPtr->signif;
-    uiB0 = bSPtr->signif;
-    if ( uiA0 < uiB0 ) goto copyB;
-    if ( uiB0 < uiA0 ) goto copy;
-    if ( uiA64 < uiB64 ) goto copy;
- copyB:
-    sPtr = bSPtr;
- copy:
-    zSPtr->signExp = sPtr->signExp;
-    zSPtr->signif = sPtr->signif | UINT64_C( 0xC000000000000000 );
+    zSPtr->signExp = defaultNaNExtF80UI64;
+    zSPtr->signif  = defaultNaNExtF80UI0;

 }

--- a/softfloat/source/RISCV/s_propagateNaNExtF80UI.c
+++ b/softfloat/source/RISCV/s_propagateNaNExtF80UI.c
@@ -4,7 +4,7 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.

-Copyright 2011, 2012, 2013, 2014, 2018 The Regents of the University of
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
 California.  All rights reserved.

 Redistribution and use in source and binary forms, with or without
@@ -34,17 +34,16 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =============================================================================*/

-#include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
+#include "primitiveTypes.h"
 #include "specialize.h"
 #include "softfloat.h"

 /*----------------------------------------------------------------------------
-| Interpreting the unsigned integer formed from concatenating 'uiA64' and
-| 'uiA0' as an 80-bit extended floating-point value, and likewise interpreting
-| the unsigned integer formed from concatenating 'uiB64' and 'uiB0' as another
+| Interpreting the unsigned integer formed from concatenating `uiA64' and
+| `uiA0' as an 80-bit extended floating-point value, and likewise interpreting
+| the unsigned integer formed from concatenating `uiB64' and `uiB0' as another
 | 80-bit extended floating-point value, and assuming at least on of these
 | floating-point values is a NaN, returns the bit pattern of the combined NaN
 | result.  If either original floating-point value is a signaling NaN, the
@@ -58,48 +57,16 @@ struct uint128
     uint_fast64_t uiB0
 )
 {
-    bool isSigNaNA, isSigNaNB;
-    uint_fast64_t uiNonsigA0, uiNonsigB0;
-    uint_fast16_t uiMagA64, uiMagB64;
    struct uint128 uiZ;

-    /*------------------------------------------------------------------------
-    *------------------------------------------------------------------------*/
-    isSigNaNA = softfloat_isSigNaNExtF80UI( uiA64, uiA0 );
-    isSigNaNB = softfloat_isSigNaNExtF80UI( uiB64, uiB0 );
-    /*------------------------------------------------------------------------
-    | Make NaNs non-signaling.
-    *------------------------------------------------------------------------*/
-    uiNonsigA0 = uiA0 | UINT64_C( 0xC000000000000000 );
-    uiNonsigB0 = uiB0 | UINT64_C( 0xC000000000000000 );
-    /*------------------------------------------------------------------------
-    *------------------------------------------------------------------------*/
-    if ( isSigNaNA | isSigNaNB ) {
+    if (
+           softfloat_isSigNaNExtF80UI( uiA64, uiA0 )
+        || softfloat_isSigNaNExtF80UI( uiB64, uiB0 )
+    ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
-        if ( isSigNaNA ) {
-            if ( isSigNaNB ) goto returnLargerMag;
-            if ( isNaNExtF80UI( uiB64, uiB0 ) ) goto returnB;
-            goto returnA;
-        } else {
-            if ( isNaNExtF80UI( uiA64, uiA0 ) ) goto returnA;
-            goto returnB;
-        }
    }
- returnLargerMag:
-    uiMagA64 = uiA64 & 0x7FFF;
-    uiMagB64 = uiB64 & 0x7FFF;
-    if ( uiMagA64 < uiMagB64 ) goto returnB;
-    if ( uiMagB64 < uiMagA64 ) goto returnA;
-    if ( uiA0 < uiB0 ) goto returnB;
-    if ( uiB0 < uiA0 ) goto returnA;
-    if ( uiA64 < uiB64 ) goto returnA;
- returnB:
-    uiZ.v64 = uiB64;
-    uiZ.v0  = uiNonsigB0;
-    return uiZ;
- returnA:
-    uiZ.v64 = uiA64;
-    uiZ.v0  = uiNonsigA0;
+    uiZ.v64 = defaultNaNExtF80UI64;
+    uiZ.v0  = defaultNaNExtF80UI0;
    return uiZ;

 }
--- a/softfloat/source/RISCV/s_propagateNaNF128M.c
+++ b/softfloat/source/RISCV/s_propagateNaNF128M.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.

-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015, 2018 The Regents of the University of
+California.  All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,43 +34,35 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =============================================================================*/

-#include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
+#include "primitiveTypes.h"
 #include "specialize.h"
 #include "softfloat.h"

 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 128-bit floating-point values pointed to by
-| `aWPtr' and `bWPtr' is a NaN, stores the combined NaN result at the location
-| pointed to by `zWPtr'.  If either original floating-point value is a
-| signaling NaN, the invalid exception is raised.  Each of `aWPtr', `bWPtr',
-| and `zWPtr' points to an array of four 32-bit elements that concatenate in
+| 'aWPtr' and 'bWPtr' is a NaN, stores the combined NaN result at the location
+| pointed to by 'zWPtr'.  If either original floating-point value is a
+| signaling NaN, the invalid exception is raised.  Each of 'aWPtr', 'bWPtr',
+| and 'zWPtr' points to an array of four 32-bit elements that concatenate in
 | the platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
 void
 softfloat_propagateNaNF128M(
     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr )
 {
-    bool isSigNaNA;
-    const uint32_t *ptr;

-    ptr = aWPtr;
-    isSigNaNA = f128M_isSignalingNaN( (const float128_t *) aWPtr );
    if (
-        isSigNaNA
+        f128M_isSignalingNaN( (const float128_t *) aWPtr )
            || (bWPtr && f128M_isSignalingNaN( (const float128_t *) bWPtr ))
    ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
-        if ( isSigNaNA ) goto copy;
    }
-    if ( ! softfloat_isNaNF128M( aWPtr ) ) ptr = bWPtr;
- copy:
-    zWPtr[indexWordHi( 4 )] = ptr[indexWordHi( 4 )] | 0x00008000;
-    zWPtr[indexWord( 4, 2 )] = ptr[indexWord( 4, 2 )];
-    zWPtr[indexWord( 4, 1 )] = ptr[indexWord( 4, 1 )];
-    zWPtr[indexWord( 4, 0 )] = ptr[indexWord( 4, 0 )];
+    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
+    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
+    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
+    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;

 }

--- a/softfloat/source/RISCV/s_propagateNaNF128UI.c
+++ b/softfloat/source/RISCV/s_propagateNaNF128UI.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.

-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,10 +34,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =============================================================================*/

-#include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
+#include "primitiveTypes.h"
 #include "specialize.h"
 #include "softfloat.h"

@@ -58,23 +57,16 @@ struct uint128
     uint_fast64_t uiB0
 )
 {
-    bool isSigNaNA;
    struct uint128 uiZ;

-    isSigNaNA = softfloat_isSigNaNF128UI( uiA64, uiA0 );
-    if ( isSigNaNA || softfloat_isSigNaNF128UI( uiB64, uiB0 ) ) {
+    if (
+           softfloat_isSigNaNF128UI( uiA64, uiA0 )
+        || softfloat_isSigNaNF128UI( uiB64, uiB0 )
+    ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
-        if ( isSigNaNA ) goto returnNonsigA;
    }
-    if ( isNaNF128UI( uiA64, uiA0 ) ) {
- returnNonsigA:
-        uiZ.v64 = uiA64;
-        uiZ.v0  = uiA0;
-    } else {
-        uiZ.v64 = uiB64;
-        uiZ.v0  = uiB0;
-    }
-    uiZ.v64 |= UINT64_C( 0x0000800000000000 );
+    uiZ.v64 = defaultNaNF128UI64;
+    uiZ.v0  = defaultNaNF128UI0;
    return uiZ;

 }
--- a/softfloat/source/RISCV/s_propagateNaNF16UI.c
+++ b/softfloat/source/RISCV/s_propagateNaNF16UI.c
@@ -4,7 +4,7 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.

-Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
 California.  All rights reserved.

 Redistribution and use in source and binary forms, with or without
@@ -34,10 +34,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =============================================================================*/

-#include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
 #include "specialize.h"
 #include "softfloat.h"

@@ -50,14 +48,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 uint_fast16_t
 softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB )
 {
-    bool isSigNaNA;

-    isSigNaNA = softfloat_isSigNaNF16UI( uiA );
-    if ( isSigNaNA || softfloat_isSigNaNF16UI( uiB ) ) {
+    if ( softfloat_isSigNaNF16UI( uiA ) || softfloat_isSigNaNF16UI( uiB ) ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
-        if ( isSigNaNA ) return uiA | 0x0200;
    }
-    return (isNaNF16UI( uiA ) ? uiA : uiB) | 0x0200;
+    return defaultNaNF16UI;

 }

--- a/softfloat/source/RISCV/s_propagateNaNF32UI.c
+++ b/softfloat/source/RISCV/s_propagateNaNF32UI.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.

-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,10 +34,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =============================================================================*/

-#include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
 #include "specialize.h"
 #include "softfloat.h"

@@ -50,14 +48,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 uint_fast32_t
 softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB )
 {
-    bool isSigNaNA;

-    isSigNaNA = softfloat_isSigNaNF32UI( uiA );
-    if ( isSigNaNA || softfloat_isSigNaNF32UI( uiB ) ) {
+    if ( softfloat_isSigNaNF32UI( uiA ) || softfloat_isSigNaNF32UI( uiB ) ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
-        if ( isSigNaNA ) return uiA | 0x00400000;
    }
-    return (isNaNF32UI( uiA ) ? uiA : uiB) | 0x00400000;
+    return defaultNaNF32UI;

 }

--- a/softfloat/source/RISCV/s_propagateNaNF64UI.c
+++ b/softfloat/source/RISCV/s_propagateNaNF64UI.c
@@ -4,8 +4,8 @@
 This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
 Package, Release 3e, by John R. Hauser.

-Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
-All rights reserved.
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -34,10 +34,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =============================================================================*/

-#include <stdbool.h>
 #include <stdint.h>
 #include "platform.h"
-#include "internals.h"
 #include "specialize.h"
 #include "softfloat.h"

@@ -50,14 +48,11 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 uint_fast64_t
 softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB )
 {
-    bool isSigNaNA;

-    isSigNaNA = softfloat_isSigNaNF64UI( uiA );
-    if ( isSigNaNA || softfloat_isSigNaNF64UI( uiB ) ) {
+    if ( softfloat_isSigNaNF64UI( uiA ) || softfloat_isSigNaNF64UI( uiB ) ) {
        softfloat_raiseFlags( softfloat_flag_invalid );
-        if ( isSigNaNA ) return uiA | UINT64_C( 0x0008000000000000 );
    }
-    return (isNaNF64UI( uiA ) ? uiA : uiB) | UINT64_C( 0x0008000000000000 );
+    return defaultNaNF64UI;

 }

--- a/softfloat/source/RISCV/specialize.h
+++ b/softfloat/source/RISCV/specialize.h
@@ -51,19 +51,19 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | The values to return on conversions to 32-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
-#define ui32_fromPosOverflow UINT32_C(0xFFFFFFFF)
-#define ui32_fromNegOverflow UINT32_C(0x0)
-#define ui32_fromNaN UINT32_C(0xFFFFFFFF)
-#define i32_fromPosOverflow INT64_C(0x7FFFFFFF)
-#define i32_fromNegOverflow (-INT64_C(0x7FFFFFFF) - 1)
-#define i32_fromNaN INT64_C(0x7FFFFFFF)
+#define ui32_fromPosOverflow 0xFFFFFFFF
+#define ui32_fromNegOverflow 0
+#define ui32_fromNaN 0xFFFFFFFF
+#define i32_fromPosOverflow 0x7FFFFFFF
+#define i32_fromNegOverflow (-0x7FFFFFFF - 1)
+#define i32_fromNaN 0x7FFFFFFF

 /*----------------------------------------------------------------------------
 | The values to return on conversions to 64-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
 #define ui64_fromPosOverflow UINT64_C(0xFFFFFFFFFFFFFFFF)
-#define ui64_fromNegOverflow UINT64_C(0x0)
+#define ui64_fromNegOverflow 0
 #define ui64_fromNaN UINT64_C(0xFFFFFFFFFFFFFFFF)
 #define i64_fromPosOverflow INT64_C(0x7FFFFFFFFFFFFFFF)
 #define i64_fromNegOverflow (-INT64_C(0x7FFFFFFFFFFFFFFF) - 1)
@@ -74,18 +74,13 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | to another.
 *----------------------------------------------------------------------------*/
 struct commonNaN {
-    bool sign;
-#ifdef LITTLEENDIAN
-    uint64_t v0, v64;
-#else
-    uint64_t v64, v0;
-#endif
+    char _unused;
 };

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 16-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF16UI 0xFE00
+#define defaultNaNF16UI 0x7E00

 /*----------------------------------------------------------------------------
 | Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
@@ -94,19 +89,38 @@ struct commonNaN {
 *----------------------------------------------------------------------------*/
 #define softfloat_isSigNaNF16UI(uiA) ((((uiA)&0x7E00) == 0x7C00) && ((uiA)&0x01FF))

+/*----------------------------------------------------------------------------
+| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
+| 16-bit brain floating-point (BF16) signaling NaN.
+| Note:  This macro evaluates its argument more than once.
+*----------------------------------------------------------------------------*/
+#define softfloat_isSigNaNBF16UI(uiA) ((((uiA)&0x7FC0) == 0x7F80) && ((uiA)&0x003F))
+
 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
 | this NaN to the common NaN form, and stores the resulting common NaN at the
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f16UIToCommonNaN(uint_fast16_t uiA, struct commonNaN* zPtr);
+#define softfloat_f16UIToCommonNaN(uiA, zPtr)                                                                                              \
+    if(!((uiA)&0x0200))                                                                                                                    \
+    softfloat_raiseFlags(softfloat_flag_invalid)
+
+/*----------------------------------------------------------------------------
+| Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts
+| this NaN to the common NaN form, and stores the resulting common NaN at the
+| location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+#define softfloat_bf16UIToCommonNaN(uiA, zPtr)                                                                                             \
+    if(!((uiA)&0x0040))                                                                                                                    \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast16_t softfloat_commonNaNToF16UI(const struct commonNaN* aPtr);
+#define softfloat_commonNaNToF16UI(aPtr) ((uint_fast16_t)defaultNaNF16UI)

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 16-bit floating-
@@ -116,6 +130,17 @@ uint_fast16_t softfloat_commonNaNToF16UI(const struct commonNaN* aPtr);
 *----------------------------------------------------------------------------*/
 uint_fast16_t softfloat_propagateNaNF16UI(uint_fast16_t uiA, uint_fast16_t uiB);

+/*----------------------------------------------------------------------------
+| The bit pattern for a default generated 16-bit BF16 floating-point NaN.
+*----------------------------------------------------------------------------*/
+#define defaultNaNBF16UI 0x7FC0
+
+/*----------------------------------------------------------------------------
+| Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
+| NaN, and returns the bit pattern of this value as an unsigned integer.
+*----------------------------------------------------------------------------*/
+#define softfloat_commonNaNToBF16UI(aPtr) ((uint_fast16_t)defaultNaNBF16UI)
+
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 32-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
@@ -134,13 +159,15 @@ uint_fast16_t softfloat_propagateNaNF16UI(uint_fast16_t uiA, uint_fast16_t uiB);
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f32UIToCommonNaN(uint_fast32_t uiA, struct commonNaN* zPtr);
+#define softfloat_f32UIToCommonNaN(uiA, zPtr)                                                                                              \
+    if(!((uiA)&0x00400000))                                                                                                                \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast32_t softfloat_commonNaNToF32UI(const struct commonNaN* aPtr);
+#define softfloat_commonNaNToF32UI(aPtr) ((uint_fast32_t)defaultNaNF32UI)

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 32-bit floating-
@@ -169,13 +196,15 @@ uint_fast32_t softfloat_propagateNaNF32UI(uint_fast32_t uiA, uint_fast32_t uiB);
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f64UIToCommonNaN(uint_fast64_t uiA, struct commonNaN* zPtr);
+#define softfloat_f64UIToCommonNaN(uiA, zPtr)                                                                                              \
+    if(!((uiA)&UINT64_C(0x0008000000000000)))                                                                                              \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast64_t softfloat_commonNaNToF64UI(const struct commonNaN* aPtr);
+#define softfloat_commonNaNToF64UI(aPtr) ((uint_fast64_t)defaultNaNF64UI)

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 64-bit floating-
@@ -188,7 +217,7 @@ uint_fast64_t softfloat_propagateNaNF64UI(uint_fast64_t uiA, uint_fast64_t uiB);
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 80-bit extended floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNExtF80UI64 0xFFFF
+#define defaultNaNExtF80UI64 0x7FFF
 #define defaultNaNExtF80UI0 UINT64_C(0xC000000000000000)

 /*----------------------------------------------------------------------------
@@ -214,14 +243,26 @@ uint_fast64_t softfloat_propagateNaNF64UI(uint_fast64_t uiA, uint_fast64_t uiB);
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_extF80UIToCommonNaN(uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN* zPtr);
+#define softfloat_extF80UIToCommonNaN(uiA64, uiA0, zPtr)                                                                                   \
+    if(!((uiA0)&UINT64_C(0x4000000000000000)))                                                                                             \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and returns the bit pattern of this value as an unsigned
 | integer.
 *----------------------------------------------------------------------------*/
+#if defined INLINE && !defined softfloat_commonNaNToExtF80UI
+INLINE
+struct uint128 softfloat_commonNaNToExtF80UI(const struct commonNaN* aPtr) {
+    struct uint128 uiZ;
+    uiZ.v64 = defaultNaNExtF80UI64;
+    uiZ.v0 = defaultNaNExtF80UI0;
+    return uiZ;
+}
+#else
 struct uint128 softfloat_commonNaNToExtF80UI(const struct commonNaN* aPtr);
+#endif

 /*----------------------------------------------------------------------------
 | Interpreting the unsigned integer formed from concatenating 'uiA64' and
@@ -237,7 +278,7 @@ struct uint128 softfloat_propagateNaNExtF80UI(uint_fast16_t uiA64, uint_fast64_t
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF128UI64 UINT64_C(0xFFFF800000000000)
+#define defaultNaNF128UI64 UINT64_C(0x7FFF800000000000)
 #define defaultNaNF128UI0 UINT64_C(0)

 /*----------------------------------------------------------------------------
@@ -256,13 +297,25 @@ struct uint128 softfloat_propagateNaNExtF80UI(uint_fast16_t uiA64, uint_fast64_t
 | pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid exception
 | is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f128UIToCommonNaN(uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN* zPtr);
+#define softfloat_f128UIToCommonNaN(uiA64, uiA0, zPtr)                                                                                     \
+    if(!((uiA64)&UINT64_C(0x0000800000000000)))                                                                                            \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
+#if defined INLINE && !defined softfloat_commonNaNToF128UI
+INLINE
+struct uint128 softfloat_commonNaNToF128UI(const struct commonNaN* aPtr) {
+    struct uint128 uiZ;
+    uiZ.v64 = defaultNaNF128UI64;
+    uiZ.v0 = defaultNaNF128UI0;
+    return uiZ;
+}
+#else
 struct uint128 softfloat_commonNaNToF128UI(const struct commonNaN*);
+#endif

 /*----------------------------------------------------------------------------
 | Interpreting the unsigned integer formed from concatenating 'uiA64' and
@@ -288,14 +341,24 @@ struct uint128 softfloat_propagateNaNF128UI(uint_fast64_t uiA64, uint_fast64_t u
 | common NaN at the location pointed to by 'zPtr'.  If the NaN is a signaling
 | NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_extF80MToCommonNaN(const struct extFloat80M* aSPtr, struct commonNaN* zPtr);
+#define softfloat_extF80MToCommonNaN(aSPtr, zPtr)                                                                                          \
+    if(!((aSPtr)->signif & UINT64_C(0x4000000000000000)))                                                                                  \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and stores this NaN at the location pointed to by
 | 'zSPtr'.
 *----------------------------------------------------------------------------*/
+#if defined INLINE && !defined softfloat_commonNaNToExtF80M
+INLINE
+void softfloat_commonNaNToExtF80M(const struct commonNaN* aPtr, struct extFloat80M* zSPtr) {
+    zSPtr->signExp = defaultNaNExtF80UI64;
+    zSPtr->signif = defaultNaNExtF80UI0;
+}
+#else
 void softfloat_commonNaNToExtF80M(const struct commonNaN* aPtr, struct extFloat80M* zSPtr);
+#endif

 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 80-bit extended floating-point values
@@ -308,7 +371,7 @@ void softfloat_propagateNaNExtF80M(const struct extFloat80M* aSPtr, const struct
 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF128UI96 0xFFFF8000
+#define defaultNaNF128UI96 0x7FFF8000
 #define defaultNaNF128UI64 0
 #define defaultNaNF128UI32 0
 #define defaultNaNF128UI0 0
@@ -321,7 +384,9 @@ void softfloat_propagateNaNExtF80M(const struct extFloat80M* aSPtr, const struct
 | four 32-bit elements that concatenate in the platform's normal endian order
 | to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void softfloat_f128MToCommonNaN(const uint32_t* aWPtr, struct commonNaN* zPtr);
+#define softfloat_f128MToCommonNaN(aWPtr, zPtr)                                                                                            \
+    if(!((aWPtr)[indexWordHi(4)] & 0x00008000)) /* quiet bit of the most-significant 32-bit word */                                       \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
@@ -329,7 +394,17 @@ void softfloat_f128MToCommonNaN(const uint32_t* aWPtr, struct commonNaN* zPtr);
 | 'zWPtr' points to an array of four 32-bit elements that concatenate in the
 | platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
+#if defined INLINE && !defined softfloat_commonNaNToF128M
+INLINE
+void softfloat_commonNaNToF128M(const struct commonNaN* aPtr, uint32_t* zWPtr) {
+    zWPtr[indexWord(4, 3)] = defaultNaNF128UI96;
+    zWPtr[indexWord(4, 2)] = defaultNaNF128UI64;
+    zWPtr[indexWord(4, 1)] = defaultNaNF128UI32;
+    zWPtr[indexWord(4, 0)] = defaultNaNF128UI0;
+}
+#else
 void softfloat_commonNaNToF128M(const struct commonNaN* aPtr, uint32_t* zWPtr);
+#endif

 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 128-bit floating-point values pointed to by
--- a/softfloat/source/bf16_isSignalingNaN.c
+++ b/softfloat/source/bf16_isSignalingNaN.c
@@ -0,0 +1,51 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+bool bf16_isSignalingNaN( bfloat16_t a )
+{
+    union ui16_bf16 uA;
+
+    uA.f = a;
+    return softfloat_isSigNaNBF16UI( uA.ui );
+
+}
+
--- a/softfloat/source/bf16_to_f32.c
+++ b/softfloat/source/bf16_to_f32.c
@@ -0,0 +1,90 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+float32_t bf16_to_f32( bfloat16_t a )
+{
+    union ui16_bf16 uA;
+    uint_fast16_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint_fast16_t frac;
+    struct commonNaN commonNaN;
+    uint_fast32_t uiZ;
+    struct exp8_sig16 normExpSig;
+    union ui32_f32 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signBF16UI( uiA );
+    exp  = expBF16UI( uiA );
+    frac = fracBF16UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    // NaN or Inf
+    if ( exp == 0xFF ) {
+        if ( frac ) {
+            softfloat_bf16UIToCommonNaN( uiA, &commonNaN );
+            uiZ = softfloat_commonNaNToF32UI( &commonNaN );
+        } else {
+            uiZ = packToF32UI( sign, 0xFF, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    // packToF32UI simply packs the bit fields without any numerical change,
+    // so it can be used directly for every BF16 to f32 conversion that does
+    // not require bit manipulation
+    // (that is, every case where the 16 bits are just padded on the right with
+    //  16 zeros, including subnormal numbers)
+    uiZ = packToF32UI( sign, exp, ((uint_fast32_t) frac) <<16 );
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
+
+
--- a/softfloat/source/f32_to_bf16.c
+++ b/softfloat/source/f32_to_bf16.c
@@ -0,0 +1,105 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+#include <inttypes.h>
+#include <stdio.h>
+
+bfloat16_t f32_to_bf16( float32_t a )
+{
+    union ui32_f32 uA;
+    uint_fast32_t uiA;
+    bool sign;
+    int_fast16_t exp;
+    uint_fast32_t frac;
+    struct commonNaN commonNaN;
+    uint_fast16_t uiZ, frac16;
+    union ui16_bf16 uZ;
+
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    uA.f = a;
+    uiA = uA.ui;
+    sign = signF32UI( uiA );
+    exp  = expF32UI( uiA );
+    frac = fracF32UI( uiA );
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    // infinity or NaN cases
+    if ( exp == 0xFF ) {
+        if ( frac ) {
+            // NaN case
+            softfloat_f32UIToCommonNaN( uiA, &commonNaN );
+            uiZ = softfloat_commonNaNToBF16UI( &commonNaN );
+        } else {
+            // infinity case
+            uiZ = packToBF16UI( sign, 0xFF, 0 );
+        }
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    // frac is a 24-bit mantissa shifted right by 9, leaving (24-9) = 15 bits;
+    // the 9 discarded low bits are ORed into a sticky flag kept in the LSB
+    frac16 = frac>>9 | ((frac & 0x1FF) != 0);
+    if ( ! (exp | frac16) ) {
+        uiZ = packToBF16UI( sign, 0, 0 );
+        goto uiZ;
+    }
+    /*------------------------------------------------------------------------
+    *------------------------------------------------------------------------*/
+    // The exponent argument of softfloat_roundPackToBF16 (its 2nd argument)
+    // must correspond to the exponent of bit 13 of fracIn
+    // (fracIn is its 3rd and last argument)
+    uint_fast32_t mask = exp ? 0x4000 : 0x0; // implicit one mask added if input is a normal number
+    // The exponents of the lowest normal and the largest subnormal should be
+    // equal, but are not in the IEEE encoding, so the mantissa of a subnormal
+    // number must be partially normalized (by one bit) so that (exp - 1)
+    // corresponds to the exponent of frac16[13]
+    frac16 = frac16 << (exp ? 0 : 1);
+    return softfloat_roundPackToBF16( sign, exp - 1, frac16 | mask );
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
--- a/softfloat/source/f32_to_f16.c
+++ b/softfloat/source/f32_to_f16.c
@@ -72,6 +72,9 @@ float16_t f32_to_f16( float32_t a )
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
+    // frac is a 24-bit significand: its bottom 9 bits are ORed together into a
+    // sticky flag, its top 15 bits are extracted, and the LSB of that top slice
+    // is ORed with the sticky flag
    frac16 = frac>>9 | ((frac & 0x1FF) != 0);
    if ( ! (exp | frac16) ) {
        uiZ = packToF16UI( sign, 0, 0 );
--- a/softfloat/source/include/internals.h
+++ b/softfloat/source/include/internals.h
@@ -46,6 +46,10 @@ union ui16_f16 {
    uint16_t ui;
    float16_t f;
 };
+union ui16_bf16 {
+    uint16_t ui;
+    bfloat16_t f;
+};
 union ui32_f32 {
    uint32_t ui;
    float32_t f;
@@ -108,6 +112,18 @@ float16_t softfloat_addMagsF16(uint_fast16_t, uint_fast16_t);
 float16_t softfloat_subMagsF16(uint_fast16_t, uint_fast16_t);
 float16_t softfloat_mulAddF16(uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t);

+/*----------------------------------------------------------------------------
+ *----------------------------------------------------------------------------*/
+#define signBF16UI(a) ((bool)((uint16_t)(a) >> 15))
+#define expBF16UI(a) ((int_fast16_t)((a) >> 7) & 0xFF)
+#define fracBF16UI(a) ((a)&0x07F)
+#define packToBF16UI(sign, exp, sig) (((uint16_t)(sign) << 15) + ((uint16_t)(exp) << 7) + (sig))
+
+#define isNaNBF16UI(a) (((~(a)&0x7F80) == 0) && ((a)&0x07F)) /* exponent bits 14..7 all ones, fraction nonzero */
+
+bfloat16_t softfloat_roundPackToBF16(bool, int_fast16_t, uint_fast16_t);
+struct exp8_sig16 softfloat_normSubnormalBF16Sig(uint_fast16_t);
+
 /*----------------------------------------------------------------------------
 *----------------------------------------------------------------------------*/
 #define signF32UI(a) ((bool)((uint32_t)(a) >> 31))
--- a/softfloat/source/include/softfloat.h
+++ b/softfloat/source/include/softfloat.h
@@ -76,13 +76,13 @@ enum {
 | Software floating-point exception flags.
 *----------------------------------------------------------------------------*/
 extern THREAD_LOCAL uint_fast8_t softfloat_exceptionFlags;
-enum {
+typedef enum {
    softfloat_flag_inexact = 1,
    softfloat_flag_underflow = 2,
    softfloat_flag_overflow = 4,
    softfloat_flag_infinite = 8,
    softfloat_flag_invalid = 16
-};
+} exceptionFlag_t;

 /*----------------------------------------------------------------------------
 | Routine to raise any or all of the software floating-point exception flags.
@@ -164,6 +164,13 @@ bool f16_le_quiet(float16_t, float16_t);
 bool f16_lt_quiet(float16_t, float16_t);
 bool f16_isSignalingNaN(float16_t);

+/*----------------------------------------------------------------------------
+| 16-bit (brain float 16) floating-point operations.
+*----------------------------------------------------------------------------*/
+float32_t bf16_to_f32(bfloat16_t);
+bfloat16_t f32_to_bf16(float32_t);
+bool bf16_isSignalingNaN(bfloat16_t);
+
 /*----------------------------------------------------------------------------
 | 32-bit (single-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
--- a/softfloat/source/include/softfloat_types.h
+++ b/softfloat/source/include/softfloat_types.h
@@ -50,6 +50,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 typedef struct {
    uint16_t v;
 } float16_t;
+typedef struct {
+    uint16_t v;
+} bfloat16_t;
 typedef struct {
    uint32_t v;
 } float32_t;
--- a/softfloat/source/s_mulAddF32.c
+++ b/softfloat/source/s_mulAddF32.c
@@ -221,4 +221,3 @@ float32_t
    return uZ.f;

 }
-
--- a/softfloat/source/s_normSubnormalBF16Sig.c
+++ b/softfloat/source/s_normSubnormalBF16Sig.c
@@ -0,0 +1,52 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+
+struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t sig )
+{
+    int_fast8_t shiftDist;
+    struct exp8_sig16 z;
+    /* Shift a subnormal significand left and return it with the exponent that compensates for the shift. */
+    shiftDist = softfloat_countLeadingZeros16( sig ) - 8; /* presumably the distance that brings the top set bit to bit 7 -- confirm against softfloat_countLeadingZeros16 */
+    z.exp = 1 - shiftDist; /* every position shifted left lowers the exponent by one, counted down from 1 */
+    z.sig = sig<<shiftDist;
+    return z;
+
+}
+
--- a/softfloat/source/s_roundPackToBF16.c
+++ b/softfloat/source/s_roundPackToBF16.c
@@ -0,0 +1,114 @@
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3e, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/** Rounds sign/exp/sig per softfloat_roundingMode and packs them into a bfloat16_t; sig[7] is the least significant result bit, sig[6:0] are the bits consumed by rounding */
+bfloat16_t
+ softfloat_roundPackToBF16( bool sign, int_fast16_t exp, uint_fast16_t sig )
+{
+    uint_fast8_t roundingMode;
+    bool roundNearEven;
+    uint_fast8_t roundIncrement, roundBits;
+    bool isTiny;
+    uint_fast16_t uiZ;
+    union ui16_bf16 uZ;
+
+    /* Choose the value added before truncation: 0x40 for near-even and near-maxMag; otherwise 0x7F when
+     * the mode is round_min for a negative value or round_max for a positive one, and 0 for any other mode. */
+    roundingMode = softfloat_roundingMode;
+    roundNearEven = (roundingMode == softfloat_round_near_even);
+    roundIncrement = 0x40;
+    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
+        roundIncrement =
+            (roundingMode
+                 == (sign ? softfloat_round_min : softfloat_round_max))
+                ? 0x7F
+                : 0;
+    }
+    roundBits = sig & 0x7F;
+    /* The unsigned comparison is true for exp >= 0xFD and also for every negative exp, because a
+     * negative value becomes a large number once cast to unsigned int. */
+    if ( 0xFD <= (unsigned int) exp ) {
+        if ( exp < 0 ) {
+            /* Negative exponent: decide tininess, shift sig right by -exp, force exp to 0 and raise
+             * underflow only when the result is tiny and some of the low 7 bits are still set. */
+            isTiny =
+                (softfloat_detectTininess == softfloat_tininess_beforeRounding)
+                    || (exp < -1) || (sig + roundIncrement < 0x8000);
+            sig = softfloat_shiftRightJam32( sig, -exp );
+            exp = 0;
+            roundBits = sig & 0x7F;
+            if ( isTiny && roundBits ) {
+                softfloat_raiseFlags( softfloat_flag_underflow );
+            }
+        } else if ( (0xFD < exp) || (0x8000 <= sig + roundIncrement) ) {
+            /* Exponent above 0xFD, or the increment carries sig to 0x8000 or beyond: raise overflow and inexact.
+             * The result is the exp=0xFF, sig=0 encoding, reduced by one when roundIncrement is 0. */
+            softfloat_raiseFlags(
+                softfloat_flag_overflow | softfloat_flag_inexact );
+            uiZ = packToBF16UI( sign, 0xFF, 0 ) - ! roundIncrement;
+            goto uiZ;
+        }
+    }
+    /* Add the increment and drop the 7 rounding bits; if any of those bits were set the inexact
+     * flag is recorded directly in softfloat_exceptionFlags. */
+    sig = (sig + roundIncrement)>>7;
+    if ( roundBits ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+#ifdef SOFTFLOAT_ROUND_ODD
+        if ( roundingMode == softfloat_round_odd ) {
+            sig |= 1;
+            goto packReturn;
+        }
+#endif
+    }
+    sig &= ~(uint_fast16_t) (! (roundBits ^ 0x40) & roundNearEven); /* exact tie (roundBits == 0x40) in near-even mode: clear the lowest result bit */
+    if ( ! sig ) exp = 0; /* a zero significand is always packed with a zero exponent */
+    /* Assemble the 16-bit result word and return it as a bfloat16_t through the ui16_bf16 union;
+     * the overflow path above jumps straight to the uiZ label with its word already built. */
+ packReturn:
+    uiZ = packToBF16UI( sign, exp, sig );
+ uiZ:
+    uZ.ui = uiZ;
+    return uZ.f;
+
+}
+
--- a/src/elfio.cpp
+++ b/src/elfio.cpp
@@ -0,0 +1,35 @@
+#ifdef _MSC_VER
+#define _SCL_SECURE_NO_WARNINGS
+#define ELFIO_NO_INTTYPES
+#endif
+
+#include <elfio/elfio_dump.hpp>
+#include <iostream>
+
+using namespace ELFIO;
+
+int main(int argc, char** argv) { // dumps the contents of the ELF file named by argv[1] to stdout; returns 1 on bad usage or load failure
+    if(argc != 2) {
+        std::cout << "Usage: elfdump <file_name>\n";
+        return 1;
+    }
+    // all messages go through std::cout: this file includes <iostream> but never <cstdio>
+    elfio reader;
+    // a failed load() is reported with one message that covers both a missing and a non-ELF file
+    if(!reader.load(argv[1])) {
+        std::cout << "File " << argv[1] << " is not found or it is not an ELF file\n";
+        return 1;
+    }
+    // write each dump:: view of the loaded file to std::cout in turn
+    dump::header(std::cout, reader);
+    dump::section_headers(std::cout, reader);
+    dump::segment_headers(std::cout, reader);
+    dump::symbol_tables(std::cout, reader);
+    dump::notes(std::cout, reader);
+    dump::modinfo(std::cout, reader);
+    dump::dynamic_tags(std::cout, reader);
+    dump::section_datas(std::cout, reader);
+    dump::segment_datas(std::cout, reader);
+
+    return 0;
+}
--- a/src/iss/arch/hwl.h
+++ b/src/iss/arch/hwl.h
@@ -51,8 +51,8 @@ public:
    virtual ~hwl() = default;

 protected:
-    iss::status read_custom_csr_reg(unsigned addr, reg_t& val) override;
-    iss::status write_custom_csr_reg(unsigned addr, reg_t val) override;
+    iss::status read_custom_csr(unsigned addr, reg_t& val) override;
+    iss::status write_custom_csr(unsigned addr, reg_t val) override;
 };

 template <typename BASE>
@@ -68,7 +68,7 @@ inline hwl<BASE>::hwl(feature_config cfg)
    }
 }

-template <typename BASE> inline iss::status iss::arch::hwl<BASE>::read_custom_csr_reg(unsigned addr, reg_t& val) {
+template <typename BASE> inline iss::status iss::arch::hwl<BASE>::read_custom_csr(unsigned addr, reg_t& val) {
    switch(addr) {
    case 0x800:
        val = this->reg.lpstart0;
@@ -92,7 +92,7 @@ template <typename BASE> inline iss::status iss::arch::hwl<BASE>::read_custom_cs
    return iss::Ok;
 }

-template <typename BASE> inline iss::status iss::arch::hwl<BASE>::write_custom_csr_reg(unsigned addr, reg_t val) {
+template <typename BASE> inline iss::status iss::arch::hwl<BASE>::write_custom_csr(unsigned addr, reg_t val) {
    switch(addr) {
    case 0x800:
        this->reg.lpstart0 = val;
--- a/src/iss/arch/mstatus.h
+++ b/src/iss/arch/mstatus.h
@@ -0,0 +1,233 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+#ifndef _MSTATUS_TYPE
+#define _MSTATUS_TYPE
+
+#include <cstdint>
+#include <type_traits>
+#include <util/bit_field.h>
+#include <util/ities.h>
+
+namespace iss {
+namespace arch {
+
+template <class T, class Enable = void> struct status {};
+// specialization 32bit: stateless accessors, each extracting one mstatus field from a raw 32-bit value (bit_sub<pos, width> is assumed to return `width` bits starting at bit `pos`)
+template <typename T> struct status<T, typename std::enable_if<std::is_same<T, uint32_t>::value>::type> {
+    static inline unsigned SD(T v) { return bit_sub<31, 1>(v); } // top bit of the 32-bit word, matching BF_FIELD(SD, 31, 1) in hart_state<uint32_t>
+    // value of XLEN for S-mode; NOTE(review): bit 34 lies outside a 32-bit value and hart_state<uint32_t> declares no SXL -- confirm this accessor is never instantiated
+    static inline unsigned SXL(T v) { return bit_sub<34, 2>(v); };
+    // value of XLEN for U-mode; NOTE(review): same concern as SXL, bit 32 does not exist in a 32-bit value
+    static inline unsigned UXL(T v) { return bit_sub<32, 2>(v); };
+    // Trap SRET
+    static inline unsigned TSR(T v) { return bit_sub<22, 1>(v); };
+    // Timeout Wait
+    static inline unsigned TW(T v) { return bit_sub<21, 1>(v); };
+    // Trap Virtual Memory
+    static inline unsigned TVM(T v) { return bit_sub<20, 1>(v); };
+    // Make eXecutable Readable
+    static inline unsigned MXR(T v) { return bit_sub<19, 1>(v); };
+    // permit Supervisor User Memory access
+    static inline unsigned SUM(T v) { return bit_sub<18, 1>(v); };
+    // Modify PRiVilege
+    static inline unsigned MPRV(T v) { return bit_sub<17, 1>(v); };
+    // status of additional user-mode extensions and associated state, All off/None dirty or clean, some on/None
+    // dirty, some clean/Some dirty
+    static inline unsigned XS(T v) { return bit_sub<15, 2>(v); };
+    // floating-point unit status Off/Initial/Clean/Dirty
+    static inline unsigned FS(T v) { return bit_sub<13, 2>(v); };
+    // machine previous privilege
+    static inline unsigned MPP(T v) { return bit_sub<11, 2>(v); };
+    // supervisor previous privilege
+    static inline unsigned SPP(T v) { return bit_sub<8, 1>(v); };
+    // previous machine interrupt-enable
+    static inline unsigned MPIE(T v) { return bit_sub<7, 1>(v); };
+    // previous supervisor interrupt-enable
+    static inline unsigned SPIE(T v) { return bit_sub<5, 1>(v); };
+    // previous user interrupt-enable
+    static inline unsigned UPIE(T v) { return bit_sub<4, 1>(v); };
+    // machine interrupt-enable
+    static inline unsigned MIE(T v) { return bit_sub<3, 1>(v); };
+    // supervisor interrupt-enable
+    static inline unsigned SIE(T v) { return bit_sub<1, 1>(v); };
+    // user interrupt-enable
+    static inline unsigned UIE(T v) { return bit_sub<0, 1>(v); };
+};
+
+template <typename T> struct status<T, typename std::enable_if<std::is_same<T, uint64_t>::value>::type> {
+public: // every accessor is static and keeps no state; each one extracts a single field from the raw value v
+    // SD (bit 63) is read-only and is set when either the FS or XS bits encode a Dirty state,
+    // i.e. SD = ((FS == 11) OR (XS == 11))
+    static inline unsigned SD(T v) { return bit_sub<63, 1>(v); };
+    // value of XLEN for S-mode
+    static inline unsigned SXL(T v) { return bit_sub<34, 2>(v); };
+    // value of XLEN for U-mode
+    static inline unsigned UXL(T v) { return bit_sub<32, 2>(v); };
+    // Trap SRET
+    static inline unsigned TSR(T v) { return bit_sub<22, 1>(v); };
+    // Timeout Wait
+    static inline unsigned TW(T v) { return bit_sub<21, 1>(v); };
+    // Trap Virtual Memory
+    static inline unsigned TVM(T v) { return bit_sub<20, 1>(v); };
+    // Make eXecutable Readable
+    static inline unsigned MXR(T v) { return bit_sub<19, 1>(v); };
+    // permit Supervisor User Memory access
+    static inline unsigned SUM(T v) { return bit_sub<18, 1>(v); };
+    // Modify PRiVilege
+    static inline unsigned MPRV(T v) { return bit_sub<17, 1>(v); };
+    // status of additional user-mode extensions and associated state, All off/None dirty or clean, some on/None
+    // dirty, some clean/Some dirty
+    static inline unsigned XS(T v) { return bit_sub<15, 2>(v); };
+    // floating-point unit status Off/Initial/Clean/Dirty
+    static inline unsigned FS(T v) { return bit_sub<13, 2>(v); };
+    // machine previous privilege
+    static inline unsigned MPP(T v) { return bit_sub<11, 2>(v); };
+    // supervisor previous privilege
+    static inline unsigned SPP(T v) { return bit_sub<8, 1>(v); };
+    // previous machine interrupt-enable
+    static inline unsigned MPIE(T v) { return bit_sub<7, 1>(v); };
+    // previous supervisor interrupt-enable
+    static inline unsigned SPIE(T v) { return bit_sub<5, 1>(v); };
+    // previous user interrupt-enable
+    static inline unsigned UPIE(T v) { return bit_sub<4, 1>(v); };
+    // machine interrupt-enable
+    static inline unsigned MIE(T v) { return bit_sub<3, 1>(v); };
+    // supervisor interrupt-enable
+    static inline unsigned SIE(T v) { return bit_sub<1, 1>(v); };
+    // user interrupt-enable
+    static inline unsigned UIE(T v) { return bit_sub<0, 1>(v); };
+};
+
+// primary template
+template <class T, class Enable = void> struct hart_state {};
+// specialization 32bit: declares the mstatus_t bit-field layout over a 32-bit word, one mstatus member of that type, and the register's reset value
+template <typename T> class hart_state<T, typename std::enable_if<std::is_same<T, uint32_t>::value>::type> {
+public:
+    BEGIN_BF_DECL(mstatus_t, T);
+    // SD bit is read-only and is set when either the FS or XS bits encode a Dirty state,
+    // i.e. SD = ((FS == 11) OR (XS == 11))
+    BF_FIELD(SD, 31, 1);
+    // Trap SRET
+    BF_FIELD(TSR, 22, 1);
+    // Timeout Wait
+    BF_FIELD(TW, 21, 1);
+    // Trap Virtual Memory
+    BF_FIELD(TVM, 20, 1);
+    // Make eXecutable Readable
+    BF_FIELD(MXR, 19, 1);
+    // permit Supervisor User Memory access
+    BF_FIELD(SUM, 18, 1);
+    // Modify PRiVilege
+    BF_FIELD(MPRV, 17, 1);
+    // status of additional user-mode extensions and associated state, All off/None dirty or clean, some on/None
+    // dirty, some clean/Some dirty
+    BF_FIELD(XS, 15, 2);
+    // floating-point unit status Off/Initial/Clean/Dirty
+    BF_FIELD(FS, 13, 2);
+    // machine previous privilege
+    BF_FIELD(MPP, 11, 2);
+    // supervisor previous privilege
+    BF_FIELD(SPP, 8, 1);
+    // previous machine interrupt-enable
+    BF_FIELD(MPIE, 7, 1);
+    // previous supervisor interrupt-enable
+    BF_FIELD(SPIE, 5, 1);
+    // previous user interrupt-enable
+    BF_FIELD(UPIE, 4, 1);
+    // machine interrupt-enable
+    BF_FIELD(MIE, 3, 1);
+    // supervisor interrupt-enable
+    BF_FIELD(SIE, 1, 1);
+    // user interrupt-enable
+    BF_FIELD(UIE, 0, 1);
+    END_BF_DECL();
+
+    mstatus_t mstatus; // the register itself, accessed through the fields declared above
+
+    static const T mstatus_reset_val = 0x1800; // 0x1800 == 0b11 << 11: both bits of MPP set, assuming BF_FIELD takes (name, start bit, width)
+};
+
+// specialization 64bit: declares the mstatus_t bit-field layout over a 64-bit word (adds SXL/UXL, SD at bit 63), one mstatus member, and the reset value
+template <typename T> class hart_state<T, typename std::enable_if<std::is_same<T, uint64_t>::value>::type> {
+public:
+    BEGIN_BF_DECL(mstatus_t, T);
+    // SD bit is read-only and is set when either the FS or XS bits encode a Dirty state,
+    // i.e. SD = ((FS == 11) OR (XS == 11))
+    BF_FIELD(SD, 63, 1);
+    // value of XLEN for S-mode
+    BF_FIELD(SXL, 34, 2);
+    // value of XLEN for U-mode
+    BF_FIELD(UXL, 32, 2);
+    // Trap SRET
+    BF_FIELD(TSR, 22, 1);
+    // Timeout Wait
+    BF_FIELD(TW, 21, 1);
+    // Trap Virtual Memory
+    BF_FIELD(TVM, 20, 1);
+    // Make eXecutable Readable
+    BF_FIELD(MXR, 19, 1);
+    // permit Supervisor User Memory access
+    BF_FIELD(SUM, 18, 1);
+    // Modify PRiVilege
+    BF_FIELD(MPRV, 17, 1);
+    // status of additional user-mode extensions and associated state, All off/None dirty or clean, some on/None
+    // dirty, some clean/Some dirty
+    BF_FIELD(XS, 15, 2);
+    // floating-point unit status Off/Initial/Clean/Dirty
+    BF_FIELD(FS, 13, 2);
+    // machine previous privilege
+    BF_FIELD(MPP, 11, 2);
+    // supervisor previous privilege
+    BF_FIELD(SPP, 8, 1);
+    // previous machine interrupt-enable
+    BF_FIELD(MPIE, 7, 1);
+    // previous supervisor interrupt-enable
+    BF_FIELD(SPIE, 5, 1);
+    // previous user interrupt-enable
+    BF_FIELD(UPIE, 4, 1);
+    // machine interrupt-enable
+    BF_FIELD(MIE, 3, 1);
+    // supervisor interrupt-enable
+    BF_FIELD(SIE, 1, 1);
+    // user interrupt-enable
+    BF_FIELD(UIE, 0, 1);
+    END_BF_DECL();
+
+    mstatus_t mstatus; // the register itself, accessed through the fields declared above
+
+    static const T mstatus_reset_val = 0x1800; // 0x1800 == 0b11 << 11: both bits of MPP set, assuming BF_FIELD takes (name, start bit, width)
+};
+} // namespace arch
+} // namespace iss
+#endif // _MSTATUS_TYPE
--- a/src/iss/arch/riscv_hart_common.h
+++ b/src/iss/arch/riscv_hart_common.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017, 2018, 2021 MINRES Technologies GmbH
+ * Copyright (C) 2017 - 2025 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -35,21 +35,38 @@
 #ifndef _RISCV_HART_COMMON
 #define _RISCV_HART_COMMON

+#include "iss/arch/traits.h"
+#include "iss/log_categories.h"
+#include "iss/mmio/memory_if.h"
+#include "iss/vm_types.h"
+#include "mstatus.h"
+#include "util/delegate.h"
+#include <array>
 #include <cstdint>
 #include <elfio/elfio.hpp>
 #include <fmt/format.h>
 #include <iss/arch_if.h>
 #include <iss/log_categories.h>
+#include <iss/semihosting/semihosting.h>
+#include <limits>
+#include <sstream>
 #include <string>
 #include <unordered_map>
 #include <util/logging.h>
+#include <util/sparse_array.h>
+
+#if defined(__GNUC__) // branch hints: tell the compiler which outcome of a condition to expect
+#define likely(x) ::__builtin_expect(!!(x), 1)
+#define unlikely(x) ::__builtin_expect(!!(x), 0)
+#else // no builtin: yield the plain truth value, parenthesised so operators next to the macro cannot re-associate with its argument
+#define likely(x) (!!(x))
+#define unlikely(x) (!!(x))
+#endif

 namespace iss {
 namespace arch {

-enum { tohost_dflt = 0xF0001000, fromhost_dflt = 0xF0001040 };
-
-enum features_e { FEAT_NONE, FEAT_PMP = 1, FEAT_EXT_N = 2, FEAT_CLIC = 4, FEAT_DEBUG = 8, FEAT_TCM = 16 };
+enum features_e { FEAT_NONE, FEAT_EXT_N = 1, FEAT_DEBUG = 2 };

 enum riscv_csr {
    /* user-level CSR */
@@ -225,10 +242,6 @@ struct vm_info {
 };

 struct feature_config {
-    uint64_t clic_base{0xc0000000};
-    unsigned clic_int_ctl_bits{4};
-    unsigned clic_num_irq{16};
-    unsigned clic_num_trigger{0};
    uint64_t tcm_base{0x10000000};
    uint64_t tcm_size{0x8000};
    uint64_t io_address{0xf0000000};
@@ -261,101 +274,579 @@ public:
    : trap_access(15 << 16, badaddr) {}
 };

-inline void read_reg_uint32(uint64_t offs, uint32_t& reg, uint8_t* const data, unsigned length) {
-    auto reg_ptr = reinterpret_cast<uint8_t*>(&reg);
-    switch(offs & 0x3) {
-    case 0:
-        for(auto i = 0U; i < length; ++i)
-            *(data + i) = *(reg_ptr + i);
-        break;
-    case 1:
-        for(auto i = 0U; i < length; ++i)
-            *(data + i) = *(reg_ptr + 1 + i);
-        break;
-    case 2:
-        for(auto i = 0U; i < length; ++i)
-            *(data + i) = *(reg_ptr + 2 + i);
-        break;
-    case 3:
-        *data = *(reg_ptr + 3);
-        break;
-    }
-}
+template <typename WORD_TYPE> struct priv_if {
+    using rd_csr_f = std::function<iss::status(unsigned addr, WORD_TYPE&)>;
+    using wr_csr_f = std::function<iss::status(unsigned addr, WORD_TYPE)>;

-inline void write_reg_uint32(uint64_t offs, uint32_t& reg, const uint8_t* const data, unsigned length) {
-    auto reg_ptr = reinterpret_cast<uint8_t*>(&reg);
-    switch(offs & 0x3) {
-    case 0:
-        for(auto i = 0U; i < length; ++i)
-            *(reg_ptr + i) = *(data + i);
-        break;
-    case 1:
-        for(auto i = 0U; i < length; ++i)
-            *(reg_ptr + 1 + i) = *(data + i);
-        break;
-    case 2:
-        for(auto i = 0U; i < length; ++i)
-            *(reg_ptr + 2 + i) = *(data + i);
-        break;
-    case 3:
-        *(reg_ptr + 3) = *data;
-        break;
+    std::function<iss::status(unsigned, WORD_TYPE&)> read_csr;
+    std::function<iss::status(unsigned, WORD_TYPE)> write_csr;
+    std::function<iss::status(uint8_t const*)> exec_htif;
+    std::unordered_map<unsigned, rd_csr_f>& csr_rd_cb;
+    std::unordered_map<unsigned, wr_csr_f>& csr_wr_cb;
+    hart_state<WORD_TYPE>& mstatus;
+    uint64_t& tohost;
+    uint64_t& fromhost;
+    unsigned& mcause_max_irq;
+};
+
+template <typename BASE, typename LOGCAT = logging::disass> struct riscv_hart_common : public BASE, public mmio::memory_elem {
+    const std::array<const char, 4> lvl = {{'U', 'S', 'H', 'M'}};
+    const std::array<const char*, 16> trap_str = {{""
+                                                   "Instruction address misaligned", // 0
+                                                   "Instruction access fault",       // 1
+                                                   "Illegal instruction",            // 2
+                                                   "Breakpoint",                     // 3
+                                                   "Load address misaligned",        // 4
+                                                   "Load access fault",              // 5
+                                                   "Store/AMO address misaligned",   // 6
+                                                   "Store/AMO access fault",         // 7
+                                                   "Environment call from U-mode",   // 8
+                                                   "Environment call from S-mode",   // 9
+                                                   "Reserved",                       // a
+                                                   "Environment call from M-mode",   // b
+                                                   "Instruction page fault",         // c
+                                                   "Load page fault",                // d
+                                                   "Reserved",                       // e
+                                                   "Store/AMO page fault"}};
+    const std::array<const char*, 12> irq_str = {{"User software interrupt", "Supervisor software interrupt", "Reserved",
+                                                  "Machine software interrupt", "User timer interrupt", "Supervisor timer interrupt",
+                                                  "Reserved", "Machine timer interrupt", "User external interrupt",
+                                                  "Supervisor external interrupt", "Reserved", "Machine external interrupt"}};
+    constexpr static unsigned MEM = traits<BASE>::MEM;
+
+    using core = BASE;
+    using this_class = riscv_hart_common<BASE, LOGCAT>;
+    using phys_addr_t = typename core::phys_addr_t;
+    using reg_t = typename core::reg_t;
+    using addr_t = typename core::addr_t;
+
+    using rd_csr_f = std::function<iss::status(unsigned addr, reg_t&)>;
+    using wr_csr_f = std::function<iss::status(unsigned addr, reg_t)>;
+
+#define MK_CSR_RD_CB(FCT) [this](unsigned a, reg_t& r) -> iss::status { return this->FCT(a, r); };
+#define MK_CSR_WR_CB(FCT) [this](unsigned a, reg_t r) -> iss::status { return this->FCT(a, r); };
+
+    riscv_hart_common()
+    : state()
+    , instr_if(*this) {
+        // reset values
+        csr[misa] = traits<BASE>::MISA_VAL;
+        csr[mvendorid] = 0x669;
+        csr[marchid] = traits<BASE>::MARCHID_VAL;
+        csr[mimpid] = 1;
+
+        if(traits<BASE>::FLEN > 0) {
+            csr_rd_cb[fcsr] = MK_CSR_RD_CB(read_fcsr);
+            csr_wr_cb[fcsr] = MK_CSR_WR_CB(write_fcsr);
+            csr_rd_cb[fflags] = MK_CSR_RD_CB(read_fcsr);
+            csr_wr_cb[fflags] = MK_CSR_WR_CB(write_fcsr);
+            csr_rd_cb[frm] = MK_CSR_RD_CB(read_fcsr);
+            csr_wr_cb[frm] = MK_CSR_WR_CB(write_fcsr);
+        }
+        for(unsigned addr = mhpmcounter3; addr <= mhpmcounter31; ++addr) {
+            csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
+            csr_wr_cb[addr] = MK_CSR_WR_CB(write_plain);
+        }
+        if(traits<BASE>::XLEN == 32)
+            for(unsigned addr = mhpmcounter3h; addr <= mhpmcounter31h; ++addr) {
+                csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
+                csr_wr_cb[addr] = MK_CSR_WR_CB(write_plain);
+            }
+        for(unsigned addr = mhpmevent3; addr <= mhpmevent31; ++addr) {
+            csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
+            csr_wr_cb[addr] = MK_CSR_WR_CB(write_plain);
+        }
+        for(unsigned addr = hpmcounter3; addr <= hpmcounter31; ++addr) {
+            csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
+        }
+        if(traits<BASE>::XLEN == 32)
+            for(unsigned addr = hpmcounter3h; addr <= hpmcounter31h; ++addr) {
+                csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
+            }
+        // common regs
+        const std::array<unsigned, 4> roaddrs{{misa, mvendorid, marchid, mimpid}};
+        for(auto addr : roaddrs) {
+            csr_rd_cb[addr] = MK_CSR_RD_CB(read_plain);
+            csr_wr_cb[addr] = MK_CSR_WR_CB(write_null);
+        }
+        // special handling & overrides
+        csr_rd_cb[time] = MK_CSR_RD_CB(read_time);
+        if(traits<BASE>::XLEN == 32)
+            csr_rd_cb[timeh] = MK_CSR_RD_CB(read_time);
+        csr_rd_cb[cycle] = MK_CSR_RD_CB(read_cycle);
+        if(traits<BASE>::XLEN == 32)
+            csr_rd_cb[cycleh] = MK_CSR_RD_CB(read_cycle);
+        csr_rd_cb[instret] = MK_CSR_RD_CB(read_instret);
+        if(traits<BASE>::XLEN == 32)
+            csr_rd_cb[instreth] = MK_CSR_RD_CB(read_instret);
+
+        csr_rd_cb[mcycle] = MK_CSR_RD_CB(read_cycle);
+        csr_wr_cb[mcycle] = MK_CSR_WR_CB(write_cycle);
+        if(traits<BASE>::XLEN == 32)
+            csr_rd_cb[mcycleh] = MK_CSR_RD_CB(read_cycle);
+        if(traits<BASE>::XLEN == 32)
+            csr_wr_cb[mcycleh] = MK_CSR_WR_CB(write_cycle);
+        csr_rd_cb[minstret] = MK_CSR_RD_CB(read_instret);
+        csr_wr_cb[minstret] = MK_CSR_WR_CB(write_instret);
+        if(traits<BASE>::XLEN == 32)
+            csr_rd_cb[minstreth] = MK_CSR_RD_CB(read_instret);
+        if(traits<BASE>::XLEN == 32)
+            csr_wr_cb[minstreth] = MK_CSR_WR_CB(write_instret);
+        csr_rd_cb[mhartid] = MK_CSR_RD_CB(read_hartid);
+    };
+    ~riscv_hart_common() {
+        if(io_buf.str().length()) {
+            CPPLOG(INFO) << "tohost send '" << io_buf.str() << "'";
+        }
    }
-}
-struct riscv_hart_common {
-    riscv_hart_common(){};
-    ~riscv_hart_common(){};
    std::unordered_map<std::string, uint64_t> symbol_table;
+    uint64_t entry_address{0};
+    uint64_t tohost = std::numeric_limits<uint64_t>::max();
+    uint64_t fromhost = std::numeric_limits<uint64_t>::max();
+    std::stringstream io_buf;

-    std::unordered_map<std::string, uint64_t> get_sym_table(std::string name) {
-        if(!symbol_table.empty())
-            return symbol_table;
-        FILE* fp = fopen(name.c_str(), "r");
-        if(fp) {
-            std::array<char, 5> buf;
-            auto n = fread(buf.data(), 1, 4, fp);
-            fclose(fp);
-            if(n != 4)
-                throw std::runtime_error("input file has insufficient size");
-            buf[4] = 0;
-            if(strcmp(buf.data() + 1, "ELF") == 0) {
-                // Create elfio reader
-                ELFIO::elfio reader;
-                // Load ELF data
-                if(!reader.load(name))
-                    throw std::runtime_error("could not process elf file");
-                // check elf properties
-                if(reader.get_type() != ET_EXEC)
-                    throw std::runtime_error("wrong elf type in file");
-                if(reader.get_machine() != EM_RISCV)
-                    throw std::runtime_error("wrong elf machine in file");
-                const auto sym_sec = reader.sections[".symtab"];
-                if(SHT_SYMTAB == sym_sec->get_type() || SHT_DYNSYM == sym_sec->get_type()) {
-                    ELFIO::symbol_section_accessor symbols(reader, sym_sec);
-                    auto sym_no = symbols.get_symbols_num();
-                    std::string name;
-                    ELFIO::Elf64_Addr value = 0;
-                    ELFIO::Elf_Xword size = 0;
-                    unsigned char bind = 0;
-                    unsigned char type = 0;
-                    ELFIO::Elf_Half section = 0;
-                    unsigned char other = 0;
-                    for(auto i = 0U; i < sym_no; ++i) {
-                        symbols.get_symbol(i, name, value, size, bind, type, section, other);
-                        if(name != "") {
-                            this->symbol_table[name] = value;
+    void set_semihosting_callback(semihosting_cb_t<reg_t> cb) { semihosting_cb = cb; };
+
+    std::pair<uint64_t, bool> load_file(std::string name, int type) {
+        return std::make_pair(entry_address, read_elf_file(name, sizeof(reg_t) == 4 ? ELFIO::ELFCLASS32 : ELFIO::ELFCLASS64));
+    }
+
+    /**
+     * Reads an ELF executable, copies its loadable segments into memory and collects its symbols.
+     *
+     * The file is only accepted if its class equals expected_elf_class, its type is ET_EXEC and its machine is
+     * EM_RISCV. On success entry_address holds the ELF entry point, every PT_LOAD segment with a non-zero file
+     * size has been written to its physical address using a debug write, and symbol_table contains all named
+     * symbols of the .symtab section. If the symbols 'tohost' or 'fromhost' exist their values are stored in the
+     * members of the same name. A file without a .symtab section is loaded without symbols.
+     *
+     * @param name path of the ELF file
+     * @param expected_elf_class ELF class the file has to have
+     * @return true if the file could be loaded and passed the property checks, false otherwise
+     */
+    bool read_elf_file(std::string name, uint8_t expected_elf_class) {
+        // Create elfio reader
+        ELFIO::elfio reader;
+        // Load ELF data
+        if(reader.load(name)) {
+            // check elf properties
+            if(reader.get_class() != expected_elf_class)
+                return false;
+            if(reader.get_type() != ELFIO::ET_EXEC)
+                return false;
+            if(reader.get_machine() != ELFIO::EM_RISCV)
+                return false;
+            entry_address = reader.get_entry();
+            for(const auto& pseg : reader.segments) {
+                const auto fsize = pseg->get_file_size(); // 0x42c/0x0
+                const auto seg_data = pseg->get_data();
+                const auto type = pseg->get_type();
+                if(type == ELFIO::PT_LOAD && fsize > 0) {
+                    auto res = this->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_WRITE, traits<BASE>::MEM,
+                                           pseg->get_physical_address(), fsize, reinterpret_cast<const uint8_t* const>(seg_data));
+                    if(res != iss::Ok)
+                        CPPLOG(ERR) << "problem writing " << fsize << " bytes to 0x" << std::hex << pseg->get_physical_address();
+                }
+            }
+            // the section lookup yields a null pointer for stripped files, so guard before dereferencing
+            const auto sym_sec = reader.sections[".symtab"];
+            if(sym_sec != nullptr && (ELFIO::SHT_SYMTAB == sym_sec->get_type() || ELFIO::SHT_DYNSYM == sym_sec->get_type())) {
+                ELFIO::symbol_section_accessor symbols(reader, sym_sec);
+                auto sym_no = symbols.get_symbols_num();
+                std::string name;
+                ELFIO::Elf64_Addr value = 0;
+                ELFIO::Elf_Xword size = 0;
+                unsigned char bind = 0;
+                unsigned char type = 0;
+                ELFIO::Elf_Half section = 0;
+                unsigned char other = 0;
+                for(auto i = 0U; i < sym_no; ++i) {
+                    symbols.get_symbol(i, name, value, size, bind, type, section, other);
+                    if(name != "") {
+                        this->symbol_table[name] = value;
 #ifndef NDEBUG
-                            CPPLOG(DEBUG) << "Found Symbol " << name;
+                        CPPLOG(DEBUG) << "Found Symbol " << name;
 #endif
-                        }
                    }
                }
-                return symbol_table;
+                try {
+                    tohost = symbol_table.at("tohost");
+                } catch(std::out_of_range& e) {
+                }
+                try {
+                    fromhost = symbol_table.at("fromhost");
+                } catch(std::out_of_range& e) {
+                }
            }
-            throw std::runtime_error(fmt::format("memory load file {} is not a valid elf file", name));
-        } else
-            throw std::runtime_error(fmt::format("memory load file not found, check if {} is a valid file", name));
+            return true;
+        }
+        return false;
    };
+
+    /**
+     * Handles an HTIF SYS_WRITE request.
+     *
+     * Reads loaded_payload[3] bytes starting at address loaded_payload[2] through aif and appends them to io_buf.
+     * Each '\n' or '\0' flushes the collected text to the log and clears io_buf. The file descriptor in
+     * loaded_payload[1] is ignored. Unless fromhost equals the uint64_t maximum, a single byte of value 1 is
+     * written to the fromhost address afterwards.
+     *
+     * @param aif architecture interface used for the debug accesses
+     * @param loaded_payload the syscall arguments
+     * @param mem_type memory space handed to the read and write calls
+     * @return iss::Err if one of the memory accesses fails, iss::Ok otherwise
+     */
+    iss::status execute_sys_write(arch_if* aif, const std::array<uint64_t, 8>& loaded_payload, unsigned mem_type) {
+        const uint64_t src_addr = loaded_payload[2];
+        const uint64_t byte_cnt = loaded_payload[3];
+        std::vector<char> chars(byte_cnt);
+        auto rd_res = aif->read(address_type::PHYSICAL, access_type::DEBUG_READ, mem_type, src_addr, byte_cnt,
+                                reinterpret_cast<uint8_t*>(chars.data()));
+        if(rd_res) {
+            CPPLOG(ERR) << "SYS_WRITE buffer read went wrong";
+            return iss::Err;
+        }
+        // the file descriptor is not evaluated, everything ends up in the log
+        for(const char c : chars) {
+            const bool flush = c == '\n' || c == '\0';
+            if(!flush) {
+                io_buf << c;
+                continue;
+            }
+            CPPLOG(INFO) << "tohost send '" << io_buf.str() << "'";
+            io_buf.str("");
+        }
+
+        // Not sure what the correct return value should be
+        uint8_t ack = 1;
+        if(fromhost != std::numeric_limits<uint64_t>::max() &&
+           aif->write(address_type::PHYSICAL, access_type::DEBUG_WRITE, mem_type, fromhost, 1, &ack)) {
+            CPPLOG(ERR) << "Fromhost write went wrong";
+            return iss::Err;
+        }
+        return iss::Ok;
+    }
+
+    // true if bit 2 of the MISA value of this core is set
+    constexpr bool has_compressed() {
+        return (traits<BASE>::MISA_VAL & 0b0100) != 0;
+    }
+
+    // mask clearing bit 0 of an address if has_compressed() holds, bits 1:0 otherwise
+    constexpr reg_t get_pc_mask() {
+        return has_compressed() ? static_cast<reg_t>(~1) : static_cast<reg_t>(~3);
+    }
+
+    // logs pc and instruction text together with the lvl entry of the current PRIV value and the total cycle count
+    void disass_output(uint64_t pc, const std::string instr) override {
+        const auto total_cycles = this->reg.cycle + cycle_offset;
+        NSCLOG(INFO, LOGCAT) << fmt::format("0x{:016x}    {:40} [p:{};c:{}]", pc, instr, lvl[this->reg.PRIV], total_cycles);
+    };
+
+    // installs f as read handler of the CSR at addr, replacing a previous one
+    void register_csr(unsigned addr, rd_csr_f f) {
+        csr_rd_cb[addr] = f;
+    }
+    // installs f as write handler of the CSR at addr, replacing a previous one
+    void register_csr(unsigned addr, wr_csr_f f) {
+        csr_wr_cb[addr] = f;
+    }
+    // installs read and write handler of the CSR at addr in one go
+    void register_csr(unsigned addr, rd_csr_f rdf, wr_csr_f wrf) {
+        register_csr(addr, rdf);
+        register_csr(addr, wrf);
+    }
+    // removes the read handler of the CSR at addr, if there is one
+    void unregister_csr_rd(unsigned addr) {
+        csr_rd_cb.erase(addr);
+    }
+    // removes the write handler of the CSR at addr, if there is one
+    void unregister_csr_wr(unsigned addr) {
+        csr_wr_cb.erase(addr);
+    }
+
+    // true while bit 2 of the PRIV register is set
+    bool debug_mode_active() {
+        return (this->reg.PRIV & 0x4) != 0;
+    }
+
+    // read access to the hart id held in mhartid_reg
+    const reg_t& get_mhartid() const {
+        return mhartid_reg;
+    }
+    // overwrites the hart id held in mhartid_reg
+    void set_mhartid(reg_t mhartid) {
+        mhartid_reg = mhartid;
+    };
+
+    /**
+     * Reads a CSR by dispatching to the read handler registered for addr.
+     *
+     * @param addr CSR address, bits 9:8 encode the privilege level needed for the access
+     * @param val receives the value provided by the handler
+     * @return iss::Err if addr is outside of the csr storage, otherwise the status returned by the handler
+     * @throws illegal_instruction_fault if the current PRIV value is below the needed level or no usable
+     *         handler is registered for addr
+     */
+    iss::status read_csr(unsigned addr, reg_t& val) {
+        if(addr >= csr.size())
+            return iss::Err;
+        const auto needed_priv = (addr >> 8) & 0x3;
+        if(this->reg.PRIV < needed_priv) // not having required privileges
+            throw illegal_instruction_fault(this->fault_data);
+        const auto handler = csr_rd_cb.find(addr);
+        const bool usable = handler != csr_rd_cb.end() && handler->second;
+        if(!usable) // non existent register
+            throw illegal_instruction_fault(this->fault_data);
+        return handler->second(addr, val);
+    }
+
+    /**
+     * Writes a CSR by dispatching to the write handler registered for addr.
+     *
+     * @param addr CSR address, bits 9:8 encode the privilege level needed for the access
+     * @param val value handed to the handler
+     * @return iss::Err if addr is outside of the csr storage, otherwise the status returned by the handler
+     * @throws illegal_instruction_fault if the current PRIV value is below the needed level, if bits 11:10 of
+     *         addr are both set (read-only region) or if no usable handler is registered for addr
+     */
+    iss::status write_csr(unsigned addr, reg_t val) {
+        if(addr >= csr.size())
+            return iss::Err;
+        const auto needed_priv = (addr >> 8) & 0x3;
+        if(this->reg.PRIV < needed_priv) // not having required privileges
+            throw illegal_instruction_fault(this->fault_data);
+        const bool read_only = (addr & 0xc00) == 0xc00;
+        if(read_only) // writing to read-only region
+            throw illegal_instruction_fault(this->fault_data);
+        const auto handler = csr_wr_cb.find(addr);
+        const bool usable = handler != csr_wr_cb.end() && handler->second;
+        if(!usable) // non existent register
+            throw illegal_instruction_fault(this->fault_data);
+        return handler->second(addr, val);
+    }
+
+    // read handler that always delivers 0
+    iss::status read_null(unsigned addr, reg_t& val) {
+        val = 0;
+        return iss::Ok;
+    }
+
+    // write handler that drops the value
+    iss::status write_null(unsigned addr, reg_t val) {
+        return iss::status::Ok;
+    }
+
+    // read handler delivering the value stored in csr[addr]
+    iss::status read_plain(unsigned addr, reg_t& val) {
+        val = csr[addr];
+        return iss::Ok;
+    }
+
+    // write handler storing val unmodified in csr[addr]
+    iss::status write_plain(unsigned addr, reg_t val) {
+        csr[addr] = val;
+        return iss::Ok;
+    }
+
+    /**
+     * Read handler for the cycle counter.
+     *
+     * Delivers the lower part of reg.cycle + cycle_offset for mcycle and the part above bit 31 for mcycleh.
+     * For any other address val is left untouched.
+     *
+     * @return always iss::Ok
+     */
+    iss::status read_cycle(unsigned addr, reg_t& val) {
+        const auto total = this->reg.cycle + cycle_offset;
+        if(addr == mcycle)
+            val = static_cast<reg_t>(total);
+        else if(addr == mcycleh)
+            val = static_cast<reg_t>(total >> 32);
+        return iss::Ok;
+    }
+
+    /**
+     * Write handler for the cycle counter.
+     *
+     * With a 4 byte reg_t a write to mcycle replaces the lower 32 bits of mcycle_csr and a write to any other
+     * address replaces the upper 32 bits; with any other register width val replaces mcycle_csr as a whole.
+     * Afterwards cycle_offset is recalculated so that reg.cycle + cycle_offset equals mcycle_csr.
+     *
+     * @return always iss::Ok
+     */
+    iss::status write_cycle(unsigned addr, reg_t val) {
+        if(sizeof(typename traits<BASE>::reg_t) == 4) {
+            if(addr == mcycle) {
+                const uint64_t upper = mcycle_csr & 0xffffffff00000000;
+                mcycle_csr = upper + val;
+            } else {
+                const uint64_t lower = mcycle_csr & 0xffffffff;
+                mcycle_csr = (static_cast<uint64_t>(val) << 32) + lower;
+            }
+        } else {
+            mcycle_csr = static_cast<uint64_t>(val);
+        }
+        cycle_offset = mcycle_csr - this->reg.cycle; // TODO: relying on wrap-around
+        return iss::Ok;
+    }
+
+    /**
+     * Read handler for the retired instruction counter.
+     *
+     * Only the lower 8 bits of addr are compared: a match with minstret delivers the lower part of reg.instret,
+     * a match with minstreth the part above bit 31. For any other address val is left untouched.
+     *
+     * @return always iss::Ok
+     */
+    iss::status read_instret(unsigned addr, reg_t& val) {
+        const auto low_addr = addr & 0xff;
+        if(low_addr == (minstret & 0xff))
+            val = static_cast<reg_t>(this->reg.instret);
+        else if(low_addr == (minstreth & 0xff))
+            val = static_cast<reg_t>(this->reg.instret >> 32);
+        return iss::Ok;
+    }
+
+    /**
+     * Write handler for the retired instruction counter.
+     *
+     * With a 4 byte reg_t the lower 8 bits of addr select the half of reg.instret to replace (minstret: lower
+     * 32 bits, anything else: upper 32 bits); with any other register width val replaces reg.instret as a whole.
+     * The counter is decremented by one after the update.
+     *
+     * @return always iss::Ok
+     */
+    iss::status write_instret(unsigned addr, reg_t val) {
+        if(sizeof(typename traits<BASE>::reg_t) == 4) {
+            if((addr & 0xff) == (minstret & 0xff)) {
+                const auto upper = this->reg.instret & 0xffffffff00000000;
+                this->reg.instret = upper + val;
+            } else {
+                const auto lower = this->reg.instret & 0xffffffff;
+                this->reg.instret = (static_cast<uint64_t>(val) << 32) + lower;
+            }
+        } else {
+            this->reg.instret = static_cast<uint64_t>(val);
+        }
+        this->reg.instret -= 1;
+        return iss::Ok;
+    }
+
+    /**
+     * Read handler for the time CSRs, the time value is derived from reg.cycle.
+     *
+     * @return iss::Err for a timeh access if reg_t is not 4 bytes wide, iss::Ok otherwise
+     */
+    iss::status read_time(unsigned addr, reg_t& val) {
+        // integer arithmetic: 100000000 / 32768 - 1 evaluates to 3050 cycles per tick
+        const uint64_t ticks = this->reg.cycle / (100000000 / 32768 - 1);
+        if(addr == time) {
+            val = static_cast<reg_t>(ticks);
+            return iss::Ok;
+        }
+        if(addr == timeh) {
+            if(sizeof(typename traits<BASE>::reg_t) != 4)
+                return iss::Err;
+            val = static_cast<reg_t>(ticks >> 32);
+        }
+        return iss::Ok;
+    }
+
+    // read handler delivering csr[addr] with bit 1 cleared
+    iss::status read_tvec(unsigned addr, reg_t& val) {
+        const auto stored = csr[addr];
+        val = stored & ~2;
+        return iss::Ok;
+    }
+
+    // read handler delivering the hart id kept in mhartid_reg
+    iss::status read_hartid(unsigned addr, reg_t& val) {
+        val = mhartid_reg;
+        return iss::Ok;
+    }
+
+    // write handler storing val masked with get_pc_mask() in csr[addr]
+    iss::status write_epc(unsigned addr, reg_t val) {
+        const reg_t pc_mask = get_pc_mask();
+        csr[addr] = val & pc_mask;
+        return iss::Ok;
+    }
+
+    /**
+     * Write handler for dcsr, only the writable bits of val are stored.
+     *
+     * @return always iss::Ok
+     * @throws illegal_instruction_fault if debug mode is not active
+     */
+    iss::status write_dcsr(unsigned addr, reg_t val) {
+        if(!debug_mode_active())
+            throw illegal_instruction_fault(this->fault_data);
+        // writable bits: ebreakm (15), stepi (11), cause (8:6) and step (2)
+        constexpr unsigned writable_bits = 0b1000100111000100U;
+        csr[addr] = val & writable_bits;
+        return iss::Ok;
+    }
+
+    // read handler for CSRs only accessible in debug mode, throws illegal_instruction_fault otherwise
+    iss::status read_debug(unsigned addr, reg_t& val) {
+        if(debug_mode_active()) {
+            val = csr[addr];
+            return iss::Ok;
+        }
+        throw illegal_instruction_fault(this->fault_data);
+    }
+
+    // write handler storing val in csr[addr], throws illegal_instruction_fault outside of debug mode
+    iss::status write_dscratch(unsigned addr, reg_t val) {
+        if(debug_mode_active()) {
+            csr[addr] = val;
+            return iss::Ok;
+        }
+        throw illegal_instruction_fault(this->fault_data);
+    }
+
+    // read handler delivering reg.DPC, throws illegal_instruction_fault outside of debug mode
+    iss::status read_dpc(unsigned addr, reg_t& val) {
+        if(debug_mode_active()) {
+            val = this->reg.DPC;
+            return iss::Ok;
+        }
+        throw illegal_instruction_fault(this->fault_data);
+    }
+
+    // write handler updating reg.DPC, throws illegal_instruction_fault outside of debug mode
+    iss::status write_dpc(unsigned addr, reg_t val) {
+        if(debug_mode_active()) {
+            this->reg.DPC = val;
+            return iss::Ok;
+        }
+        throw illegal_instruction_fault(this->fault_data);
+    }
+
+    /**
+     * Read handler for the floating point CSRs.
+     *
+     * Address 1 delivers bits 4:0 (fflags), address 2 bits 7:5 (frm) and address 3 the complete value of
+     * get_fcsr().
+     *
+     * @return iss::Err for any other address, iss::Ok otherwise
+     */
+    iss::status read_fcsr(unsigned addr, reg_t& val) {
+        if(addr == 1) { // fflags, 4:0
+            val = bit_sub<0, 5>(this->get_fcsr());
+        } else if(addr == 2) { // frm, 7:5
+            val = bit_sub<5, 3>(this->get_fcsr());
+        } else if(addr == 3) { // fcsr
+            val = this->get_fcsr();
+        } else {
+            return iss::Err;
+        }
+        return iss::Ok;
+    }
+
+    /**
+     * Write handler for the floating point CSRs.
+     *
+     * Address 1 replaces bits 4:0 (fflags), address 2 replaces bits 7:5 (frm) and address 3 sets the register
+     * to the lower 8 bits of val.
+     *
+     * @return iss::Err for any other address, iss::Ok otherwise
+     */
+    iss::status write_fcsr(unsigned addr, reg_t val) {
+        if(addr == 1) { // fflags, 4:0
+            this->set_fcsr((this->get_fcsr() & 0xffffffe0) | (val & 0x1f));
+        } else if(addr == 2) { // frm, 7:5
+            this->set_fcsr((this->get_fcsr() & 0xffffff1f) | ((val & 0x7) << 5));
+        } else if(addr == 3) { // fcsr
+            this->set_fcsr(val & 0xff);
+        } else {
+            return iss::Err;
+        }
+        return iss::Ok;
+    }
+
+    priv_if<reg_t> get_priv_if() { // builds a priv_if from the CSR/HTIF entry points and state members of this hart
+        return priv_if<reg_t>{.read_csr = [this](unsigned addr, reg_t& val) -> iss::status { return read_csr(addr, val); },
+                              .write_csr = [this](unsigned addr, reg_t val) -> iss::status { return write_csr(addr, val); },
+                              .exec_htif = [this](uint8_t const* data) -> iss::status { return execute_htif(data); },
+                              .csr_rd_cb{this->csr_rd_cb}, // NOTE(review): presumably bound by reference - confirm in priv_if
+                              .csr_wr_cb{csr_wr_cb},
+                              .mstatus{this->state}, // the hart_state member is exposed under the name mstatus
+                              .tohost{this->tohost},
+                              .fromhost{this->fromhost},
+                              .mcause_max_irq{mcause_max_irq}};
+    }
+
+    /**
+     * Processes a value written to the HTIF tohost location.
+     *
+     * The value is read from data as one reg_t. With XLEN == 32 device and command are taken as 0, otherwise
+     * they are bits 63:56 and 55:48; the payload is bits 47:0.
+     *  - payload with bit 0 set: the simulation is stopped, the payload becomes interrupt_sim
+     *  - device 0, command 0: the payload is the address of eight 64 bit words describing a syscall, of which
+     *    only number 64 (SYS_WRITE) is implemented; every other number stops the simulation
+     *  - any other device/command combination stops the simulation
+     *
+     * @param data points to the value written to tohost
+     * @return iss::Err if the syscall payload cannot be read or the SYS_WRITE handling fails, iss::Ok otherwise
+     */
+    iss::status execute_htif(uint8_t const* data) {
+        reg_t cur_data = *reinterpret_cast<const reg_t*>(data);
+        // Extract Device (bits 63:56)
+        uint8_t device = traits<BASE>::XLEN == 32 ? 0 : (cur_data >> 56) & 0xFF;
+        // Extract Command (bits 55:48)
+        uint8_t command = traits<BASE>::XLEN == 32 ? 0 : (cur_data >> 48) & 0xFF;
+        // Extract payload (bits 47:0)
+        uint64_t payload_addr = cur_data & 0xFFFFFFFFFFFFULL;
+        if(payload_addr & 1) {
+            CPPLOG(FATAL) << "this->tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr
+                          << "), stopping simulation";
+            this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+            this->interrupt_sim = payload_addr;
+            return iss::Ok;
+        } else if(device == 0 && command == 0) {
+            // value-initialised so that no indeterminate data can ever be interpreted as syscall
+            std::array<uint64_t, 8> loaded_payload{};
+            if(memory.rd_mem(access_type::DEBUG_READ, payload_addr, 8 * sizeof(uint64_t),
+                             reinterpret_cast<uint8_t*>(loaded_payload.data())) == iss::Err) {
+                CPPLOG(ERR) << "Syscall read went wrong";
+                // without the payload there is no syscall to dispatch
+                return iss::Err;
+            }
+            uint64_t syscall_num = loaded_payload.at(0);
+            if(syscall_num == 64) { // SYS_WRITE
+                return this->execute_sys_write(this, loaded_payload, traits<BASE>::MEM);
+            } else {
+                CPPLOG(ERR) << "this->tohost syscall with number 0x" << std::hex << syscall_num << std::dec << " (" << syscall_num
+                            << ") not implemented";
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+                return iss::Ok;
+            }
+        } else {
+            // uint8_t would be streamed as a character, so widen both values to get numbers in the log
+            CPPLOG(ERR) << "this->tohost functionality not implemented for device " << static_cast<unsigned>(device)
+                        << " and command " << static_cast<unsigned>(command);
+            this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+            this->interrupt_sim = payload_addr;
+            return iss::Ok;
+        }
+    }
+
+    mmio::memory_hierarchy memories;
+
+    // Not meant to be used on this class: asserts in builds with assertions enabled and returns an empty
+    // memory_if otherwise.
+    virtual mmio::memory_if get_mem_if() override {
+        // 'false && "text"' is the form that actually fails, '||' with a string literal is always true
+        assert(false && "This function should never be called");
+        return mmio::memory_if{};
+    }
+
+    // replaces the memory interface stored in 'memory'
+    virtual void set_next(mmio::memory_if mem_if) {
+        memory = mem_if;
+    };
+
+    // stores 1 << util::ilog2(i) as mcause_max_irq
+    void set_irq_num(unsigned i) {
+        const auto exponent = util::ilog2(i);
+        mcause_max_irq = 1 << exponent;
+    }
+
+protected:
+    hart_state<reg_t> state;
+
+    /**
+     * Delivers the mstatus bit mask belonging to a privilege level.
+     *
+     * @param priv_lvl PRIV_U and PRIV_S select their own masks, every other value the remaining (machine) mask
+     * @return the mask for a 4 byte reg_t or the mask for all other register widths
+     */
+    static constexpr reg_t get_mstatus_mask_t(unsigned priv_lvl = PRIV_M) {
+        if(sizeof(reg_t) == 4) {
+            if(priv_lvl == PRIV_U)
+                return 0x80000011UL;
+            if(priv_lvl == PRIV_S)
+                return 0x800de133UL;
+            return 0x807ff9ddUL;
+        }
+        if(priv_lvl == PRIV_U)
+            return 0x011ULL;
+        if(priv_lvl == PRIV_S)
+            return 0x000de133ULL;
+        return 0x007ff9ddULL;
+    }
+
+    mmio::memory_if memory;
+    /**
+     * Instrumentation interface of the hart; every query is answered from the registers and members of the
+     * riscv_hart_common instance passed to the constructor.
+     */
+    struct riscv_instrumentation_if : public iss::instrumentation_if {
+
+        riscv_instrumentation_if(riscv_hart_common<BASE, LOGCAT>& arch)
+        : arch(arch) {}
+        /**
+         * get the name of this architecture
+         *
+         * @return the name of this architecture
+         */
+        const std::string core_type_name() const override { return traits<BASE>::core_type; }
+
+        uint64_t get_pc() override { return arch.reg.PC; }
+
+        uint64_t get_next_pc() override { return arch.reg.NEXT_PC; }
+
+        uint64_t get_instr_word() override { return arch.reg.instruction; }
+
+        uint64_t get_instr_count() override { return arch.reg.icount; }
+
+        uint64_t get_pendig_traps() override { return arch.reg.trap_state; }
+
+        // cycle register plus the offset accumulated by CSR writes and update_last_instr_cycles()
+        uint64_t get_total_cycles() override { return arch.reg.cycle + arch.cycle_offset; }
+
+        /**
+         * adjusts the cycle offset by cycles - 1
+         *
+         * The difference is calculated as signed value so that cycles == 0 reduces the offset by one instead of
+         * adding the wrapped-around unsigned value.
+         */
+        void update_last_instr_cycles(unsigned cycles) override { arch.cycle_offset += static_cast<int64_t>(cycles) - 1; }
+
+        bool is_branch_taken() override { return arch.reg.last_branch; }
+
+        unsigned get_reg_num() override { return traits<BASE>::NUM_REGS; }
+
+        unsigned get_reg_size(unsigned num) override { return traits<BASE>::reg_bit_widths[num]; }
+
+        // the name argument is not evaluated, the symbol table of the hart is returned in any case
+        std::unordered_map<std::string, uint64_t> const& get_symbol_table(std::string name) override { return arch.symbol_table; }
+
+        riscv_hart_common<BASE, LOGCAT>& arch;
+    };
+
+    friend struct riscv_instrumentation_if;
+    riscv_instrumentation_if instr_if; // instrumentation object handed out by get_instrumentation_if()
+
+    instrumentation_if* get_instrumentation_if() override { return &instr_if; }; // pointer to the embedded instr_if
+
+    using csr_type = util::sparse_array<typename traits<BASE>::reg_t, 1ULL << 12, 12>;
+    using csr_page_type = typename csr_type::page_type;
+    csr_type csr; // CSR values, indexed by CSR address
+
+    std::unordered_map<unsigned, rd_csr_f> csr_rd_cb; // read handlers per CSR address, used by read_csr()
+    std::unordered_map<unsigned, wr_csr_f> csr_wr_cb; // write handlers per CSR address, used by write_csr()
+
+    reg_t mhartid_reg{0x0}; // value delivered by read_hartid() and get_mhartid()
+    uint64_t mcycle_csr{0}; // value assembled by write_cycle()
+    uint64_t minstret_csr{0};
+    reg_t fault_data; // argument of the illegal_instruction_fault thrown by the CSR handlers
+
+    int64_t cycle_offset{0}; // added to reg.cycle wherever the cycle count is reported
+    int64_t instret_offset{0};
+    semihosting_cb_t<reg_t> semihosting_cb; // assigned by set_semihosting_callback()
+    std::array<vm_info, 2> vm;
+    unsigned mcause_max_irq{16U}; // updated by set_irq_num()
 };

 } // namespace arch
--- a/src/iss/arch/riscv_hart_m_p.h
+++ b/src/iss/arch/riscv_hart_m_p.h
--- a/src/iss/arch/riscv_hart_msu_vp.h
+++ b/src/iss/arch/riscv_hart_msu_vp.h
--- a/src/iss/arch/riscv_hart_mu_p.h
+++ b/src/iss/arch/riscv_hart_mu_p.h
--- a/src/iss/arch/tgc5c.cpp
+++ b/src/iss/arch/tgc5c.cpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017 - 2020 MINRES Technologies GmbH
+ * Copyright (C) 2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
--- a/src/iss/arch/tgc5c.h
+++ b/src/iss/arch/tgc5c.h
--- a/src/iss/arch/wt_cache.h
+++ b/src/iss/arch/wt_cache.h
@@ -87,7 +87,7 @@ public:
    virtual ~wt_cache() = default;

    unsigned size{4096};
-    unsigned line_sz{32};
+    unsigned line_sz{64};
    unsigned ways{1};
    uint64_t io_address{0xf0000000};
    uint64_t io_addr_mask{0xf0000000};
@@ -119,7 +119,7 @@ template <typename BASE> iss::status iss::arch::wt_cache<BASE>::read_cache(phys_
        icache_ptr.reset(new cache::cache(size, line_sz, ways));
        dcache_ptr.reset(new cache::cache(size, line_sz, ways));
    }
-    if((a.val & io_addr_mask) != io_address) {
+    if((a.access & iss::access_type::FETCH) == iss::access_type::FETCH || (a.val & io_addr_mask) != io_address) {
        auto set_addr = (a.val & (size - 1)) >> util::ilog2(line_sz * ways);
        auto tag_addr = a.val >> util::ilog2(line_sz);
        auto& set = (is_fetch(a.access) ? icache_ptr : dcache_ptr)->sets[set_addr];
--- a/src/iss/debugger/csr_names.cpp
+++ b/src/iss/debugger/csr_names.cpp
--- a/src/iss/debugger/riscv_target_adapter.h
+++ b/src/iss/debugger/riscv_target_adapter.h
@@ -30,8 +30,8 @@
 *
 *******************************************************************************/

-#ifndef _ISS_DEBUGGER_RISCV_TARGET_ADAPTER_H_
-#define _ISS_DEBUGGER_RISCV_TARGET_ADAPTER_H_
+#ifndef _ISS_ARCH_DEBUGGER_RISCV_TARGET_ADAPTER_H_
+#define _ISS_ARCH_DEBUGGER_RISCV_TARGET_ADAPTER_H_

 #include "iss/arch_if.h"
 #include <iss/arch/traits.h>
@@ -48,6 +48,10 @@

 namespace iss {
 namespace debugger {
+
+char const* const get_csr_name(unsigned);
+constexpr auto csr_offset = 100U;
+
 using namespace iss::arch;
 using namespace iss::debugger;

@@ -129,11 +133,17 @@ public:

 protected:
    static inline constexpr addr_t map_addr(const addr_t& i) { return i; }
-
+    std::string csr_xml;
    iss::arch_if* core;
    rp_thread_ref thread_idx;
 };

+// Register index of F0 for architectures with floating point registers (FLEN != 0).
+template <typename ARCH>
+typename std::enable_if<iss::arch::traits<ARCH>::FLEN != 0, unsigned>::type get_f0_offset() {
+    return iss::arch::traits<ARCH>::F0;
+}
+
+// Fallback for architectures without floating point registers (FLEN == 0), always 0.
+template <typename ARCH>
+typename std::enable_if<iss::arch::traits<ARCH>::FLEN == 0, unsigned>::type get_f0_offset() {
+    return 0;
+}
+
 template <typename ARCH> status riscv_target_adapter<ARCH>::set_gen_thread(rp_thread_ref& thread) {
    thread_idx = thread;
    return Ok;
@@ -175,34 +185,37 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::current_thread_query

 template <typename ARCH> status riscv_target_adapter<ARCH>::read_registers(std::vector<uint8_t>& data, std::vector<uint8_t>& avail) {
    CPPLOG(TRACE) << "reading target registers";
-    // return idx<0?:;
    data.clear();
    avail.clear();
    const uint8_t* reg_base = core->get_regs_base_ptr();
    auto start_reg = arch::traits<ARCH>::X0;
-    for(size_t reg_no = start_reg; reg_no < start_reg + 33 /*arch::traits<ARCH>::NUM_REGS*/; ++reg_no) {
-        auto reg_width = arch::traits<ARCH>::reg_bit_widths[reg_no] / 8;
-        unsigned offset = traits<ARCH>::reg_byte_offsets[reg_no];
-        for(size_t j = 0; j < reg_width; ++j) {
-            data.push_back(*(reg_base + offset + j));
-            avail.push_back(0xff);
+    for(size_t i = 0; i < 33; ++i) {
+        if(i < arch::traits<ARCH>::RFS || i == 32) { // slot 32 always carries the PC, also if its register index is < 32
+            auto reg_no = i < 32 ? start_reg + i : arch::traits<ARCH>::PC;
+            unsigned offset = traits<ARCH>::reg_byte_offsets[reg_no];
+            for(size_t j = 0; j < arch::traits<ARCH>::XLEN / 8; ++j) {
+                data.push_back(*(reg_base + offset + j));
+                avail.push_back(0xff);
+            }
+        } else {
+            for(size_t j = 0; j < arch::traits<ARCH>::XLEN / 8; ++j) {
+                data.push_back(0);
+                avail.push_back(0);
+            }
+        }
+    }
+    if(iss::arch::traits<ARCH>::FLEN > 0) {
+        auto fstart_reg = get_f0_offset<ARCH>();
+        for(size_t i = 0; i < 32; ++i) {
+            auto reg_no = fstart_reg + i;
+            auto reg_width = arch::traits<ARCH>::reg_bit_widths[reg_no] / 8;
+            unsigned offset = traits<ARCH>::reg_byte_offsets[reg_no];
+            for(size_t j = 0; j < reg_width; ++j) {
+                data.push_back(*(reg_base + offset + j));
+                avail.push_back(0xff);
+            }
        }
    }
-    // work around fill with F type registers
-    //    if (arch::traits<ARCH>::NUM_REGS < 65) {
-    //        auto reg_width = sizeof(typename arch::traits<ARCH>::reg_t);
-    //        for (size_t reg_no = 0; reg_no < 33; ++reg_no) {
-    //            for (size_t j = 0; j < reg_width; ++j) {
-    //                data.push_back(0x0);
-    //                avail.push_back(0x00);
-    //            }
-    //            // if(arch::traits<ARCH>::XLEN < 64)
-    //            //     for(unsigned j=0; j<4; ++j){
-    //            //         data.push_back(0x0);
-    //            //         avail.push_back(0x00);
-    //            //     }
-    //        }
-    //    }
    return Ok;
 }

@@ -210,25 +223,25 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::write_registers(cons
    auto start_reg = arch::traits<ARCH>::X0;
    auto* reg_base = core->get_regs_base_ptr();
    auto iter = data.data();
-    bool e_ext = arch::traits<ARCH>::PC < 32;
-    for(size_t reg_no = 0; reg_no < start_reg + 33 /*arch::traits<ARCH>::NUM_REGS*/; ++reg_no) {
-        if(e_ext && reg_no > 15) {
-            if(reg_no == 32) {
-                auto reg_width = arch::traits<ARCH>::reg_bit_widths[arch::traits<ARCH>::PC] / 8;
-                auto offset = traits<ARCH>::reg_byte_offsets[arch::traits<ARCH>::PC];
-                std::copy(iter, iter + reg_width, reg_base);
-            } else {
-                const uint64_t zero_val = 0;
-                auto reg_width = arch::traits<ARCH>::reg_bit_widths[15] / 8;
-                auto iter = (uint8_t*)&zero_val;
-                std::copy(iter, iter + reg_width, reg_base);
-            }
-        } else {
-            auto reg_width = arch::traits<ARCH>::reg_bit_widths[reg_no] / 8;
-            auto offset = traits<ARCH>::reg_byte_offsets[reg_no];
-            std::copy(iter, iter + reg_width, reg_base);
-            iter += 4;
-            reg_base += offset;
+    auto iter_end = data.data() + data.size();
+    for(size_t i = 0; i < 33 && iter < iter_end; ++i) {
+        auto reg_width = arch::traits<ARCH>::XLEN / 8;
+        if(i < arch::traits<ARCH>::RFS) {
+            auto offset = traits<ARCH>::reg_byte_offsets[start_reg + i];
+            std::copy(iter, iter + reg_width, reg_base + offset);
+        } else if(i == 32) {
+            auto offset = traits<ARCH>::reg_byte_offsets[arch::traits<ARCH>::PC];
+            std::copy(iter, iter + reg_width, reg_base + offset);
+        }
+        iter += reg_width;
+    }
+    if(iss::arch::traits<ARCH>::FLEN > 0) {
+        auto fstart_reg = get_f0_offset<ARCH>();
+        auto reg_width = arch::traits<ARCH>::FLEN / 8;
+        for(size_t i = 0; i < 32 && iter < iter_end; ++i) {
+            unsigned offset = traits<ARCH>::reg_byte_offsets[fstart_reg + i];
+            std::copy(iter, iter + reg_width, reg_base + offset);
+            iter += reg_width;
        }
    }
    return Ok;
@@ -236,7 +249,7 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::write_registers(cons

 template <typename ARCH>
 status riscv_target_adapter<ARCH>::read_single_register(unsigned int reg_no, std::vector<uint8_t>& data, std::vector<uint8_t>& avail) {
-    if(reg_no < 65) {
+    if(reg_no < csr_offset) {
        // auto reg_size = arch::traits<ARCH>::reg_bit_width(static_cast<typename
        // arch::traits<ARCH>::reg_e>(reg_no))/8;
        auto* reg_base = core->get_regs_base_ptr();
@@ -247,23 +260,24 @@ status riscv_target_adapter<ARCH>::read_single_register(unsigned int reg_no, std
        std::copy(reg_base + offset, reg_base + offset + reg_width, data.begin());
        std::fill(avail.begin(), avail.end(), 0xff);
    } else {
-        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_READ, traits<ARCH>::CSR, reg_no - 65);
+        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_READ, traits<ARCH>::CSR, reg_no - csr_offset);
        data.resize(sizeof(typename traits<ARCH>::reg_t));
        avail.resize(sizeof(typename traits<ARCH>::reg_t));
        std::fill(avail.begin(), avail.end(), 0xff);
        core->read(a, data.size(), data.data());
+        std::fill(avail.begin(), avail.end(), 0xff);
    }
    return data.size() > 0 ? Ok : Err;
 }

 template <typename ARCH> status riscv_target_adapter<ARCH>::write_single_register(unsigned int reg_no, const std::vector<uint8_t>& data) {
-    if(reg_no < 65) {
+    if(reg_no < csr_offset) {
        auto* reg_base = core->get_regs_base_ptr();
        auto reg_width = arch::traits<ARCH>::reg_bit_widths[static_cast<typename arch::traits<ARCH>::reg_e>(reg_no)] / 8;
        auto offset = traits<ARCH>::reg_byte_offsets[reg_no];
        std::copy(data.begin(), data.begin() + reg_width, reg_base + offset);
    } else {
-        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_WRITE, traits<ARCH>::CSR, reg_no - 65);
+        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_WRITE, traits<ARCH>::CSR, reg_no - csr_offset);
        core->write(a, data.size(), data.data());
    }
    return Ok;
@@ -276,7 +290,7 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::read_mem(uint64_t ad
 }

 template <typename ARCH> status riscv_target_adapter<ARCH>::write_mem(uint64_t addr, const std::vector<uint8_t>& data) {
-    auto a = map_addr({iss::access_type::DEBUG_READ, iss::address_type::VIRTUAL, 0, addr});
+    auto a = map_addr({iss::access_type::DEBUG_WRITE, iss::address_type::VIRTUAL, 0, addr}); // debug write access to the virtual address addr
    auto f = [&]() -> status { return core->write(a, data.size(), data.data()); };
    return srv->execute_syncronized(f);
 }
@@ -369,93 +383,57 @@ status riscv_target_adapter<ARCH>::resume_from_addr(bool step, int sig, uint64_t
 }

 template <typename ARCH> status riscv_target_adapter<ARCH>::target_xml_query(std::string& out_buf) {
-    const std::string res{"<?xml version=\"1.0\"?><!DOCTYPE target SYSTEM \"gdb-target.dtd\">"
-                          "<target><architecture>riscv:rv32</architecture>"
-                          //"  <feature name=\"org.gnu.gdb.riscv.rv32i\">\n"
-                          //"    <reg name=\"x0\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x1\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x2\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x3\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x4\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x5\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x6\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x7\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x8\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x9\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x10\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x11\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x12\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x13\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x14\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x15\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x16\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x17\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x18\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x19\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x20\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x21\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x22\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x23\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x24\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x25\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x26\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x27\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x28\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x29\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x30\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x31\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"  </feature>\n"
-                          "</target>"};
-    out_buf = res;
+    // The target description is generated on the first query and cached in csr_xml; later queries return the
+    // cached text. It contains the org.gnu.gdb.riscv.cpu feature (x registers and pc), the
+    // org.gnu.gdb.riscv.fpu feature if FLEN > 0, and the org.gnu.gdb.riscv.csr feature listing every CSR
+    // address for which a debug read of the core succeeds.
+    if(!csr_xml.size()) {
+        std::ostringstream oss;
+        // the DOCTYPE names the root element, which is <target>
+        oss << "<?xml version=\"1.0\"?><!DOCTYPE target SYSTEM \"gdb-target.dtd\"><target version=\"1.0\">\n";
+        if(iss::arch::traits<ARCH>::XLEN == 32)
+            oss << "<architecture>riscv:rv32</architecture>\n";
+        else if(iss::arch::traits<ARCH>::XLEN == 64)
+            oss << "  <architecture>riscv:rv64</architecture>\n";
+        oss << "  <feature name=\"org.gnu.gdb.riscv.cpu\">\n";
+        auto reg_base_num = iss::arch::traits<ARCH>::X0;
+        for(auto i = 0U; i < iss::arch::traits<ARCH>::RFS; ++i) {
+            oss << "    <reg name=\"x" << i << "\" bitsize=\"" << iss::arch::traits<ARCH>::reg_bit_widths[reg_base_num + i]
+                << "\" type=\"int\" regnum=\"" << i << "\"/>\n";
+        }
+        oss << "    <reg name=\"pc\" bitsize=\"" << iss::arch::traits<ARCH>::reg_bit_widths[iss::arch::traits<ARCH>::PC]
+            << "\" type=\"code_ptr\" regnum=\"" << 32U << "\"/>\n";
+        oss << "  </feature>\n";
+        if(iss::arch::traits<ARCH>::FLEN > 0) {
+            oss << "  <feature name=\"org.gnu.gdb.riscv.fpu\">\n";
+            auto fp_base_num = get_f0_offset<ARCH>();
+            auto const single_precision = iss::arch::traits<ARCH>::FLEN == 32;
+            auto type = single_precision ? "ieee_single" : "riscv_double";
+            if(!single_precision) {
+                // riscv_double is not one of GDB's predefined types, so it is declared before its first use
+                oss << "    <union id=\"riscv_double\">\n"
+                    << "      <field name=\"float\" type=\"ieee_single\"/>\n"
+                    << "      <field name=\"double\" type=\"ieee_double\"/>\n"
+                    << "    </union>\n";
+            }
+            for(auto i = 0U; i < 32; ++i) {
+                oss << "    <reg name=\"f" << i << "\" bitsize=\"" << iss::arch::traits<ARCH>::reg_bit_widths[fp_base_num + i]
+                    << "\" type=\"" << type << "\" regnum=\"" << i + 33 << "\"/>\n";
+            }
+            // attributes need the name="value" form, a bare `type int` makes the document ill-formed
+            oss << "    <reg name=\"fcsr\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN << "\" regnum=\"103\" type=\"int\"/>\n";
+            oss << "    <reg name=\"fflags\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN << "\" regnum=\"101\" type=\"int\"/>\n";
+            oss << "    <reg name=\"frm\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN << "\" regnum=\"102\" type=\"int\"/>\n";
+            oss << "  </feature>\n";
+        }
+        oss << "  <feature name=\"org.gnu.gdb.riscv.csr\">\n";
+        // scratch buffer for the probe reads; only the returned status is evaluated
+        std::vector<uint8_t> data(sizeof(typename traits<ARCH>::reg_t));
+        for(auto i = 0U; i < 4096; ++i) {
+            typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_READ, traits<ARCH>::CSR, i);
+            auto res = core->read(a, data.size(), data.data());
+            if(res == iss::Ok) {
+                oss << "    <reg name=\"" << get_csr_name(i) << "\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN
+                    << "\"  type=\"int\" regnum=\"" << (i + csr_offset) << "\"/>\n";
+            }
+        }
+        oss << "  </feature>\n";
+        oss << "</target>\n";
+        csr_xml = oss.str();
+    }
+    out_buf = csr_xml;
    return Ok;
 }
-
-/*
- *
-<?xml version="1.0"?>
-<!DOCTYPE target SYSTEM "gdb-target.dtd">
-<target>
-  <architecture>riscv:rv32</architecture>
-
-  <feature name="org.gnu.gdb.riscv.rv32i">
-    <reg name="x0"  bitsize="32" group="general"/>
-    <reg name="x1"  bitsize="32" group="general"/>
-    <reg name="x2"  bitsize="32" group="general"/>
-    <reg name="x3"  bitsize="32" group="general"/>
-    <reg name="x4"  bitsize="32" group="general"/>
-    <reg name="x5"  bitsize="32" group="general"/>
-    <reg name="x6"  bitsize="32" group="general"/>
-    <reg name="x7"  bitsize="32" group="general"/>
-    <reg name="x8"  bitsize="32" group="general"/>
-    <reg name="x9"  bitsize="32" group="general"/>
-    <reg name="x10" bitsize="32" group="general"/>
-    <reg name="x11" bitsize="32" group="general"/>
-    <reg name="x12" bitsize="32" group="general"/>
-    <reg name="x13" bitsize="32" group="general"/>
-    <reg name="x14" bitsize="32" group="general"/>
-    <reg name="x15" bitsize="32" group="general"/>
-    <reg name="x16" bitsize="32" group="general"/>
-    <reg name="x17" bitsize="32" group="general"/>
-    <reg name="x18" bitsize="32" group="general"/>
-    <reg name="x19" bitsize="32" group="general"/>
-    <reg name="x20" bitsize="32" group="general"/>
-    <reg name="x21" bitsize="32" group="general"/>
-    <reg name="x22" bitsize="32" group="general"/>
-    <reg name="x23" bitsize="32" group="general"/>
-    <reg name="x24" bitsize="32" group="general"/>
-    <reg name="x25" bitsize="32" group="general"/>
-    <reg name="x26" bitsize="32" group="general"/>
-    <reg name="x27" bitsize="32" group="general"/>
-    <reg name="x28" bitsize="32" group="general"/>
-    <reg name="x29" bitsize="32" group="general"/>
-    <reg name="x30" bitsize="32" group="general"/>
-    <reg name="x31" bitsize="32" group="general"/>
-  </feature>
-
-</target>
-
- */
 } // namespace debugger
 } // namespace iss

-#endif /* _ISS_DEBUGGER_RISCV_TARGET_ADAPTER_H_ */
+#endif /* _ISS_ARCH_DEBUGGER_RISCV_TARGET_ADAPTER_H_ */
--- a/src/iss/mmio/clic.h
+++ b/src/iss/mmio/clic.h
@@ -0,0 +1,252 @@
+// This header defines non-template entities (clic_config, the register helpers), so it must only be seen
+// once per translation unit.
+#pragma once
+
+#include "iss/arch/riscv_hart_common.h"
+#include "iss/vm_types.h"
+#include "memory_if.h"
+#include <array>
+#include <cstdint>
+#include <tuple>
+#include <util/logging.h>
+#include <vector>
+
+namespace iss {
+namespace mmio {
+// Construction time parameters of the CLIC model.
+struct clic_config {
+    // first address of the 0x8000 byte wide CLIC register window
+    uint64_t clic_base = 0xc0000000;
+    // width used to derive the initial level and threshold values
+    unsigned clic_int_ctl_bits = 4;
+    // number of entries in the clicintip/clicintie/clicintattr/clicintctl register array
+    unsigned clic_num_irq = 16;
+    // number of clicinttrig registers reachable at offset 0x40
+    unsigned clic_num_trigger = 0;
+    // when set, the user mode CSRs (utvt, uintstatus, uintthresh) get callbacks as well
+    bool nmode = false;
+};
+
+/**
+ * Copies bytes out of a 32 bit register image.
+ *
+ * @param reg    register value to read from
+ * @param offs   byte offset of the first byte inside the register; values other than 1, 2 and 3 select byte 0
+ * @param data   destination buffer of at least length bytes
+ * @param length number of bytes requested
+ *
+ * Only the bytes that exist inside the register are copied. Bytes requested beyond the end of the register are
+ * returned as zero instead of reading memory next to the local copy of reg.
+ */
+inline void read_reg_with_offset(uint32_t reg, uint8_t offs, uint8_t* const data, unsigned length) {
+    auto const* reg_ptr = reinterpret_cast<uint8_t const*>(&reg);
+    unsigned const first = offs <= 3 ? offs : 0;
+    unsigned const room = sizeof(reg) - first;
+    unsigned const count = length < room ? length : room;
+    for(auto i = 0U; i < count; ++i)
+        *(data + i) = *(reg_ptr + first + i);
+    for(auto i = count; i < length; ++i)
+        *(data + i) = 0;
+}
+
+/**
+ * Copies bytes into a 32 bit register image.
+ *
+ * @param reg    register to modify
+ * @param offs   byte offset of the first byte inside the register; values other than 1, 2 and 3 select byte 0
+ * @param data   source buffer holding length bytes
+ * @param length number of bytes offered
+ *
+ * At most the bytes from offs up to the end of the register are written; surplus source bytes are dropped so
+ * that memory following reg is never touched.
+ */
+inline void write_reg_with_offset(uint32_t& reg, uint8_t offs, const uint8_t* const data, unsigned length) {
+    auto* reg_ptr = reinterpret_cast<uint8_t*>(&reg);
+    unsigned const first = offs <= 3 ? offs : 0;
+    unsigned const room = sizeof(reg) - first;
+    unsigned const count = length < room ? length : room;
+    for(auto i = 0U; i < count; ++i)
+        *(reg_ptr + first + i) = *(data + i);
+}
+
+/**
+ * Memory hierarchy element providing the CLIC register window.
+ *
+ * Accesses lying completely inside [cfg.clic_base, cfg.clic_base + 0x8000) are served by read_clic()/write_clic(),
+ * all other accesses are forwarded to the element installed with set_next(). The constructor registers the
+ * callbacks for the CLIC related CSRs through the priv_if handle it receives.
+ */
+template <typename WORD_TYPE> struct clic : public memory_elem {
+    using this_class = clic<WORD_TYPE>;
+    using reg_t = WORD_TYPE;
+    constexpr static unsigned WORD_LEN = sizeof(WORD_TYPE) * 8;
+
+    clic(arch::priv_if<WORD_TYPE> hart_if, clic_config cfg)
+    : hart_if(hart_if)
+    , cfg(cfg) {
+        clic_int_reg.resize(cfg.clic_num_irq, clic_int_reg_t{.raw = 0});
+        clic_cfg_reg = 0x30;
+        clic_mact_lvl = clic_mprev_lvl = (1 << (cfg.clic_int_ctl_bits)) - 1;
+        clic_uact_lvl = clic_uprev_lvl = (1 << (cfg.clic_int_ctl_bits)) - 1;
+        // machine mode CSRs; mxnti, mscratchcsw and mscratchcswl get no callbacks here
+        hart_if.csr_rd_cb[arch::mtvt] = MK_CSR_RD_CB(read_plain);
+        hart_if.csr_wr_cb[arch::mtvt] = MK_CSR_WR_CB(write_xtvt);
+        hart_if.csr_rd_cb[arch::mintstatus] = MK_CSR_RD_CB(read_intstatus);
+        hart_if.csr_wr_cb[arch::mintstatus] = MK_CSR_WR_CB(write_null);
+        hart_if.csr_rd_cb[arch::mintthresh] = MK_CSR_RD_CB(read_plain);
+        hart_if.csr_wr_cb[arch::mintthresh] = MK_CSR_WR_CB(write_intthresh);
+        if(cfg.nmode) {
+            // user mode counterparts
+            hart_if.csr_rd_cb[arch::utvt] = MK_CSR_RD_CB(read_plain);
+            hart_if.csr_wr_cb[arch::utvt] = MK_CSR_WR_CB(write_xtvt);
+            hart_if.csr_rd_cb[arch::uintstatus] = MK_CSR_RD_CB(read_intstatus);
+            hart_if.csr_wr_cb[arch::uintstatus] = MK_CSR_WR_CB(write_null);
+            hart_if.csr_rd_cb[arch::uintthresh] = MK_CSR_RD_CB(read_plain);
+            hart_if.csr_wr_cb[arch::uintthresh] = MK_CSR_WR_CB(write_intthresh);
+        }
+        hart_if.csr[arch::mintthresh] = (1 << (cfg.clic_int_ctl_bits)) - 1;
+        hart_if.csr[arch::uintthresh] = (1 << (cfg.clic_int_ctl_bits)) - 1;
+    }
+
+    ~clic() = default;
+
+    // Returns the delegates the upstream element uses to reach this element.
+    memory_if get_mem_if() override {
+        return memory_if{.rd_mem{util::delegate<rd_mem_func_sig>::from<this_class, &this_class::read_mem>(this)},
+                         .wr_mem{util::delegate<wr_mem_func_sig>::from<this_class, &this_class::write_mem>(this)}};
+    }
+
+    // Installs the element that receives every access outside of the CLIC window.
+    void set_next(memory_if mem) override { down_stream_mem = mem; }
+
+    // First and last (inclusive) address of the CLIC window.
+    std::tuple<uint64_t, uint64_t> get_range() override { return {cfg.clic_base, cfg.clic_base + 0x7fff}; }
+
+private:
+    // True if the access [addr, addr + length) lies completely inside the window reported by get_range().
+    // The upper bound is inclusive of the last byte, hence `<=` against the first address behind the window.
+    bool hits_clic_window(uint64_t addr, unsigned length) const {
+        return addr >= cfg.clic_base && (addr + length) <= (cfg.clic_base + 0x8000);
+    }
+
+    iss::status read_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t* data) {
+        if(hits_clic_window(addr, length))
+            return read_clic(addr, length, data);
+        return down_stream_mem.rd_mem(access, addr, length, data);
+    }
+
+    iss::status write_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t const* data) {
+        if(hits_clic_window(addr, length))
+            return write_clic(addr, length, data);
+        return down_stream_mem.wr_mem(access, addr, length, data);
+    }
+
+    iss::status read_clic(uint64_t addr, unsigned length, uint8_t* data);
+
+    iss::status write_clic(uint64_t addr, unsigned length, uint8_t const* data);
+
+    // CSR write callback that discards the value.
+    iss::status write_null(unsigned addr, reg_t val) { return iss::status::Ok; }
+
+    // CSR read callback returning the stored CSR value unmodified.
+    iss::status read_plain(unsigned addr, reg_t& val) {
+        val = hart_if.csr[addr];
+        return iss::Ok;
+    }
+
+    // CSR write callback for mtvt/utvt: the six least significant bits are always stored as zero.
+    iss::status write_xtvt(unsigned addr, reg_t val) {
+        hart_if.csr[addr] = val & ~0x3fULL;
+        return iss::Ok;
+    }
+
+    iss::status read_cause(unsigned addr, reg_t& val);
+    iss::status write_cause(unsigned addr, reg_t val);
+
+    iss::status read_intstatus(unsigned addr, reg_t& val);
+    iss::status write_intthresh(unsigned addr, reg_t val);
+
+protected:
+    arch::priv_if<WORD_TYPE> hart_if;
+    memory_if down_stream_mem;
+    clic_config cfg;
+    uint8_t clic_cfg_reg{0};
+    // value-initialised so that trigger registers read as zero before their first write
+    std::array<uint32_t, 32> clic_inttrig_reg{};
+    // one 32 bit register per interrupt, byte 0 = ip, byte 1 = ie, byte 2 = attr, byte 3 = ctl
+    union clic_int_reg_t {
+        struct {
+            uint8_t ip;
+            uint8_t ie;
+            uint8_t attr;
+            uint8_t ctl;
+        };
+        uint32_t raw;
+    };
+    std::vector<clic_int_reg_t> clic_int_reg;
+    uint8_t clic_mprev_lvl{0}, clic_uprev_lvl{0};
+    uint8_t clic_mact_lvl{0}, clic_uact_lvl{0};
+};
+
+/**
+ * Serves a read inside the CLIC window.
+ *
+ * cliccfg sits at offset 0, the clicinttrig registers start at offset 0x40 and the per interrupt registers at
+ * offset 0x1000. Register indices are computed relative to cfg.clic_base, so the window does not have to be
+ * aligned to its size. Everything that is not backed by a register reads as zero. Always returns iss::Ok.
+ */
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::read_clic(uint64_t addr, unsigned length, uint8_t* const data) {
+    auto const trig_base = cfg.clic_base + 0x40;
+    auto const int_base = cfg.clic_base + 0x1000;
+    auto const fill_zero = [data, length](unsigned from) {
+        for(auto i = from; i < length; ++i)
+            *(data + i) = 0;
+    };
+    if(addr == cfg.clic_base) { // cliccfg
+        *data = clic_cfg_reg;
+        fill_zero(1);
+    } else if(addr >= trig_base && (addr + length) <= (trig_base + cfg.clic_num_trigger * 4)) { // clicinttrig
+        auto const idx = (addr - trig_base) / 4;
+        // the backing array has a fixed size, cfg.clic_num_trigger may exceed it
+        if(idx < clic_inttrig_reg.size())
+            read_reg_with_offset(clic_inttrig_reg[idx], addr & 0x3, data, length);
+        else
+            fill_zero(0);
+    } else if(addr >= int_base && (addr + length) <= (int_base + cfg.clic_num_irq * 4)) { // clicintip/clicintie/clicintattr/clicintctl
+        auto const idx = (addr - int_base) / 4;
+        read_reg_with_offset(clic_int_reg[idx].raw, addr & 0x3, data, length);
+    } else {
+        fill_zero(0);
+    }
+    return iss::Ok;
+}
+
+/**
+ * Serves a write inside the CLIC window.
+ *
+ * Uses the same register layout as read_clic(); indices are computed relative to cfg.clic_base. Writes to
+ * locations without a backing register are ignored. Always returns iss::Ok.
+ */
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::write_clic(uint64_t addr, unsigned length, const uint8_t* const data) {
+    auto const trig_base = cfg.clic_base + 0x40;
+    auto const int_base = cfg.clic_base + 0x1000;
+    if(addr == cfg.clic_base) { // cliccfg
+        // only bits 1..4 are writable
+        clic_cfg_reg = (clic_cfg_reg & ~0x1e) | (*data & 0x1e);
+    } else if(addr >= trig_base && (addr + length) <= (trig_base + cfg.clic_num_trigger * 4)) { // clicinttrig
+        auto const idx = (addr - trig_base) / 4;
+        // the backing array has a fixed size, cfg.clic_num_trigger may exceed it
+        if(idx < clic_inttrig_reg.size())
+            write_reg_with_offset(clic_inttrig_reg[idx], addr & 0x3, data, length);
+    } else if(addr >= int_base && (addr + length) <= (int_base + cfg.clic_num_irq * 4)) { // clicintip/clicintie/clicintattr/clicintctl
+        auto const idx = (addr - int_base) / 4;
+        write_reg_with_offset(clic_int_reg[idx].raw, addr & 0x3, data, length);
+        clic_int_reg[idx].raw &= 0xf0c70101; // clicIntCtlBits->0xf0, clicintattr->0xc7, clicintie->0x1, clicintip->0x1
+    }
+    return iss::Ok;
+}
+
+/**
+ * CSR read callback for the cause registers.
+ *
+ * If the mode field of mtvec is 3, the stored value is masked to the top bit, the exception code and bits
+ * 16..19, and the previous interrupt level and the previous-enable/previous-privilege bits of mstatus are
+ * merged in. Otherwise only the top bit and the exception code are returned.
+ */
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::read_cause(unsigned addr, reg_t& val) {
+    // most significant bit of the register; shifting by the full register width would drop it (or be undefined)
+    auto const top_bit = static_cast<reg_t>(1) << (sizeof(reg_t) * 8 - 1);
+    if((hart_if.csr[arch::mtvec] & 0x3) == 3) {
+        val = hart_if.csr[addr] & (top_bit | (hart_if.mcause_max_irq - 1) | (0xfUL << 16));
+        auto mode = (addr >> 8) & 0x3;
+        switch(mode) {
+        case 0:
+            val |= clic_uprev_lvl << 16;
+            val |= hart_if.mstatus.UPIE << 27;
+            break;
+        default:
+            val |= clic_mprev_lvl << 16;
+            val |= hart_if.mstatus.MPIE << 27;
+            val |= hart_if.mstatus.MPP << 28;
+            break;
+        }
+    } else
+        val = hart_if.csr[addr] & (top_bit | (hart_if.mcause_max_irq - 1));
+    return iss::Ok;
+}
+
+/**
+ * CSR write callback for the cause registers.
+ *
+ * The top bit and the exception code are always taken from val; if the mode field of mtvec is 3, bits 16..19
+ * are writable as well and the previous interrupt level plus the previous-enable/previous-privilege bits of
+ * mstatus are updated from val.
+ */
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::write_cause(unsigned addr, reg_t val) {
+    auto const clic_mode = (hart_if.csr[arch::mtvec] & 0x3) == 3;
+    auto writable = ((1UL << (sizeof(WORD_TYPE) * 8 - 1)) | (hart_if.mcause_max_irq - 1));
+    if(clic_mode)
+        writable |= (0xfUL << 16);
+    hart_if.csr[addr] = (val & writable) | (hart_if.csr[addr] & ~writable);
+    if(!clic_mode)
+        return iss::Ok;
+    // the level bits below the configured width are stored as ones
+    auto const lvl_fill = (1 << (8 - cfg.clic_int_ctl_bits)) - 1;
+    auto const prev_lvl = ((val >> 16) & 0xff) | lvl_fill;
+    if(((addr >> 8) & 0x3) == 0) {
+        clic_uprev_lvl = prev_lvl;
+        hart_if.mstatus.UPIE = (val >> 27) & 0x1;
+    } else {
+        clic_mprev_lvl = prev_lvl;
+        hart_if.mstatus.MPIE = (val >> 27) & 0x1;
+        hart_if.mstatus.MPP = (val >> 28) & 0x3;
+    }
+    return iss::Ok;
+}
+
+/**
+ * CSR read callback for mintstatus/uintstatus.
+ *
+ * Bits 0..7 carry the active user level; for a machine mode CSR address (bits 8..9 of addr equal 3) the active
+ * machine level is placed in bits 24..31.
+ */
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::read_intstatus(unsigned addr, reg_t& val) {
+    auto const mode = (addr >> 8) & 0x3;
+    val = clic_uact_lvl & 0xff;
+    if(mode == 0x3) {
+        // shift in reg_t: shifting the promoted int would sign-extend levels >= 0x80 into a 64 bit reg_t
+        val += static_cast<reg_t>(clic_mact_lvl) << 24;
+    }
+    return iss::Ok;
+}
+
+// CSR write callback for mintthresh/uintthresh: keeps the low byte of val and forces the lowest
+// cfg.clic_int_ctl_bits bits to one.
+// NOTE(review): write_cause() fills (8 - clic_int_ctl_bits) low bits instead; both agree only for a width of 4 -
+// confirm which one is intended.
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::write_intthresh(unsigned addr, reg_t val) {
+    auto const forced_ones = (1 << (cfg.clic_int_ctl_bits)) - 1;
+    hart_if.csr[addr] = (val & 0xff) | forced_ones;
+    return iss::Ok;
+}
+
+} // namespace mmio
+} // namespace iss
--- a/src/iss/mmio/memory_if.cpp
+++ b/src/iss/mmio/memory_if.cpp
@@ -0,0 +1,26 @@
+#include "memory_if.h"
+
+namespace iss {
+namespace mmio {
+// Puts e in front of all registered elements and re-links the chain.
+void memory_hierarchy::prepend(memory_elem& e) {
+    hierarchy.emplace_front(e);
+    update_chain();
+}
+// Puts e behind all registered elements and re-links the chain.
+void memory_hierarchy::append(memory_elem& e) {
+    hierarchy.emplace_back(e);
+    update_chain();
+}
+// NOTE(review): insert_before(), insert_after() and replace_last() have empty bodies; calling them leaves the
+// hierarchy unchanged. TODO confirm whether they are placeholders for a later implementation.
+void memory_hierarchy::insert_before(memory_elem&) {}
+void memory_hierarchy::insert_after(memory_elem&) {}
+void memory_hierarchy::replace_last(memory_elem&) {}
+// Walks the hierarchy front to back: every element gets register_csrs() called and each element is handed the
+// memory interface of its successor via set_next(). The last element keeps whatever successor it had.
+void memory_hierarchy::update_chain() {
+    for(size_t i = 0; i < hierarchy.size(); ++i) {
+        hierarchy[i].get().register_csrs();
+        if(i)
+            hierarchy[i - 1].get().set_next(hierarchy[i].get().get_mem_if());
+    }
+}
+
+} // namespace mmio
+} // namespace iss
--- a/src/iss/mmio/memory_if.h
+++ b/src/iss/mmio/memory_if.h
@@ -0,0 +1,76 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
+#ifndef _MEMORY_MEMORY_IF_
+#define _MEMORY_MEMORY_IF_
+
+#include "iss/vm_types.h"
+#include <cstdint>
+#include <deque>
+#include <functional>
+#include <limits>
+#include <tuple>
+#include <util/delegate.h>
+
+namespace iss {
+namespace mmio {
+
+using rd_mem_func_sig = iss::status(iss::access_type, uint64_t, unsigned, uint8_t*);
+using wr_mem_func_sig = iss::status(iss::access_type, uint64_t, unsigned, uint8_t const*);
+
+// Pair of delegates through which one memory element reaches the next one.
+struct memory_if {
+    // read: (access type, address, length in bytes, destination buffer)
+    util::delegate<rd_mem_func_sig> rd_mem;
+    // write: (access type, address, length in bytes, source buffer)
+    util::delegate<wr_mem_func_sig> wr_mem;
+};
+
+// Interface of one stage of the memory hierarchy.
+struct memory_elem {
+    // polymorphic base: make destruction through a memory_elem pointer or reference well defined
+    virtual ~memory_elem() = default;
+    // Delegates other elements use to access this element.
+    virtual memory_if get_mem_if() = 0;
+    // Hands over the interface of the element following this one in the chain.
+    virtual void set_next(memory_if) = 0;
+    // Hook invoked by memory_hierarchy::update_chain(); does nothing by default.
+    virtual void register_csrs() {}
+    // First and last address handled by the element; the default covers the whole 64 bit address space.
+    virtual std::tuple<uint64_t, uint64_t> get_range() { return {0, std::numeric_limits<uint64_t>::max()}; }
+};
+
+// Ordered list of memory elements. The elements are stored by reference, so the caller keeps ownership and
+// has to keep them alive while they are part of the hierarchy.
+struct memory_hierarchy {
+    // Adds an element at the front and re-links the chain.
+    void prepend(memory_elem&);
+    // Adds an element at the back and re-links the chain.
+    void append(memory_elem&);
+    // NOTE(review): the following three functions are defined with empty bodies in memory_if.cpp.
+    void insert_before(memory_elem&);
+    void insert_after(memory_elem&);
+    void replace_last(memory_elem&);
+
+protected:
+    // Calls register_csrs() on every element and connects each element to its successor.
+    void update_chain();
+    std::deque<std::reference_wrapper<memory_elem>> hierarchy;
+};
+
+} // namespace mmio
+} // namespace iss
+#endif
--- a/src/iss/mmio/memory_with_htif.h
+++ b/src/iss/mmio/memory_with_htif.h
@@ -0,0 +1,62 @@
+#ifndef _MEMORY_WITH_HTIF_
+#define _MEMORY_WITH_HTIF_
+
+#include "iss/arch/riscv_hart_common.h"
+#include "iss/vm_types.h"
+#include "memory_if.h"
+#include <util/logging.h>
+#include <util/sparse_array.h>
+
+namespace iss {
+namespace mmio {
+/**
+ * Leaf element of the memory hierarchy: a sparse byte memory with tohost/fromhost (HTIF) handling on writes.
+ */
+template <typename WORD_TYPE> struct memory_with_htif : public memory_elem {
+    using this_class = memory_with_htif<WORD_TYPE>;
+    constexpr static unsigned WORD_LEN = sizeof(WORD_TYPE) * 8;
+
+    memory_with_htif(arch::priv_if<WORD_TYPE> hart_if)
+    : hart_if(hart_if) {}
+
+    ~memory_with_htif() = default;
+
+    // Delegates other elements use to access this memory.
+    memory_if get_mem_if() override {
+        return memory_if{.rd_mem{util::delegate<rd_mem_func_sig>::from<this_class, &this_class::read_mem>(this)},
+                         .wr_mem{util::delegate<wr_mem_func_sig>::from<this_class, &this_class::write_mem>(this)}};
+    }
+
+    void set_next(memory_if) override {
+        // intentionally left empty, leaf element
+    }
+
+private:
+    // Copies length bytes starting at addr into data; addresses wrap around at mem.size().
+    iss::status read_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t* data) {
+        for(auto offs = 0U; offs < length; ++offs) {
+            *(data + offs) = mem[(addr + offs) % mem.size()];
+        }
+        return iss::Ok;
+    }
+
+    // Stores length bytes at addr and afterwards evaluates the tohost/fromhost locations of the hart.
+    iss::status write_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t const* data) {
+        // copy page by page: a write crossing a page boundary must not run past the end of the first page
+        auto cur_addr = addr;
+        auto const* src = data;
+        auto remaining = length;
+        while(remaining > 0) {
+            auto const page_offs = cur_addr & mem.page_addr_mask;
+            auto const room = mem.page_size - page_offs;
+            auto const chunk = remaining < room ? remaining : static_cast<unsigned>(room);
+            mem_type::page_type& page = mem(cur_addr / mem.page_size);
+            std::copy(src, src + chunk, page.data() + page_offs);
+            cur_addr += chunk;
+            src += chunk;
+            remaining -= chunk;
+        }
+        // page containing the first written byte, used by the fromhost handling below
+        mem_type::page_type& p = mem(addr / mem.page_size);
+        // this->tohost handling in case of riscv-test
+        // according to https://github.com/riscv-software-src/riscv-isa-sim/issues/364#issuecomment-607657754:
+        // NOTE(review): this relies on the meaning of `&&` for iss::access_type, which is not visible in this
+        // header - confirm that it selects functional (non-debug) accesses as intended.
+        if(access && iss::access_type::FUNC) {
+            if(addr == hart_if.tohost) {
+                return hart_if.exec_htif(data);
+            }
+            if((WORD_LEN == 32 && addr == hart_if.fromhost + 4) || (WORD_LEN == 64 && addr == hart_if.fromhost)) {
+                // NOTE(review): assumes tohost and fromhost lie in the page that was just written - confirm
+                uint64_t fhostvar = *reinterpret_cast<uint64_t*>(p.data() + (hart_if.fromhost & mem.page_addr_mask));
+                *reinterpret_cast<uint64_t*>(p.data() + (hart_if.tohost & mem.page_addr_mask)) = fhostvar;
+            }
+        }
+        return iss::Ok;
+    }
+
+protected:
+    using mem_type = util::sparse_array<uint8_t, 1ULL << 32>;
+    mem_type mem;
+    arch::priv_if<WORD_TYPE> hart_if;
+};
+} // namespace mmio
+} // namespace iss
+#endif // _MEMORY_WITH_HTIF_
--- a/src/iss/mmio/pmp.h
+++ b/src/iss/mmio/pmp.h
@@ -0,0 +1,212 @@
+#pragma once
+
+// clic_config, read_reg_with_offset() and write_reg_with_offset() are defined in clic.h. Defining them a
+// second time in this header makes every translation unit including both headers ill-formed, so they are
+// pulled in from there.
+#include "clic.h"
+#include "iss/arch/riscv_hart_common.h"
+#include "iss/vm_types.h"
+#include "memory_if.h"
+#include <util/logging.h>
+
+namespace iss {
+namespace mmio {
+
+/**
+ * Memory hierarchy element implementing physical memory protection.
+ *
+ * Every non-debug access is checked with pmp_check() before it is forwarded to the element installed with
+ * set_next(). A rejected access stores the address in hart_if.fault_data, sets hart_if.reg.trap_state and
+ * returns iss::Err. Debug accesses are always forwarded.
+ */
+template <typename WORD_TYPE> struct pmp : public memory_elem {
+    using this_class = pmp<WORD_TYPE>;
+    using reg_t = WORD_TYPE;
+    constexpr static unsigned WORD_LEN = sizeof(WORD_TYPE) * 8;
+
+    // The second parameter is kept for source compatibility only; pmp has no configuration of its own.
+    pmp(arch::priv_if<WORD_TYPE> hart_if, clic_config cfg = {})
+    : hart_if(hart_if) {
+        (void)cfg;
+        for(size_t i = arch::pmpaddr0; i <= arch::pmpaddr15; ++i) {
+            hart_if.csr_rd_cb[i] = MK_CSR_RD_CB(read_plain);
+            hart_if.csr_wr_cb[i] = MK_CSR_WR_CB(write_plain);
+        }
+        for(size_t i = arch::pmpcfg0; i < arch::pmpcfg0 + 16 / sizeof(reg_t); ++i) {
+            hart_if.csr_rd_cb[i] = MK_CSR_RD_CB(read_plain);
+            hart_if.csr_wr_cb[i] = MK_CSR_WR_CB(write_pmpcfg);
+        }
+    }
+
+    ~pmp() = default;
+
+    // Delegates other elements use to access this element.
+    memory_if get_mem_if() override {
+        return memory_if{.rd_mem{util::delegate<rd_mem_func_sig>::from<this_class, &this_class::read_mem>(this)},
+                         .wr_mem{util::delegate<wr_mem_func_sig>::from<this_class, &this_class::write_mem>(this)}};
+    }
+
+    // Installs the element that receives every access passing the check.
+    void set_next(memory_if mem) override { down_stream_mem = mem; }
+
+private:
+    iss::status read_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t* data) {
+        if(!pmp_check(access, addr, length) && !is_debug(access)) {
+            hart_if.fault_data = addr;
+            // cause 1 for a fetch, 5 for any other read
+            hart_if.reg.trap_state = (1UL << 31) | ((access == access_type::FETCH ? 1 : 5) << 16);
+            return iss::Err;
+        }
+        return down_stream_mem.rd_mem(access, addr, length, data);
+    }
+
+    iss::status write_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t const* data) {
+        if(!pmp_check(access, addr, length) && !is_debug(access)) {
+            hart_if.fault_data = addr;
+            hart_if.reg.trap_state = (1UL << 31) | (7 << 16); // cause 7
+            return iss::Err;
+        }
+        return down_stream_mem.wr_mem(access, addr, length, data);
+    }
+
+    // CSR read callback returning the stored CSR value unmodified.
+    iss::status read_plain(unsigned addr, reg_t& val) {
+        val = hart_if.csr[addr];
+        return iss::Ok;
+    }
+
+    // CSR write callback storing the value unmodified.
+    iss::status write_plain(unsigned addr, reg_t const& val) {
+        hart_if.csr[addr] = val;
+        return iss::Ok;
+    }
+
+    // CSR write callback for the pmpcfg registers: bits 5 and 6 of every configuration byte are stored as zero.
+    // The mask covers all bytes of reg_t; for a 32 bit reg_t the conversion truncates it to 0x9f9f9f9f.
+    iss::status write_pmpcfg(unsigned addr, reg_t val) {
+        hart_if.csr[addr] = val & static_cast<reg_t>(0x9f9f9f9f9f9f9f9fULL);
+        return iss::Ok;
+    }
+
+    bool pmp_check(const access_type type, const uint64_t addr, const unsigned len);
+
+protected:
+    arch::priv_if<WORD_TYPE> hart_if;
+    memory_if down_stream_mem;
+};
+
+/**
+ * Checks an access against the 16 PMP entries.
+ *
+ * @param type kind of access (compared against READ, WRITE and FETCH)
+ * @param addr first address of the access
+ * @param len  length of the access in bytes
+ * @return true if the access is allowed
+ *
+ * The entries are evaluated in ascending order and the first entry matching any part of the access decides.
+ * An entry that matches only a part of the access rejects it. If no entry matches, the access is allowed when
+ * no entry is active or the hart is in machine mode.
+ */
+template <typename WORD_TYPE> bool pmp<WORD_TYPE>::pmp_check(const access_type type, const uint64_t addr, const unsigned len) {
+    constexpr auto PMP_SHIFT = 2U;
+    constexpr auto PMP_R = 0x1U;
+    constexpr auto PMP_W = 0x2U;
+    constexpr auto PMP_X = 0x4U;
+    constexpr auto PMP_A = 0x18U;
+    constexpr auto PMP_L = 0x80U;
+    constexpr auto PMP_TOR = 0x1U;
+    constexpr auto PMP_NA4 = 0x2U;
+    constexpr auto PMP_NAPOT = 0x3U;
+    reg_t base = 0;
+    auto any_active = false;
+    auto const cfg_reg_size = sizeof(reg_t);
+    for(size_t i = 0; i < 16; i++) {
+        reg_t tor = hart_if.csr[arch::pmpaddr0 + i] << PMP_SHIFT;
+        // every entry owns one byte of its pmpcfg register, so the shift amount is a multiple of 8
+        // NOTE(review): consecutive pmpcfg CSR numbers are used for all register widths, matching the callback
+        // registration of this class - confirm the numbering for 64 bit harts.
+        uint8_t cfg = hart_if.csr[arch::pmpcfg0 + (i / cfg_reg_size)] >> ((i % cfg_reg_size) * 8);
+        if(cfg & PMP_A) {
+            any_active = true;
+            auto pmp_a = (cfg & PMP_A) >> 3;
+            auto is_tor = pmp_a == PMP_TOR;
+            auto is_na4 = pmp_a == PMP_NA4;
+
+            // address mask of a naturally aligned region, derived from the trailing ones of pmpaddr
+            reg_t mask = (hart_if.csr[arch::pmpaddr0 + i] << 1) | (!is_na4);
+            mask = ~(mask & ~(mask + 1)) << PMP_SHIFT;
+
+            // Check each 4-byte sector of the access
+            auto any_match = false;
+            auto all_match = true;
+            for(reg_t offset = 0; offset < len; offset += 1 << PMP_SHIFT) {
+                reg_t cur_addr = addr + offset;
+                auto napot_match = ((cur_addr ^ tor) & mask) == 0;
+                auto tor_match = base <= (cur_addr + len - 1) && cur_addr < tor;
+                auto match = is_tor ? tor_match : napot_match;
+                any_match |= match;
+                all_match &= match;
+            }
+            if(any_match) {
+                // If the PMP matches only a strict subset of the access, fail it
+                if(!all_match)
+                    return false;
+                return (hart_if.reg.PRIV == arch::PRIV_M && !(cfg & PMP_L)) || (type == access_type::READ && (cfg & PMP_R)) ||
+                       (type == access_type::WRITE && (cfg & PMP_W)) || (type == access_type::FETCH && (cfg & PMP_X));
+            }
+        }
+        // the address of this entry is the lower bound of a following top-of-range entry
+        base = tor;
+    }
+    return !any_active || hart_if.reg.PRIV == arch::PRIV_M;
+}
+
+} // namespace mmio
+} // namespace iss
--- a/src/iss/semihosting/semihosting.cpp
+++ b/src/iss/semihosting/semihosting.cpp
@@ -1,35 +1,36 @@
 #include "semihosting.h"
-#include <cstdint>
-#include <map>
-#include <iss/vm_types.h>
-#include <stdexcept>
 #include <chrono>
+#include <cstdint>
+#include <iss/vm_types.h>
+#include <map>
+#include <stdexcept>
 // explanation of syscalls can be found at https://github.com/SpinalHDL/openocd_riscv/blob/riscv_spinal/src/target/semihosting_common.h

-const char *SYS_OPEN_MODES_STRS[] = { "r", "rb", "r+", "r+b", "w", "wb", "w+", "w+b", "a", "ab", "a+", "a+b" };
+const char* SYS_OPEN_MODES_STRS[] = {"r", "rb", "r+", "r+b", "w", "wb", "w+", "w+b", "a", "ab", "a+", "a+b"};

-template <typename T> T sh_read_field(iss::arch_if* arch_if_ptr, T addr, int len=4) {
+template <typename T> T sh_read_field(iss::arch_if* arch_if_ptr, T addr, int len = 4) {
    uint8_t bytes[4];
    auto res = arch_if_ptr->read(iss::address_type::PHYSICAL, iss::access_type::DEBUG_READ, 0, addr, 4, &bytes[0]);
-    //auto res = arch_if_ptr->read(iss::address_type::PHYSICAL, iss::access_type::DEBUG_READ, 0, *parameter, 1, &character);
+    // auto res = arch_if_ptr->read(iss::address_type::PHYSICAL, iss::access_type::DEBUG_READ, 0, *parameter, 1, &character);

-    if(res != iss::Ok){
-        return 0; //TODO THROW ERROR
-    } else return static_cast<T>(bytes[0]) | (static_cast<T>(bytes[1]) << 8) | (static_cast<T>(bytes[2]) << 16) | (static_cast<T>(bytes[3]) << 24);
+    if(res != iss::Ok) {
+        return 0; // TODO THROW ERROR
+    } else
+        return static_cast<T>(bytes[0]) | (static_cast<T>(bytes[1]) << 8) | (static_cast<T>(bytes[2]) << 16) |
+               (static_cast<T>(bytes[3]) << 24);
 }

-template <typename T> std::string sh_read_string(iss::arch_if* arch_if_ptr, T addr, T str_len){
+template <typename T> std::string sh_read_string(iss::arch_if* arch_if_ptr, T addr, T str_len) {
    std::vector<uint8_t> buffer(str_len);
-    for (int i = 0; i < str_len; i++ ) {
+    for(int i = 0; i < str_len; i++) {
        buffer[i] = sh_read_field(arch_if_ptr, addr + i, 1);
    }
    std::string str(buffer.begin(), buffer.end());
    return str;
 }

-
 template <typename T> void semihosting_callback<T>::operator()(iss::arch_if* arch_if_ptr, T* call_number, T* parameter) {
-    static std::map<T, FILE *> openFiles;
+    static std::map<T, FILE*> openFiles;
    static T file_count = 3;
    static T semihostingErrno;

@@ -38,18 +39,18 @@ template <typename T> void semihosting_callback<T>::operator()(iss::arch_if* arc
        auto end = std::chrono::high_resolution_clock::now(); // end measurement
        auto elapsed = end - timeVar;
        auto millis = std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count();
-        *call_number = millis; //TODO get time now
+        *call_number = millis; // TODO get time now
        break;
    }
    case semihosting_syscalls::SYS_CLOSE: {
        T file_handle = *parameter;
-        if (openFiles.size() <= file_handle && file_handle < 0) {
+        if(openFiles.find(file_handle) == openFiles.end()) { // unknown handle: report EBADF instead of closing a null FILE*
            semihostingErrno = EBADF;
            return;
        }
        auto file = openFiles[file_handle];
        openFiles.erase(file_handle);
-        if (!(file == stdin || file == stdout || file == stderr)) {
+        if(!(file == stdin || file == stdout || file == stderr)) {
            int i = fclose(file);
            *call_number = i;
        } else {
@@ -80,7 +81,8 @@ template <typename T> void semihosting_callback<T>::operator()(iss::arch_if* arc
        auto file = openFiles[file_handle];

        size_t currentPos = ftell(file);
-        if (currentPos < 0) throw std::runtime_error("SYS_FLEN negative value");
+        if(currentPos < 0)
+            throw std::runtime_error("SYS_FLEN negative value");
        fseek(file, 0, SEEK_END);
        size_t length = ftell(file);
        fseek(file, currentPos, SEEK_SET);
@@ -107,30 +109,30 @@ template <typename T> void semihosting_callback<T>::operator()(iss::arch_if* arc
    }
    case semihosting_syscalls::SYS_OPEN: {
        T path_str_addr = sh_read_field<T>(arch_if_ptr, *parameter);
-        T mode = sh_read_field<T>(arch_if_ptr, 4+(*parameter));
-        T path_len = sh_read_field<T>(arch_if_ptr, 8+(*parameter));
+        T mode = sh_read_field<T>(arch_if_ptr, 4 + (*parameter));
+        T path_len = sh_read_field<T>(arch_if_ptr, 8 + (*parameter));

        std::string path_str = sh_read_string<T>(arch_if_ptr, path_str_addr, path_len);

-        //TODO LOG INFO
+        // TODO LOG INFO

-        if (mode >= 12) {
-            //TODO throw ERROR
+        if(mode >= 12) {
+            // TODO throw ERROR
            return;
        }

-        FILE *file = nullptr;
+        FILE* file = nullptr;
        if(path_str == ":tt") {
-            if (mode < 4)
+            if(mode < 4)
                file = stdin;
-            else if (mode < 8) 
+            else if(mode < 8)
                file = stdout;
            else
                file = stderr;
        } else {
            file = fopen(path_str.c_str(), SYS_OPEN_MODES_STRS[mode]);
-            if (file == nullptr) {
-                //TODO throw error
+            if(file == nullptr) {
+                // TODO throw error
                return;
            }
        }
@@ -138,45 +140,41 @@ template <typename T> void semihosting_callback<T>::operator()(iss::arch_if* arc
        openFiles[file_handle] = file;
        *call_number = file_handle;
        break;
-
    }
    case semihosting_syscalls::SYS_READ: {
-        T file_handle = sh_read_field<T>(arch_if_ptr, (*parameter)+4);
+        T file_handle = sh_read_field<T>(arch_if_ptr, (*parameter) + 4);
        T addr = sh_read_field<T>(arch_if_ptr, *parameter);
-        T count = sh_read_field<T>(arch_if_ptr, (*parameter)+8);
+        T count = sh_read_field<T>(arch_if_ptr, (*parameter) + 8);

        auto file = openFiles[file_handle];

        std::vector<uint8_t> buffer(count);
        size_t num_read = 0;
-        if (file == stdin)
-        {
+        if(file == stdin) {
            // when reading from stdin: mimic behaviour from read syscall
            // and return on newline.
-            while (num_read < count)
-            {
+            while(num_read < count) {
                char c = fgetc(file);
                buffer[num_read] = c;
                num_read++;
-                if (c == '\n')
+                if(c == '\n')
                    break;
            }
        } else {
            num_read = fread(buffer.data(), 1, count, file);
        }
        buffer.resize(num_read);
-        for(int i  = 0; i<num_read; i++) {
-            auto res = arch_if_ptr->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_READ, 0, addr+i, 1, &buffer[i]);
+        for(int i = 0; i < num_read; i++) {
+            auto res = arch_if_ptr->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_READ, 0, addr + i, 1, &buffer[i]);
            if(res != iss::Ok)
                return;
        }
        *call_number = count - num_read;
        break;
-
    }
    case semihosting_syscalls::SYS_READC: {
        uint8_t character = getchar();
-        //character = getchar();
+        // character = getchar();
        /*if(character != iss::Ok)
            std::cout << "Not OK";
            return;*/
@@ -185,17 +183,18 @@ template <typename T> void semihosting_callback<T>::operator()(iss::arch_if* arc
    }
    case semihosting_syscalls::SYS_REMOVE: {
        T path_str_addr = sh_read_field<T>(arch_if_ptr, *parameter);
-        T path_len = sh_read_field<T>(arch_if_ptr, (*parameter)+4);
+        T path_len = sh_read_field<T>(arch_if_ptr, (*parameter) + 4);
        std::string path_str = sh_read_string<T>(arch_if_ptr, path_str_addr, path_len);

-        if(remove(path_str.c_str())<0) *call_number = -1;
+        if(remove(path_str.c_str()) < 0)
+            *call_number = -1;
        break;
    }
    case semihosting_syscalls::SYS_RENAME: {
        T path_str_addr_old = sh_read_field<T>(arch_if_ptr, *parameter);
-        T path_len_old = sh_read_field<T>(arch_if_ptr, (*parameter)+4);
-        T path_str_addr_new = sh_read_field<T>(arch_if_ptr, (*parameter)+8);
-        T path_len_new = sh_read_field<T>(arch_if_ptr, (*parameter)+12);
+        T path_len_old = sh_read_field<T>(arch_if_ptr, (*parameter) + 4);
+        T path_str_addr_new = sh_read_field<T>(arch_if_ptr, (*parameter) + 8);
+        T path_len_new = sh_read_field<T>(arch_if_ptr, (*parameter) + 12);

        std::string path_str_old = sh_read_string<T>(arch_if_ptr, path_str_addr_old, path_len_old);
        std::string path_str_new = sh_read_string<T>(arch_if_ptr, path_str_addr_new, path_len_new);
@@ -204,17 +203,18 @@ template <typename T> void semihosting_callback<T>::operator()(iss::arch_if* arc
    }
    case semihosting_syscalls::SYS_SEEK: {
        T file_handle = sh_read_field<T>(arch_if_ptr, *parameter);
-        T pos = sh_read_field<T>(arch_if_ptr, (*parameter)+1);
+        T pos = sh_read_field<T>(arch_if_ptr, (*parameter) + 4); // second word of the parameter block
        auto file = openFiles[file_handle];

        int retval = fseek(file, pos, SEEK_SET);
-        if(retval<0) throw std::runtime_error("SYS_SEEK negative return value");
+        if(retval < 0)
+            throw std::runtime_error("SYS_SEEK negative return value");

        break;
    }
    case semihosting_syscalls::SYS_SYSTEM: {
        T cmd_addr = sh_read_field<T>(arch_if_ptr, *parameter);
-        T cmd_len = sh_read_field<T>(arch_if_ptr, (*parameter)+1); 
+        T cmd_len = sh_read_field<T>(arch_if_ptr, (*parameter) + 4); // second word of the parameter block
        std::string cmd = sh_read_string<T>(arch_if_ptr, cmd_addr, cmd_len);
        system(cmd.c_str());
        break;
@@ -224,16 +224,16 @@ template <typename T> void semihosting_callback<T>::operator()(iss::arch_if* arc
        break;
    }
    case semihosting_syscalls::SYS_TIME: {
-        //returns time in seconds scince 01.01.1970 00:00
+        // returns time in seconds since 01.01.1970 00:00
        *call_number = time(NULL);
        break;
    }
    case semihosting_syscalls::SYS_TMPNAM: {
        T buffer_addr = sh_read_field<T>(arch_if_ptr, *parameter);
-        T identifier = sh_read_field<T>(arch_if_ptr, (*parameter)+1);
-        T buffer_len = sh_read_field<T>(arch_if_ptr, (*parameter)+2);
+        T identifier = sh_read_field<T>(arch_if_ptr, (*parameter) + 4); // second word of the parameter block
+        T buffer_len = sh_read_field<T>(arch_if_ptr, (*parameter) + 8); // third word of the parameter block

-        if (identifier > 255) {
+        if(identifier > 255) {
            *call_number = -1;
            return;
        }
@@ -243,15 +243,16 @@ template <typename T> void semihosting_callback<T>::operator()(iss::arch_if* arc

        for(int i = 0; i < buffer_len; i++) {
            uint8_t character = filename[i];
-            auto res = arch_if_ptr->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_READ, 0, (*parameter)+i, 1, &character);
-            if(res != iss::Ok) return;
+            auto res = arch_if_ptr->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_READ, 0, buffer_addr + i, 1, &character); // store into the guest buffer, not over the parameter block
+            if(res != iss::Ok)
+                return;
        }
        break;
    }
    case semihosting_syscalls::SYS_WRITE: {
-        T file_handle = sh_read_field<T>(arch_if_ptr, (*parameter)+4);
+        T file_handle = sh_read_field<T>(arch_if_ptr, (*parameter) + 4);
        T addr = sh_read_field<T>(arch_if_ptr, *parameter);
-        T count = sh_read_field<T>(arch_if_ptr, (*parameter)+8);
+        T count = sh_read_field<T>(arch_if_ptr, (*parameter) + 8);

        auto file = openFiles[file_handle];
        std::string str = sh_read_string<T>(arch_if_ptr, addr, count);
--- a/src/iss/semihosting/semihosting.h
+++ b/src/iss/semihosting/semihosting.h
@@ -1,8 +1,8 @@
 #ifndef _SEMIHOSTING_H_
 #define _SEMIHOSTING_H_
-#include <iss/arch_if.h>
-#include <functional>
 #include <chrono>
+#include <functional>
+#include <iss/arch_if.h>
 /*
 * According to:
 * "Semihosting for AArch32 and AArch64, Release 2.0"
@@ -50,9 +50,10 @@ enum class semihosting_syscalls {
    USER_CMD_0x1FF = 0x1FF,
 };

-template <typename T> struct semihosting_callback{
+template <typename T> struct semihosting_callback {
    std::chrono::high_resolution_clock::time_point timeVar;
-    semihosting_callback(): timeVar(std::chrono::high_resolution_clock::now()) {}
+    semihosting_callback()
+    : timeVar(std::chrono::high_resolution_clock::now()) {}
    void operator()(iss::arch_if* arch_if_ptr, T* call_number, T* parameter);
 };

--- a/src/main.cpp
+++ b/src/main.cpp
@@ -69,7 +69,8 @@ int main(int argc, char* argv[]) {
        ("logfile,l", po::value<std::string>(), "Sets default log file.")
        ("disass,d", po::value<std::string>()->implicit_value(""), "Enables disassembly")
        ("gdb-port,g", po::value<unsigned>()->default_value(0), "enable gdb server and specify port to use")
-        ("instructions,i", po::value<uint64_t>()->default_value(std::numeric_limits<uint64_t>::max()), "max. number of instructions to simulate")
+        ("ilimit,i", po::value<uint64_t>()->default_value(std::numeric_limits<uint64_t>::max()), "max. number of instructions to simulate")
+        ("flimit", po::value<uint64_t>()->default_value(std::numeric_limits<uint64_t>::max()), "max. number of fetches to simulate")
        ("reset,r", po::value<std::string>(), "reset address")
        ("dump-ir", "dump the intermediate representation")
        ("elf,f", po::value<std::vector<std::string>>(), "ELF file(s) to load")
@@ -120,7 +121,7 @@ int main(int argc, char* argv[]) {
        iss::vm_ptr vm{nullptr};
        iss::cpu_ptr cpu{nullptr};
        semihosting_callback<uint32_t> cb{};
-        semihosting_cb_t<uint32_t> semihosting_cb = [&cb](iss::arch_if* i, uint32_t* a0, uint32_t* a1) {cb(i,a0,a1);};
+        semihosting_cb_t<uint32_t> semihosting_cb = [&cb](iss::arch_if* i, uint32_t* a0, uint32_t* a1) { cb(i, a0, a1); };
        std::string isa_opt(clim["isa"].as<std::string>());
        if(isa_opt.size() == 0 || isa_opt == "?") {
            auto list = f.get_names();
@@ -140,7 +141,10 @@ int main(int argc, char* argv[]) {
            std::tie(cpu, vm) = f.create(isa_opt, clim["gdb-port"].as<unsigned>(), &semihosting_cb);
        }
        if(!cpu) {
-            CPPLOG(ERR) << "Could not create cpu for isa " << isa_opt << " and backend " << clim["backend"].as<std::string>() << std::endl;
+            auto list = f.get_names();
+            std::sort(std::begin(list), std::end(list));
+            CPPLOG(ERR) << "Could not create cpu for isa " << isa_opt << " and backend " << clim["backend"].as<std::string>() << "\n"
+                        << "Available implementations (core|platform|backend):\n  - " << util::join(list, "\n  - ") << std::endl;
            return 127;
        }
        if(!vm) {
@@ -202,21 +206,36 @@ int main(int argc, char* argv[]) {
        if(clim.count("elf"))
            for(std::string input : clim["elf"].as<std::vector<std::string>>()) {
                auto start_addr = vm->get_arch()->load_file(input);
-                if(start_addr.second) // FIXME: this always evaluates to true as load file always returns <sth, true>
+                if(start_addr.second)
                    start_address = start_addr.first;
+                else {
+                    CPPLOG(ERR) << "Error occurred while loading file " << input << std::endl; // abort: no valid start address
+                    return 1;
+                }
            }
        for(std::string input : args) {
            auto start_addr = vm->get_arch()->load_file(input); // treat remaining arguments as elf files
-            if(start_addr.second) // FIXME: this always evaluates to true as load file always returns <sth, true>
+            if(start_addr.second)
                start_address = start_addr.first;
+            else {
+                CPPLOG(ERR) << "Error occurred while loading file " << input << std::endl; // abort: no valid start address
+                return 1;
+            }
        }
        if(clim.count("reset")) {
            auto str = clim["reset"].as<std::string>();
            start_address = str.find("0x") == 0 ? std::stoull(str.substr(2), nullptr, 16) : std::stoull(str, nullptr, 10);
        }
        vm->reset(start_address);
-        auto cycles = clim["instructions"].as<uint64_t>();
-        res = vm->start(cycles, dump);
+        auto limit = clim["ilimit"].as<uint64_t>(); // instruction limit is the default stop criterion
+        auto cond = iss::finish_cond_e::JUMP_TO_SELF;
+        if(!clim["flimit"].defaulted()) { // flimit has a default value, so count("flimit") is always 1; test for an explicit value
+            cond = cond | iss::finish_cond_e::FCOUNT_LIMIT;
+            limit = clim["flimit"].as<uint64_t>();
+        } else {
+            cond = cond | iss::finish_cond_e::ICOUNT_LIMIT;
+        }
+        res = vm->start(limit, dump, cond);

        auto instr_if = vm->get_arch()->get_instrumentation_if();
        // this assumes a single input file
--- a/src/sysc/core_complex.cpp
+++ b/src/sysc/core_complex.cpp
@@ -42,7 +42,6 @@
 #include <iss/plugin/loader.h>
 #endif
 #include "sc_core_adapter_if.h"
-#include <iss/arch/tgc_mapper.h>
 #include <scc/report.h>
 #include <util/ities.h>
 #include <iostream>
@@ -125,7 +124,7 @@ using vm_ptr = std::unique_ptr<iss::vm_if>;

 class core_wrapper {
 public:
-    core_wrapper(core_complex* owner)
+    core_wrapper(core_complex_if* owner)
    : owner(owner) {}

    void reset(uint64_t addr) { vm->reset(addr); }
@@ -181,7 +180,7 @@ public:
                                             "SystemC sub-commands: break <time>, print_time"});
    }

-    core_complex* const owner;
+    core_complex_if* const owner;
    vm_ptr vm{nullptr};
    sc_cpu_ptr cpu{nullptr};
    iss::debugger::target_adapter_if* tgt_adapter{nullptr};
@@ -197,9 +196,9 @@ struct core_trace {
    scv_tr_handle tr_handle;
 };

-SC_HAS_PROCESS(core_complex); // NOLINT
 #ifndef CWR_SYSTEMC
-core_complex::core_complex(sc_module_name const& name)
+template <unsigned int BUSWIDTH>
+core_complex<BUSWIDTH>::core_complex(sc_module_name const& name)
 : sc_module(name)
 , fetch_lut(tlm_dmi_ext())
 , read_lut(tlm_dmi_ext())
@@ -208,7 +207,7 @@ core_complex::core_complex(sc_module_name const& name)
 }
 #endif

-void core_complex::init() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::init() {
    trc = new core_trace();
    ibus.register_invalidate_direct_mem_ptr([=](uint64_t start, uint64_t end) -> void {
        auto lut_entry = fetch_lut.getEntry(start);
@@ -227,6 +226,7 @@ void core_complex::init() {
        }
    });

+    SC_HAS_PROCESS(core_complex<BUSWIDTH>); // NOLINT
    SC_THREAD(run);
    SC_METHOD(rst_cb);
    sensitive << rst_i;
@@ -252,16 +252,16 @@ void core_complex::init() {
 #endif
 }

-core_complex::~core_complex() {
+template <unsigned int BUSWIDTH> core_complex<BUSWIDTH>::~core_complex() {
    delete cpu;
    delete trc;
    for(auto* p : plugin_list)
        delete p;
 }

-void core_complex::trace(sc_trace_file* trf) const {}
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::trace(sc_trace_file* trf) const {}

-void core_complex::before_end_of_elaboration() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::before_end_of_elaboration() {
    SCCDEBUG(SCMOD) << "instantiating iss::arch::tgf with " << GET_PROP_VALUE(backend) << " backend";
    // cpu = scc::make_unique<core_wrapper>(this);
    cpu = new core_wrapper(this);
@@ -302,7 +302,7 @@ void core_complex::before_end_of_elaboration() {
    }
 }

-void core_complex::start_of_simulation() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::start_of_simulation() {
    // quantum_keeper.reset();
    if(GET_PROP_VALUE(elf_file).size() > 0) {
        istringstream is(GET_PROP_VALUE(elf_file));
@@ -325,7 +325,7 @@ void core_complex::start_of_simulation() {
    }
 }

-bool core_complex::disass_output(uint64_t pc, const std::string instr_str) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::disass_output(uint64_t pc, const std::string instr_str) {
    if(trc->m_db == nullptr)
        return false;
    if(trc->tr_handle.is_active())
@@ -339,7 +339,7 @@ bool core_complex::disass_output(uint64_t pc, const std::string instr_str) {
    return true;
 }

-void core_complex::forward() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::forward() {
 #ifndef CWR_SYSTEMC
    set_clock_period(clk_i.read());
 #else
@@ -348,24 +348,24 @@ void core_complex::forward() {
 #endif
 }

-void core_complex::set_clock_period(sc_core::sc_time period) {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::set_clock_period(sc_core::sc_time period) {
    curr_clk = period;
    if(period == SC_ZERO_TIME)
        cpu->set_interrupt_execution(true);
 }

-void core_complex::rst_cb() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::rst_cb() {
    if(rst_i.read())
        cpu->set_interrupt_execution(true);
 }

-void core_complex::sw_irq_cb() { cpu->local_irq(3, sw_irq_i.read()); }
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::sw_irq_cb() { cpu->local_irq(3, sw_irq_i.read()); }

-void core_complex::timer_irq_cb() { cpu->local_irq(7, timer_irq_i.read()); }
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::timer_irq_cb() { cpu->local_irq(7, timer_irq_i.read()); }

-void core_complex::ext_irq_cb() { cpu->local_irq(11, ext_irq_i.read()); }
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::ext_irq_cb() { cpu->local_irq(11, ext_irq_i.read()); }

-void core_complex::local_irq_cb() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::local_irq_cb() {
    for(auto i = 0U; i < local_irq_i.size(); ++i) {
        if(local_irq_i[i].event()) {
            cpu->local_irq(16 + i, local_irq_i[i].read());
@@ -373,7 +373,7 @@ void core_complex::local_irq_cb() {
    }
 }

-void core_complex::run() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::run() {
    wait(SC_ZERO_TIME); // separate from elaboration phase
    do {
        wait(SC_ZERO_TIME);
@@ -387,11 +387,11 @@ void core_complex::run() {
        quantum_keeper.reset();
        cpu->set_interrupt_execution(false);
        cpu->start(dump_ir);
-    } while(cpu->get_interrupt_execution());
+    } while(!cpu->get_interrupt_execution());
    sc_stop();
 }

-bool core_complex::read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) {
    auto& dmi_lut = is_fetch ? fetch_lut : read_lut;
    auto lut_entry = dmi_lut.getEntry(addr);
    if(lut_entry.get_granted_access() != tlm::tlm_dmi::DMI_ACCESS_NONE && addr + length <= lut_entry.get_end_address() + 1) {
@@ -449,7 +449,7 @@ bool core_complex::read_mem(uint64_t addr, unsigned length, uint8_t* const data,
    }
 }

-bool core_complex::write_mem(uint64_t addr, unsigned length, const uint8_t* const data) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::write_mem(uint64_t addr, unsigned length, const uint8_t* const data) {
    auto lut_entry = write_lut.getEntry(addr);
    if(lut_entry.get_granted_access() != tlm::tlm_dmi::DMI_ACCESS_NONE && addr + length <= lut_entry.get_end_address() + 1) {
        auto offset = addr - lut_entry.get_start_address();
@@ -497,7 +497,7 @@ bool core_complex::write_mem(uint64_t addr, unsigned length, const uint8_t* cons
    }
 }

-bool core_complex::read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) {
    tlm::tlm_generic_payload gp;
    gp.set_command(tlm::TLM_READ_COMMAND);
    gp.set_address(addr);
@@ -507,7 +507,7 @@ bool core_complex::read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const d
    return dbus->transport_dbg(gp) == length;
 }

-bool core_complex::write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) {
    write_buf.resize(length);
    std::copy(data, data + length, write_buf.begin()); // need to copy as TLM does not guarantee data integrity
    tlm::tlm_generic_payload gp;
@@ -518,5 +518,10 @@ bool core_complex::write_mem_dbg(uint64_t addr, unsigned length, const uint8_t*
    gp.set_streaming_width(length);
    return dbus->transport_dbg(gp) == length;
 }
+
+template class core_complex<scc::LT>;
+template class core_complex<32>;
+template class core_complex<64>;
+
 } /* namespace tgfs */
 } /* namespace sysc */
--- a/src/sysc/core_complex.h
+++ b/src/sysc/core_complex.h
@@ -33,6 +33,7 @@
 #ifndef _SYSC_CORE_COMPLEX_H_
 #define _SYSC_CORE_COMPLEX_H_

+#include <scc/signal_opt_ports.h>
 #include <scc/tick2time.h>
 #include <scc/traceable.h>
 #include <scc/utilities.h>
@@ -40,10 +41,8 @@
 #include <tlm/scc/scv/tlm_rec_initiator_socket.h>
 #ifdef CWR_SYSTEMC
 #include <scmlinc/scml_property.h>
-#define SOCKET_WIDTH 32
 #else
 #include <cci_configuration>
-#define SOCKET_WIDTH scc::LT
 #endif
 #include <memory>
 #include <tlm>
@@ -68,12 +67,35 @@ public:
 namespace tgfs {
 class core_wrapper;
 struct core_trace;
+struct core_complex_if {

-class core_complex : public sc_core::sc_module, public scc::traceable {
+    virtual ~core_complex_if() = default;
+
+    virtual bool read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) = 0;
+
+    virtual bool write_mem(uint64_t addr, unsigned length, const uint8_t* const data) = 0;
+
+    virtual bool read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) = 0;
+
+    virtual bool write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) = 0;
+
+    virtual bool disass_output(uint64_t pc, const std::string instr) = 0;
+
+    virtual unsigned get_last_bus_cycles() = 0;
+
+    //! Allow quantum keeper handling
+    virtual void sync(uint64_t) = 0;
+
+    virtual char const* hier_name() = 0;
+
+    scc::sc_in_opt<uint64_t> mtime_i{"mtime_i"};
+};
+
+template <unsigned int BUSWIDTH = scc::LT> class core_complex : public sc_core::sc_module, public scc::traceable, public core_complex_if {
 public:
-    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<SOCKET_WIDTH>> ibus{"ibus"};
+    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<BUSWIDTH>> ibus{"ibus"};

-    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<SOCKET_WIDTH>> dbus{"dbus"};
+    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<BUSWIDTH>> dbus{"dbus"};

    sc_core::sc_in<bool> rst_i{"rst_i"};

@@ -88,8 +110,6 @@ public:
 #ifndef CWR_SYSTEMC
    sc_core::sc_in<sc_core::sc_time> clk_i{"clk_i"};

-    sc_core::sc_port<tlm::tlm_peek_if<uint64_t>, 1, sc_core::SC_ZERO_OR_MORE_BOUND> mtime_o{"mtime_o"};
-
    cci::cci_param<std::string> elf_file{"elf_file", ""};

    cci::cci_param<bool> enable_disass{"enable_disass", false};
@@ -115,8 +135,6 @@ public:
 #else
    sc_core::sc_in<bool> clk_i{"clk_i"};

-    sc_core::sc_in<uint64_t> mtime_i{"mtime_i"};
-
    scml_property<std::string> elf_file{"elf_file", ""};

    scml_property<bool> enable_disass{"enable_disass", false};
@@ -159,13 +177,13 @@ public:

    ~core_complex();

-    inline unsigned get_last_bus_cycles() {
+    unsigned get_last_bus_cycles() override {
        auto mem_incr = std::max(ibus_inc, dbus_inc);
        ibus_inc = dbus_inc = 0;
        return mem_incr > 1 ? mem_incr : 1;
    }

-    inline void sync(uint64_t cycle) {
+    void sync(uint64_t cycle) override {
        auto core_inc = curr_clk * (cycle - last_sync_cycle);
        quantum_keeper.inc(core_inc);
        if(quantum_keeper.need_sync()) {
@@ -175,20 +193,22 @@ public:
        last_sync_cycle = cycle;
    }

-    bool read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch);
+    bool read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) override;

-    bool write_mem(uint64_t addr, unsigned length, const uint8_t* const data);
+    bool write_mem(uint64_t addr, unsigned length, const uint8_t* const data) override;

-    bool read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data);
+    bool read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) override;

-    bool write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data);
+    bool write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) override;

    void trace(sc_core::sc_trace_file* trf) const override;

-    bool disass_output(uint64_t pc, const std::string instr);
+    bool disass_output(uint64_t pc, const std::string instr) override;

    void set_clock_period(sc_core::sc_time period);

+    char const* hier_name() override { return name(); }
+
 protected:
    void before_end_of_elaboration() override;
    void start_of_simulation() override;
--- a/src/sysc/register_tgc_c.cpp
+++ b/src/sysc/register_tgc_c.cpp
@@ -46,12 +46,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|interp",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
@@ -62,12 +62,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|llvm",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
@@ -79,12 +79,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|tcc",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
@@ -96,12 +96,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|asmjit",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
--- a/src/sysc/sc_core_adapter.h
+++ b/src/sysc/sc_core_adapter.h
@@ -21,7 +21,7 @@ public:
    using reg_t = typename iss::arch::traits<typename PLAT::core>::reg_t;
    using phys_addr_t = typename iss::arch::traits<typename PLAT::core>::phys_addr_t;
    using heart_state_t = typename PLAT::hart_state_type;
-    sc_core_adapter(sysc::tgfs::core_complex* owner)
+    sc_core_adapter(sysc::tgfs::core_complex_if* owner)
    : owner(owner) {}

    iss::arch_if* get_arch_if() override { return this; }
@@ -54,9 +54,9 @@ public:
            std::stringstream s;
            s << "[p:" << lvl[this->reg.PRIV] << ";s:0x" << std::hex << std::setfill('0') << std::setw(sizeof(reg_t) * 2)
              << (reg_t)this->state.mstatus << std::dec << ";c:" << this->reg.icount + this->cycle_offset << "]";
-            SCCDEBUG(owner->name()) << "disass: "
-                                    << "0x" << std::setw(16) << std::right << std::setfill('0') << std::hex << pc << "\t\t" << std::setw(40)
-                                    << std::setfill(' ') << std::left << instr << s.str();
+            SCCDEBUG(owner->hier_name()) << "disass: "
+                                         << "0x" << std::setw(16) << std::right << std::setfill('0') << std::hex << pc << "\t\t"
+                                         << std::setw(40) << std::setfill(' ') << std::left << instr << s.str();
        }
    };

@@ -71,62 +71,66 @@ public:
    iss::status write_mem(phys_addr_t addr, unsigned length, const uint8_t* const data) override {
        if(addr.access && iss::access_type::DEBUG)
            return owner->write_mem_dbg(addr.val, length, data) ? iss::Ok : iss::Err;
-        else {
-            auto tohost_upper = (sizeof(reg_t) == 4 && addr.val == (this->tohost + 4)) || (sizeof(reg_t) == 8 && addr.val == this->tohost);
-            auto tohost_lower = (sizeof(reg_t) == 4 && addr.val == this->tohost) || (sizeof(reg_t) == 64 && addr.val == this->tohost);
-            if(tohost_lower || tohost_upper) {
-                if(tohost_upper || (tohost_lower && to_host_wr_cnt > 0)) {
-                    switch(hostvar >> 48) {
-                    case 0:
-                        if(hostvar != 0x1) {
-                            SCCINFO(owner->name())
-                                << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar << "), stopping simulation";
-                        } else {
-                            SCCINFO(owner->name())
-                                << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar << "), stopping simulation";
-                        }
-                        this->reg.trap_state = std::numeric_limits<uint32_t>::max();
-                        this->interrupt_sim = hostvar;
-#ifndef WITH_TCC
-                        throw(iss::simulation_stopped(hostvar));
-#endif
-                        break;
-                    default:
-                        break;
-                    }
-                } else if(tohost_lower)
-                    to_host_wr_cnt++;
-                return iss::Ok;
-            } else {
-                auto res = owner->write_mem(addr.val, length, data) ? iss::Ok : iss::Err;
-                // clear MTIP on mtimecmp write
-                if(addr.val == 0x2004000) {
-                    reg_t val;
-                    this->read_csr(iss::arch::mip, val);
-                    if(val & (1ULL << 7))
-                        this->write_csr(iss::arch::mip, val & ~(1ULL << 7));
+        if(addr.val == this->tohost) { // stores to the tohost address are consumed here and never forwarded to owner->write_mem
+            reg_t cur_data = *reinterpret_cast<const reg_t*>(data); // NOTE(review): reads sizeof(reg_t) bytes without checking 'length' - confirm callers guarantee that
+            // Extract Device (bits 63:56)
+            uint8_t device = sizeof(reg_t) == 4 ? 0 : (cur_data >> 56) & 0xFF;
+            // Extract Command (bits 55:48)
+            uint8_t command = sizeof(reg_t) == 4 ? 0 : (cur_data >> 48) & 0xFF;
+            // Extract payload (bits 47:0)
+            uint64_t payload_addr = cur_data & 0xFFFFFFFFFFFFULL; // 48 bits
+            if(payload_addr & 1) { // bit 0 set: stop the simulation (0x1 is logged as info, any other value as error)
+                if(payload_addr != 0x1) {
+                    SCCERR(owner->hier_name()) << "tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr
+                                               << "), stopping simulation";
+                } else {
+                    SCCINFO(owner->hier_name())
+                        << "tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr << "), stopping simulation";
                }
-                return res;
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+#ifndef WITH_TCC
+                throw(iss::simulation_stopped(payload_addr));
+#endif
+                return iss::Ok;
            }
+            if(device == 0 && command == 0) { // payload is used as the address of an 8 x uint64_t block; word 0 is the syscall number
+                std::array<uint64_t, 8> loaded_payload;
+                auto res = owner->read_mem(payload_addr, 8 * sizeof(uint64_t), reinterpret_cast<uint8_t*>(loaded_payload.data()), false)
+                               ? iss::Ok
+                               : iss::Err;
+                if(res == iss::Err) {
+                    SCCERR(owner->hier_name()) << "Syscall read went wrong";
+                    return iss::Ok; // the failed read is only logged; the store still reports success
+                }
+                uint64_t syscall_num = loaded_payload.at(0);
+                if(syscall_num == 64) // SYS_WRITE
+                    return this->execute_sys_write(this, loaded_payload, PLAT::MEM);
+                SCCERR(owner->hier_name()) << "tohost syscall with number 0x" << std::hex << syscall_num << std::dec << " (" << syscall_num
+                                           << ") not implemented";
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+                return iss::Ok;
+            }
+            SCCERR(owner->hier_name()) << "tohost functionality not implemented for device " << device << " and command " << command; // NOTE(review): device/command are uint8_t and may be streamed as characters rather than numbers - verify
+            this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+            this->interrupt_sim = payload_addr;
+            return iss::Ok;
        }
+        auto res = owner->write_mem(addr.val, length, data) ? iss::Ok : iss::Err;
+        // clear MTIP on mtimecmp write
+        if(addr.val == 0x2004000) {
+            reg_t val;
+            this->read_csr(iss::arch::mip, val);
+            if(val & (1ULL << 7))
+                this->write_csr(iss::arch::mip, val & ~(1ULL << 7));
+        }
+        return res;
    }

    iss::status read_csr(unsigned addr, reg_t& val) override {
-#ifndef CWR_SYSTEMC
-        if((addr == iss::arch::time || addr == iss::arch::timeh) && owner->mtime_o.get_interface(0)) {
-            uint64_t time_val;
-            bool ret = owner->mtime_o->nb_peek(time_val);
-            if(addr == iss::arch::time) {
-                val = static_cast<reg_t>(time_val);
-            } else if(addr == iss::arch::timeh) {
-                if(sizeof(reg_t) != 4)
-                    return iss::Err;
-                val = static_cast<reg_t>(time_val >> 32);
-            }
-            return ret ? iss::Ok : iss::Err;
-#else
        if((addr == iss::arch::time || addr == iss::arch::timeh)) {
-            uint64_t time_val = owner->mtime_i.read();
+            uint64_t time_val = owner->mtime_i.get_interface() ? owner->mtime_i.read() : 0;
            if(addr == iss::arch::time) {
                val = static_cast<reg_t>(time_val);
            } else if(addr == iss::arch::timeh) {
@@ -135,14 +139,13 @@ public:
                val = static_cast<reg_t>(time_val >> 32);
            }
            return iss::Ok;
-#endif
        } else {
            return PLAT::read_csr(addr, val);
        }
    }

    void wait_until(uint64_t flags) override {
-        SCCDEBUG(owner->name()) << "Sleeping until interrupt";
+        SCCDEBUG(owner->hier_name()) << "Sleeping until interrupt";
        while(this->reg.pending_trap == 0 && (this->csr[iss::arch::mip] & this->csr[iss::arch::mie]) == 0) {
            sc_core::wait(wfi_evt);
        }
@@ -173,13 +176,12 @@ public:
            this->csr[iss::arch::mip] &= ~mask;
        this->check_interrupt();
        if(value)
-            SCCTRACE(owner->name()) << "Triggering interrupt " << id << " Pending trap: " << this->reg.pending_trap;
+            SCCTRACE(owner->hier_name()) << "Triggering interrupt " << id << " Pending trap: " << this->reg.pending_trap;
    }

 private:
-    sysc::tgfs::core_complex* const owner;
+    sysc::tgfs::core_complex_if* const owner{nullptr};
    sc_core::sc_event wfi_evt;
-    uint64_t hostvar{std::numeric_limits<uint64_t>::max()};
    unsigned to_host_wr_cnt = 0;
    bool first{true};
 };
--- a/src/vm/asmjit/vm_tgc5c.cpp
+++ b/src/vm/asmjit/vm_tgc5c.cpp
--- a/src/vm/fp_functions.cpp
+++ b/src/vm/fp_functions.cpp
@@ -33,6 +33,7 @@
 ////////////////////////////////////////////////////////////////////////////////

 #include "fp_functions.h"
+#include <array>

 extern "C" {
 #include "internals.h"
@@ -43,9 +44,10 @@ extern "C" {
 #include <limits>

 using this_t = uint8_t*;
-const uint8_t rmm_map[] = {
-    softfloat_round_near_even /*RNE*/,   softfloat_round_minMag /*RTZ*/, softfloat_round_min /*RDN*/, softfloat_round_max /*RUP?*/,
-    softfloat_round_near_maxMag /*RMM*/, softfloat_round_max /*RTZ*/,    softfloat_round_max /*RTZ*/, softfloat_round_max /*RTZ*/,
+// this does not include any reserved rm or the DYN rm, as DYN rm should be taken care of in the vm_impl
+const std::array<uint8_t, 5> rmm_map = {
+    softfloat_round_near_even /*RNE*/, softfloat_round_minMag /*RTZ*/, softfloat_round_min /*RDN*/, softfloat_round_max /*RUP?*/,
+    softfloat_round_near_maxMag /*RMM*/
 };

 const uint32_t quiet_nan32 = 0x7fC00000;
@@ -56,7 +58,7 @@ uint32_t fget_flags() { return softfloat_exceptionFlags & 0x1f; }

 uint32_t fadd_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float32_t r = f32_add(v1f, v2f);
    return r.v;
@@ -64,7 +66,7 @@ uint32_t fadd_s(uint32_t v1, uint32_t v2, uint8_t mode) {

 uint32_t fsub_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float32_t r = f32_sub(v1f, v2f);
    return r.v;
@@ -72,7 +74,7 @@ uint32_t fsub_s(uint32_t v1, uint32_t v2, uint8_t mode) {

 uint32_t fmul_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float32_t r = f32_mul(v1f, v2f);
    return r.v;
@@ -80,7 +82,7 @@ uint32_t fmul_s(uint32_t v1, uint32_t v2, uint8_t mode) {

 uint32_t fdiv_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float32_t r = f32_div(v1f, v2f);
    return r.v;
@@ -88,7 +90,7 @@ uint32_t fdiv_s(uint32_t v1, uint32_t v2, uint8_t mode) {

 uint32_t fsqrt_s(uint32_t v1, uint8_t mode) {
    float32_t v1f{v1};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float32_t r = f32_sqrt(v1f);
    return r.v;
@@ -130,18 +132,18 @@ uint32_t fcvt_s(uint32_t v1, uint32_t op, uint8_t mode) {
    softfloat_exceptionFlags = 0;
    float32_t r;
    switch(op) {
-    case 0: { // w->s, fp to int32
-        uint_fast32_t res = f32_to_i32(v1f, rmm_map[mode & 0x7], true);
+    case 0: { // FCVT__W__S
+        uint_fast32_t res = f32_to_i32(v1f, rmm_map.at(mode), true);
        return (uint32_t)res;
    }
-    case 1: { // wu->s
-        uint_fast32_t res = f32_to_ui32(v1f, rmm_map[mode & 0x7], true);
+    case 1: { // FCVT__WU__S
+        uint_fast32_t res = f32_to_ui32(v1f, rmm_map.at(mode), true);
        return (uint32_t)res;
    }
-    case 2: // s->w
-        r = i32_to_f32(v1);
+    case 2: // FCVT__S__W
+        r = i32_to_f32((int32_t)v1);
        return r.v;
-    case 3: // s->wu
+    case 3: // FCVT__S__WU
        r = ui32_to_f32(v1);
        return r.v;
    }
@@ -149,12 +151,24 @@ uint32_t fcvt_s(uint32_t v1, uint32_t op, uint8_t mode) {
 }

 uint32_t fmadd_s(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t op, uint8_t mode) {
-    // op should be {softfloat_mulAdd_subProd(2), softfloat_mulAdd_subC(1)}
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    uint32_t F32_SIGN = 1UL << 31; // bit 31, used below to flip the sign of a 32-bit operand pattern
+    switch(op) {
+    case 0: // FMADD_S: operands are used as passed in
+        break;
+    case 1: // FMSUB_S: flip the sign bit of v3
+        v3 ^= F32_SIGN;
+        break;
+    case 2: // FNMADD_S: flip the sign bits of v1 and v3
+        v1 ^= F32_SIGN;
+        v3 ^= F32_SIGN;
+        break;
+    case 3: // FNMSUB_S: flip the sign bit of v1
+        v1 ^= F32_SIGN;
+        break;
+    }
+    softfloat_roundingMode = rmm_map.at(mode); // NOTE(review): .at() throws std::out_of_range for mode > 4 (rmm_map has 5 entries) - confirm callers resolve DYN/reserved modes first
     softfloat_exceptionFlags = 0;
-    float32_t res = softfloat_mulAddF32(v1, v2, v3, op & 0x1);
-    if(op > 1)
-        res.v ^= 1ULL << 31;
+    float32_t res = softfloat_mulAddF32(v1, v2, v3, 0); // last argument fixed to 0: the variant is selected by the sign flips above
     return res.v;
 }

@@ -189,8 +203,8 @@ uint32_t fclass_s(uint32_t v1) {
    uA.f = a;
    uiA = uA.ui;

-    uint_fast16_t infOrNaN = expF32UI(uiA) == 0xFF;
-    uint_fast16_t subnormalOrZero = expF32UI(uiA) == 0;
+    bool infOrNaN = expF32UI(uiA) == 0xFF;
+    bool subnormalOrZero = expF32UI(uiA) == 0;
    bool sign = signF32UI(uiA);
    bool fracZero = fracF32UI(uiA) == 0;
    bool isNaN = isNaNF32UI(uiA);
@@ -203,9 +217,13 @@ uint32_t fclass_s(uint32_t v1) {
 }

 uint32_t fconv_d2f(uint64_t v1, uint8_t mode) {
-    softfloat_roundingMode = rmm_map[mode & 0x7];
-    bool nan = (v1 & defaultNaNF64UI) == defaultNaNF64UI;
-    if(nan) {
+    bool isNan = isNaNF64UI(v1);
+    bool isSNaN = softfloat_isSigNaNF64UI(v1);
+    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_exceptionFlags = 0;
+    if(isNan) {
+        if(isSNaN)
+            softfloat_raiseFlags(softfloat_flag_invalid);
        return defaultNaNF32UI;
    } else {
        float32_t res = f64_to_f32(float64_t{v1});
@@ -214,11 +232,11 @@ uint32_t fconv_d2f(uint64_t v1, uint8_t mode) {
 }

 uint64_t fconv_f2d(uint32_t v1, uint8_t mode) {
-    bool nan = (v1 & defaultNaNF32UI) == defaultNaNF32UI;
-    if(nan) {
+    bool infOrNaN = expF32UI(v1) == 0xFF; // exponent field all ones
+    bool subnormalOrZero = expF32UI(v1) == 0; // exponent field all zeros
+    if(infOrNaN || subnormalOrZero) { // NOTE(review): as written, +/-inf, +/-0 and subnormal inputs also return the default NaN - confirm this is intended
         return defaultNaNF64UI;
     } else {
-        softfloat_roundingMode = rmm_map[mode & 0x7];
         float64_t res = f32_to_f64(float32_t{v1});
         return res.v;
     }
@@ -228,7 +246,7 @@ uint64_t fadd_d(uint64_t v1, uint64_t v2, uint8_t mode) {
    bool nan = (v1 & defaultNaNF32UI) == quiet_nan32;
    bool snan = softfloat_isSigNaNF32UI(v1);
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float64_t r = f64_add(v1f, v2f);
    return r.v;
@@ -236,7 +254,7 @@ uint64_t fadd_d(uint64_t v1, uint64_t v2, uint8_t mode) {

 uint64_t fsub_d(uint64_t v1, uint64_t v2, uint8_t mode) {
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float64_t r = f64_sub(v1f, v2f);
    return r.v;
@@ -244,7 +262,7 @@ uint64_t fsub_d(uint64_t v1, uint64_t v2, uint8_t mode) {

 uint64_t fmul_d(uint64_t v1, uint64_t v2, uint8_t mode) {
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float64_t r = f64_mul(v1f, v2f);
    return r.v;
@@ -252,7 +270,7 @@ uint64_t fmul_d(uint64_t v1, uint64_t v2, uint8_t mode) {

 uint64_t fdiv_d(uint64_t v1, uint64_t v2, uint8_t mode) {
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float64_t r = f64_div(v1f, v2f);
    return r.v;
@@ -260,7 +278,7 @@ uint64_t fdiv_d(uint64_t v1, uint64_t v2, uint8_t mode) {

 uint64_t fsqrt_d(uint64_t v1, uint8_t mode) {
    float64_t v1f{v1};
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
    float64_t r = f64_sqrt(v1f);
    return r.v;
@@ -298,22 +316,23 @@ uint64_t fcmp_d(uint64_t v1, uint64_t v2, uint32_t op) {
 }

 uint64_t fcvt_d(uint64_t v1, uint32_t op, uint8_t mode) {
+
    float64_t v1f{v1};
    softfloat_exceptionFlags = 0;
    float64_t r;
    switch(op) {
-    case 0: { // l->d, fp to int32
-        int64_t res = f64_to_i64(v1f, rmm_map[mode & 0x7], true);
+    case 0: { // l from d
+        int64_t res = f64_to_i64(v1f, rmm_map.at(mode), true);
        return (uint64_t)res;
    }
-    case 1: { // lu->s
-        uint64_t res = f64_to_ui64(v1f, rmm_map[mode & 0x7], true);
+    case 1: { // lu from d
+        uint64_t res = f64_to_ui64(v1f, rmm_map.at(mode), true);
        return res;
    }
-    case 2: // s->l
+    case 2: // d from l
        r = i64_to_f64(v1);
        return r.v;
-    case 3: // s->lu
+    case 3: // d from lu
        r = ui64_to_f64(v1);
        return r.v;
    }
@@ -321,12 +340,24 @@ uint64_t fcvt_d(uint64_t v1, uint32_t op, uint8_t mode) {
 }

 uint64_t fmadd_d(uint64_t v1, uint64_t v2, uint64_t v3, uint32_t op, uint8_t mode) {
-    // op should be {softfloat_mulAdd_subProd(2), softfloat_mulAdd_subC(1)}
-    softfloat_roundingMode = rmm_map[mode & 0x7];
+    uint64_t F64_SIGN = 1ULL << 63; // bit 63, used below to flip the sign of a 64-bit operand pattern
+    switch(op) {
+    case 0: // FMADD_D: operands are used as passed in
+        break;
+    case 1: // FMSUB_D: flip the sign bit of v3
+        v3 ^= F64_SIGN;
+        break;
+    case 2: // FNMADD_D: flip the sign bits of v1 and v3
+        v1 ^= F64_SIGN;
+        v3 ^= F64_SIGN;
+        break;
+    case 3: // FNMSUB_D: flip the sign bit of v1
+        v1 ^= F64_SIGN;
+        break;
+    }
+    softfloat_roundingMode = rmm_map.at(mode); // NOTE(review): .at() throws std::out_of_range for mode > 4 (rmm_map has 5 entries) - confirm callers resolve DYN/reserved modes first
     softfloat_exceptionFlags = 0;
-    float64_t res = softfloat_mulAddF64(v1, v2, v3, op & 0x1);
-    if(op > 1)
-        res.v ^= 1ULL << 63;
+    float64_t res = softfloat_mulAddF64(v1, v2, v3, 0); // last argument fixed to 0: the variant is selected by the sign flips above
     return res.v;
 }

@@ -362,8 +393,8 @@ uint64_t fclass_d(uint64_t v1) {
    uA.f = a;
    uiA = uA.ui;

-    uint_fast16_t infOrNaN = expF64UI(uiA) == 0x7FF;
-    uint_fast16_t subnormalOrZero = expF64UI(uiA) == 0;
+    bool infOrNaN = expF64UI(uiA) == 0x7FF;
+    bool subnormalOrZero = expF64UI(uiA) == 0;
    bool sign = signF64UI(uiA);
    bool fracZero = fracF64UI(uiA) == 0;
    bool isNaN = isNaNF64UI(uiA);
@@ -381,9 +412,9 @@ uint64_t fcvt_32_64(uint32_t v1, uint32_t op, uint8_t mode) {
    float64_t r;
    switch(op) {
    case 0: // l->s, fp to int32
-        return f32_to_i64(v1f, rmm_map[mode & 0x7], true);
+        return f32_to_i64(v1f, rmm_map.at(mode), true);
    case 1: // wu->s
-        return f32_to_ui64(v1f, rmm_map[mode & 0x7], true);
+        return f32_to_ui64(v1f, rmm_map.at(mode), true);
    case 2: // s->w
        r = i32_to_f64(v1);
        return r.v;
@@ -399,11 +430,11 @@ uint32_t fcvt_64_32(uint64_t v1, uint32_t op, uint8_t mode) {
    float32_t r;
    switch(op) {
    case 0: { // wu->s
-        int32_t r = f64_to_i32(float64_t{v1}, rmm_map[mode & 0x7], true);
+        int32_t r = f64_to_i32(float64_t{v1}, rmm_map.at(mode), true);
        return r;
    }
    case 1: { // wu->s
-        uint32_t r = f64_to_ui32(float64_t{v1}, rmm_map[mode & 0x7], true);
+        uint32_t r = f64_to_ui32(float64_t{v1}, rmm_map.at(mode), true);
        return r;
    }
    case 2: // l->s, fp to int32
--- a/src/vm/interp/vm_tgc5c.cpp
+++ b/src/vm/interp/vm_tgc5c.cpp
--- a/src/vm/llvm/vm_tgc5c.cpp
+++ b/src/vm/llvm/vm_tgc5c.cpp
--- a/src/vm/tcc/vm_tgc5c.cpp
+++ b/src/vm/tcc/vm_tgc5c.cpp
Author	SHA1	Message	Date
Eyck Jentzsch	502f3e8df9	fixes htif behavior and instrumentation interface	2025-03-14 19:43:20 +01:00
Hongyu Liu	88475bfa55	changes the io_buf	2025-03-14 12:14:20 +01:00
Eyck Jentzsch	23842742a6	factors clic & pmp into separate units	2025-03-13 12:13:41 +01:00
Eyck Jentzsch	a13b7ac6d3	separates functional memory into separate unit	2025-03-12 09:26:51 +01:00
Eyck Jentzsch	fb0f6255e9	replaces virtual functions with memory pointers (kind of)	2025-03-11 08:31:25 +01:00
Eyck Jentzsch	57d5ea92be	moves common functionality to base class	2025-03-10 16:00:26 +01:00
Eyck Jentzsch	383d762abc	applies clang-format and updates SystemC HTIF implementation	2025-03-06 12:10:12 +01:00
Eyck Jentzsch	03cbd305c6	replaces literal constant with symbolic definition	2025-02-28 19:34:07 +01:00
Eyck Jentzsch	9f5326c110	extends htif for 32bit systems	2025-02-13 13:39:47 +01:00
Eyck Jentzsch	f4718c6de3	Merge remote-tracking branch 'origin/feature/htif' into develop	2025-02-13 09:34:31 +01:00
Eyck Jentzsch	53de21eef9	adds generator changed output	2025-02-12 20:45:04 +01:00
Eyck-Alexander Jentzsch	d443c89c87	removes llvm from dbt-rise-tgc build system as it is handled in dbt-rise-core	2024-12-28 13:10:49 +01:00
Eyck-Alexander Jentzsch	9a2df32d57	updates templates	2024-12-28 13:07:07 +01:00
Eyck-Alexander Jentzsch	be0f783af8	adds cycle increment to tcc	2024-12-28 13:06:46 +01:00
Eyck-Alexander Jentzsch	1089800682	updates vm_impls and core.h to work with new vm_base	2024-12-28 08:24:09 +01:00
Eyck Jentzsch	a6a6f51f0b	adds clang-format fixes	2024-12-06 15:50:50 +01:00
Eyck-Alexander Jentzsch	21e1f791ad	corrects sysc integration template and corresponding file	2024-12-06 09:49:02 +01:00
Eyck-Alexander Jentzsch	be6f5791fa	adds update to cyclecount after each instr for asmjit	2024-11-26 20:26:18 +01:00
Eyck-Alexander Jentzsch	d907dc7f54	corrects tohost functionality and minor cleanup	2024-11-22 17:35:12 +01:00
Eyck-Alexander Jentzsch	75e81ce236	copies new tohost implementation from hart_m_p	2024-11-14 16:51:26 +01:00
Eyck-Alexander Jentzsch	82a70efdb8	small reorder to make tohost output more readable	2024-11-14 16:51:26 +01:00
Eyck-Alexander Jentzsch	978c3db06e	minor improvements to readability	2024-11-14 16:51:26 +01:00
Eyck-Alexander Jentzsch	0e88664ff7	adds better tohost writing implementation, allowing the standard riscv-isa-test benchmarks to run	2024-11-14 16:51:26 +01:00
Eyck-Alexander Jentzsch	ac818f304d	increases verbosity in case elf loading goes wrong	2024-10-21 16:42:58 +02:00
Eyck-Alexander Jentzsch	ad60449073	updates generated cores	2024-09-27 20:04:58 +02:00
Eyck-Alexander Jentzsch	b45b3589fa	updates templates to immediately trap when gen_trap is called	2024-09-27 20:03:51 +02:00
Eyck-Alexander Jentzsch	1fb7e8fcea	improves logging output	2024-09-24 08:39:34 +02:00
Eyck-Alexander Jentzsch	5f9d0beafb	corrects softfloat to comply with RVD ACT	2024-09-23 22:22:57 +02:00
Eyck-Alexander Jentzsch	4c0d1c75aa	adds addr formatting to logging	2024-09-23 12:21:43 +02:00
Eyck-Alexander Jentzsch	2f3abf2f76	adds namespaces for ELFIO	2024-09-23 11:55:18 +02:00
Eyck Jentzsch	62768bf81e	applies clang format	2024-09-23 10:05:33 +02:00
Eyck Jentzsch	f6be8ec006	adds elfio test utility	2024-09-23 09:29:08 +02:00
Eyck Jentzsch	a8f56b6e27	removes code duplication by unifying elf file read	2024-09-23 09:28:27 +02:00
Eyck-Alexander Jentzsch	76ea0db25d	adds newest generated vm_impl	2024-08-17 23:19:51 +02:00
Eyck Jentzsch	ec1b820c18	fixes target xml generation	2024-08-17 19:36:53 +02:00
Eyck Jentzsch	64329cf0f6	fixes use of icount vs. cycle	2024-08-17 19:36:40 +02:00
Eyck Jentzsch	9de0aed84d	expands some error message	2024-08-17 16:55:49 +02:00
Eyck Jentzsch	bb4e2766d1	applies clang-format	2024-08-17 16:12:57 +02:00
Eyck Jentzsch	0996d15bd4	removes debug code	2024-08-17 12:48:48 +02:00
Eyck Jentzsch	6305efa7c2	implements proper target XML generation incl. CSRs	2024-08-17 12:40:40 +02:00
Eyck Jentzsch	de79adc50d	updates debugger hook to stop before fetching instructions; this relates to https://github.com/Minres/DBT-RISE-RISCV/issues/8 (Debugger loses control when trap vector fetch fails) and https://github.com/Minres/DBT-RISE-RISCV/issues/7 (Two debugger single-steps are required at reset vector)	2024-08-17 12:39:54 +02:00
Eyck Jentzsch	0473aa5344	fixes SystemC wrapper wrt. templated core_complex	2024-08-17 12:34:17 +02:00
Eyck-Alexander Jentzsch	a45fcd28db	updates fn calling generation	2024-08-17 08:22:04 +02:00
Eyck-Alexander Jentzsch	0f15032210	removes gen_wait as wait can be called like any other extern function	2024-08-14 15:25:06 +02:00
Eyck-Alexander Jentzsch	efc11d87a5	updates template with fcsr check, adds extra braces on If Statements	2024-08-14 14:32:58 +02:00
Eyck-Alexander Jentzsch	4a19e27926	adds changes due to generator being more inline with others	2024-08-14 13:52:08 +02:00
Eyck-Alexander Jentzsch	c15cdb0955	expands return values of jit creating functions to inhibit endless trapping	2024-08-14 11:49:59 +02:00
Eyck-Alexander Jentzsch	6609d12582	adds flimit that gets properly evaluated in interp	2024-08-13 15:22:34 +02:00
Eyck-Alexander Jentzsch	b5341700aa	updates template and adds braces when using conditions	2024-08-13 08:55:14 +02:00
Eyck-Alexander Jentzsch	0b5062d21c	adds fp_functions here to remove dependencies in dbt-rise-core	2024-08-09 11:56:32 +02:00
Eyck-Alexander Jentzsch	fbca690b3b	replaces gen_wait, updates template to include fp_functions when necessary	2024-08-08 12:57:08 +02:00
Eyck-Alexander Jentzsch	235a7e6e24	updates template	2024-08-08 11:08:28 +02:00
Eyck-Alexander Jentzsch	62d21e1156	updates disass	2024-08-07 09:21:07 +02:00
Eyck-Alexander Jentzsch	9c51d6eade	improves interp, only calls decode once per instr	2024-08-07 09:20:11 +02:00
Eyck-Alexander Jentzsch	2878dca6b5	updates templates	2024-08-06 08:32:05 +02:00
Eyck Jentzsch	c28e8fd00c	removes left-overs	2024-08-04 18:57:20 +02:00
Eyck Jentzsch	b3cc9d2346	makes core_complex a template	2024-08-04 18:47:32 +02:00
Eyck Jentzsch	933f08494c	removes C++17 dependency from asmjit backend	2024-08-04 17:41:49 +02:00
Eyck Jentzsch	21f8eab432	adds regenerated tgc5c	2024-08-02 19:18:28 +02:00
Eyck Jentzsch	6ddb8da07f	fixes missing rename	2024-08-02 11:58:51 +02:00
Eyck Jentzsch	edf456c59f	fixes missing braces	2024-08-02 10:33:15 +02:00
Eyck Jentzsch	42efced1eb	fixes FCSR behavior if no floating point is implemented	2024-08-02 08:59:22 +02:00
Eyck Jentzsch	c376e34b2b	applies clang format	2024-08-01 11:02:10 +02:00
Eyck-Alexander Jentzsch	f579ec6e48	changes access to rounding mode to fail explicitly instead of unintended behavior	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	fd20e66f1f	changes softfloat API usage, all affected Instrs pass test suite	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	5d69b79232	reverts patches in softfloat	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	2edd68d1bd	refactors zeroProd branch to allow for better case handling	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	7ffa7667b6	fixes concerning FMADD_S, FMSUB_S, FNMADD_S, and FNMSUB_S mostly about ensuring correct sign	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	93d89e07ca	removes wrong compile definition	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	17dcba4b90	updates softfloat to #b51ef8f of softfloat3 https://github.com/ucb-bar/berkeley-softfloat-3/	2024-07-31 12:30:41 +02:00
Eyck-Alexander Jentzsch	39d2518fdd	checkin: tgc5f builds and runs through	2024-07-31 12:30:41 +02:00
stas	a365110054	fix format	2024-07-30 13:34:23 +02:00
Eyck Jentzsch	d2efb23ff7	fixes cache behavior for fetches	2024-07-25 19:33:50 +02:00
Eyck-Alexander Jentzsch	04b7a09b19	updates date in templates	2024-07-25 17:25:12 +02:00
Eyck-Alexander Jentzsch	72b11beac5	moves decoder to dbt-rise-core	2024-07-25 10:13:38 +02:00
Eyck Jentzsch	e87b7d5fd0	applies clang-format	2024-07-24 14:48:50 +02:00
Eyck Jentzsch	5a2b96ef3e	adds logging categories for ISS	2024-07-24 12:30:07 +02:00
Eyck-Alexander Jentzsch	c6b99cd155	introduces new decoder to interp backend	2024-07-24 12:28:35 +02:00
Eyck-Alexander Jentzsch	b1306c3a47	improves instruction decoding by avoiding copying, replaces .size()	2024-07-24 08:54:37 +02:00
Eyck-Alexander Jentzsch	0d6bf924ed	changes jh.globals from map to vector	2024-07-23 15:45:51 +02:00
Eyck-Alexander Jentzsch	86de536c8f	changes jh globals to separate riscv specifics	2024-07-23 14:35:31 +02:00
Eyck-Alexander Jentzsch	051dd5e2d3	updates templates for decoder in separate class, adds again generated templates	2024-07-23 13:46:10 +02:00
Eyck-Alexander Jentzsch	e3942be776	Introduces decoder in a separate class	2024-07-23 13:08:53 +02:00
Eyck-Alexander Jentzsch	6ee484a771	moves instruction decoder into own class	2024-07-23 11:30:33 +02:00
Eyck-Alexander Jentzsch	60808c8649	corrects template since util fns are no longer vm_base members	2024-07-23 11:29:56 +02:00
Eyck-Alexander Jentzsch	0432803d82	updates templates and vm impls for better LAST_BRANCH handling	2024-07-22 09:04:17 +02:00
Eyck-Alexander Jentzsch	4f5d9214ed	adds newly generated instr.yaml	2024-07-18 14:31:36 +02:00
Eyck-Alexander Jentzsch	d42d2ce533	corrects illegal instruction for llvm	2024-07-18 14:04:23 +02:00
Eyck-Alexander Jentzsch	236d12d7f5	integrates gen_bool for Conditions (was truncation) into llvm	2024-07-18 13:30:42 +02:00
Eyck-Alexander Jentzsch	e1b6cab890	removes setting of NEXT_PC to max when trapping in llvm and asmjit, adds default disass to llvm	2024-07-18 12:02:40 +02:00
Eyck-Alexander Jentzsch	8361f88718	removes setting of NEXT_PC to max if trap	2024-07-18 11:37:53 +02:00
Eyck-Alexander Jentzsch	2ec7ea4b41	removes leftover gen_sync in asmjit	2024-07-17 22:39:12 +02:00
Eyck-Alexander Jentzsch	b24965d321	corrects gen_sync update order, improves illegal instruction	2024-07-17 20:52:01 +02:00
Eyck-Alexander Jentzsch	244bf6d2f2	corrects gen_sync before trap check, improves illegal_instruction	2024-07-17 20:25:49 +02:00
Eyck-Alexander Jentzsch	1a4465a371	changes template: adds correct illegal instruction, reorders gen_sync to allow correct instr id eve when trapping, adds newly generated vm	2024-07-17 19:59:01 +02:00
Eyck-Alexander Jentzsch	fa82a50824	fixes typo in templates	2024-07-17 17:24:17 +02:00
Eyck-Alexander Jentzsch	6dc17857da	updates template	2024-07-17 15:36:08 +02:00
Eyck-Alexander Jentzsch	11a30caae8	integrates generator changes to canPrecompute	2024-07-17 15:14:13 +02:00
Eyck-Alexander Jentzsch	ac1a26a10c	integrates new tval changes into llvm	2024-07-17 14:17:02 +02:00
Eyck-Alexander Jentzsch	7a199e122d	integrates new tval changes into asmjit	2024-07-17 09:42:12 +02:00
Eyck-Alexander Jentzsch	d8c3d2e19c	integrates new tval changes into tcc	2024-07-16 17:35:23 +02:00
Eyck-Alexander Jentzsch	375755999a	integrates new tval changes	2024-07-16 15:32:35 +02:00
stas	9996fd4833	change cache line size to 64	2024-07-11 14:03:58 +02:00
Eyck-Alexander Jentzsch	149b3136d2	updates generated files	2024-07-10 12:55:36 +02:00
Eyck-Alexander Jentzsch	ac8f8b0539	updates vms with fixed Zc in tgc5c.core_desc	2024-07-10 12:51:59 +02:00
Eyck-Alexander Jentzsch	b2cbf90d0b	updates generated files	2024-07-10 12:51:59 +02:00
Eyck-Alexander Jentzsch	373145478e	updates file because of generator changes	2024-07-10 12:51:59 +02:00
Eyck-Alexander Jentzsch	55b0cea94f	changes vm_base util API	2024-07-10 12:51:59 +02:00
Eyck-Alexander Jentzsch	5b17599aa2	allows usage of std::variants	2024-07-10 12:51:59 +02:00
Eyck-Alexander Jentzsch	4cfb15c7cd	Asmjit and interp working	2024-07-10 12:51:31 +02:00
Eyck Jentzsch	63da7f8d57	applies clang-format	2024-07-09 13:57:11 +02:00
Eyck Jentzsch	fb4012fbd1	moves likely annotation	2024-07-09 13:52:10 +02:00
Eyck Jentzsch	24449f1c0f	fixes some elf load issue	2024-07-05 12:18:36 +02:00
Eyck Jentzsch	fd303c8343	fixes asmjit deprecation warning	2024-07-05 07:51:37 +02:00
Eyck Jentzsch	346b177a87	extends finishing conditions	2024-07-05 05:52:29 +02:00
stas	d4ec131fa7	change COUNT_LIMIT to ICOUNT_LIMIT	2024-07-04 10:46:24 +02:00
Eyck-Alexander Jentzsch	48370a4555	asmjit passes backend with new CoreDSL	2024-06-22 09:28:26 +02:00
Eyck-Alexander Jentzsch	36b076774e	Merge branch 'develop' of https://git.minres.com/DBT-RISE/DBT-RISE-TGC into develop	2024-06-21 13:35:30 +02:00
Eyck-Alexander Jentzsch	482a4ec253	fixes semihosting callbacks in templates	2024-06-21 13:35:25 +02:00
Eyck Jentzsch	2fb28364c5	fixes remaining templates	2024-06-21 10:49:36 +02:00
Eyck Jentzsch	8460f4ab7f	updates templates to re-enable interactive debugging of generator	2024-06-21 10:46:11 +02:00
Eyck Jentzsch	3fd51cc68c	fixes templates	2024-06-14 19:54:33 +02:00
Eyck Jentzsch	551822916c	applies clang-format	2024-06-14 17:43:12 +02:00