changes the io_buf

Merge remote-tracking branch 'origin/feature/htif' into develop
adds generator changed output
2025-03-11 12:00:31 +01:00 · 2025-02-13 09:34:31 +01:00 · 2025-02-12 20:45:04 +01:00 · 2024-12-28 13:10:49 +01:00 · 2024-12-28 13:07:07 +01:00 · 2024-12-28 13:06:46 +01:00
37 changed files with 7033 additions and 3131 deletions
@@ -20,6 +20,7 @@ set(LIB_SOURCES
    src/iss/arch/tgc5c.cpp
    src/vm/interp/vm_tgc5c.cpp
    src/vm/fp_functions.cpp
+    src/iss/debugger/csr_names.cpp
    src/iss/semihosting/semihosting.cpp
 )

@@ -108,16 +109,6 @@ if(TARGET yaml-cpp::yaml-cpp)
    target_link_libraries(${PROJECT_NAME} PUBLIC yaml-cpp::yaml-cpp)
 endif()

-if(WITH_LLVM)
-    find_package(LLVM)
-    target_compile_definitions(${PROJECT_NAME} PUBLIC ${LLVM_DEFINITIONS})
-    target_include_directories(${PROJECT_NAME} PUBLIC ${LLVM_INCLUDE_DIRS})
-
-    if(BUILD_SHARED_LIBS)
-        target_link_libraries(${PROJECT_NAME} PUBLIC ${LLVM_LIBRARIES})
-    endif()
-endif()
-
 set_target_properties(${PROJECT_NAME} PROPERTIES
    VERSION ${PROJECT_VERSION}
    FRAMEWORK FALSE
@@ -261,3 +252,9 @@ if(TARGET scc-sysc)
        INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} # headers
    )
 endif()
+
+project(elfio-test)
+find_package(Boost COMPONENTS program_options thread REQUIRED)
+
+add_executable(${PROJECT_NAME} src/elfio.cpp)
+target_link_libraries(${PROJECT_NAME} PUBLIC elfio::elfio)
@@ -131,8 +131,6 @@ struct ${coreDef.name.toLowerCase()}: public arch_if {

    uint8_t* get_regs_base_ptr() override;

-    inline uint64_t get_icount() { return reg.icount; }
-
    inline bool should_stop() { return interrupt_sim; }

    inline uint64_t stop_code() { return interrupt_sim; }
@@ -141,8 +139,6 @@ struct ${coreDef.name.toLowerCase()}: public arch_if {

    virtual iss::sync_type needed_sync() const { return iss::NO_SYNC; }

-    inline uint32_t get_last_branch() { return reg.last_branch; }
-

 #pragma pack(push, 1)
    struct ${coreDef.name}_regs {<%
@@ -45,17 +45,17 @@ namespace interp {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
@@ -66,17 +66,17 @@ namespace llvm {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
@@ -88,17 +88,17 @@ namespace tcc {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
@@ -110,17 +110,17 @@ namespace asmjit {
 using namespace sysc;
 volatile std::array<bool, ${array_count}> ${coreDef.name.toLowerCase()}_init = {
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|m_p|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        }),
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%if(coreDef.name.toLowerCase()=="tgc5d" || coreDef.name.toLowerCase()=="tgc5e") {%>,
        iss_factory::instance().register_creator("${coreDef.name.toLowerCase()}|mu_p_clic_pmp|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-            auto* cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+            auto* cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
            auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::${coreDef.name.toLowerCase()}, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_EXT_N | iss::arch::FEAT_CLIC)>>(cc);
            return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::${coreDef.name.toLowerCase()}*>(cpu), gdb_port)}};
        })<%}%>
@@ -38,7 +38,9 @@
 #include <asmjit/asmjit.h>
 #include <util/logging.h>
 #include <iss/instruction_decoder.h>
-
+<%def fcsr = registers.find {it.name=='FCSR'}
+if(fcsr != null) {%>
+#include <vm/fp_functions.h><%}%>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
 #endif
@@ -88,23 +90,24 @@ protected:
    using super::write_reg_to_mem;
    using super::gen_read_mem;
    using super::gen_write_mem;
-    using super::gen_wait;
    using super::gen_leave;
    using super::gen_sync;
   
    using this_class = vm_impl<ARCH>;
    using compile_func = continuation_e (this_class::*)(virt_addr_t&, code_word_t, jit_holder&);

-    continuation_e gen_single_inst_behavior(virt_addr_t&, unsigned int &, jit_holder&) override;
+    continuation_e gen_single_inst_behavior(virt_addr_t&, jit_holder&) override;
    enum globals_e {TVAL = 0, GLOBALS_SIZE};
    void gen_block_prologue(jit_holder& jh) override;
    void gen_block_epilogue(jit_holder& jh) override;
    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
-
+<%if(fcsr != null) {%>
+    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}   
+<%}%>
    void gen_instr_prologue(jit_holder& jh);
    void gen_instr_epilogue(jit_holder& jh);
    inline void gen_raise(jit_holder& jh, uint16_t trap_id, uint16_t cause);
-    template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>> void gen_set_tval(jit_holder& jh, T new_tval) ;
+    template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type> void gen_set_tval(jit_holder& jh, T new_tval) ;
    void gen_set_tval(jit_holder& jh, x86_reg_t _new_tval) ;

    template<unsigned W, typename U, typename S = typename std::make_signed<U>::type>
@@ -112,7 +115,10 @@ protected:
        auto mask = (1ULL<<W) - 1;
        auto sign_mask = 1ULL<<(W-1);
        return (from & mask) | ((from & sign_mask) ? ~mask : 0);
-    } 
+    }
+<%functions.each{ it.eachLine { %>
+    ${it}<%}%>
+<%}%>
 private:
    /****************************************************************************
     * start opcode definitions
@@ -195,7 +201,7 @@ private:
        gen_raise(jh, 0, 2);
        gen_sync(jh, POST_SYNC, instr_descr.size());
        gen_instr_epilogue(jh);
-        return BRANCH;
+        return ILLEGAL_INSTR;
    }
 };

@@ -215,7 +221,7 @@ vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
    }()) {}

 template <typename ARCH>
-continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, jit_holder& jh) {
+continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, jit_holder& jh) {
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
    phys_addr_t paddr(pc);
@@ -224,10 +230,9 @@ continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned
        paddr = this->core.virt2phys(pc);
    auto res = this->core.read(paddr, 4, data);
    if (res != iss::Ok)
-        throw trap_access(TRAP_ID, pc.val);
+        return ILLEGAL_FETCH;
    if (instr == 0x0000006f || (instr&0xffff)==0xa001)
-        throw simulation_stopped(0); // 'J 0' or 'C.J 0'
-    ++inst_cnt;
+        return JUMP_TO_SELF;
    uint32_t inst_index = instr_decoder.decode_instr(instr);
    compile_func f = nullptr;
    if(inst_index < instr_descr.size())
@@ -257,6 +262,7 @@ void vm_impl<ARCH>::gen_instr_epilogue(jit_holder& jh) {
    cmp(cc, current_trap_state, 0);
    cc.jne(jh.trap_entry);
    cc.inc(get_ptr_for(jh, traits::ICOUNT));
+    cc.inc(get_ptr_for(jh, traits::CYCLE));
 }
 template <typename ARCH>
 void vm_impl<ARCH>::gen_block_prologue(jit_holder& jh){
@@ -302,6 +308,7 @@ inline void vm_impl<ARCH>::gen_raise(jit_holder& jh, uint16_t trap_id, uint16_t
    auto tmp1 = get_reg_for(cc, traits::TRAP_STATE);
    mov(cc, tmp1, 0x80ULL << 24 | (cause << 16) | trap_id);
    mov(cc, get_ptr_for(jh, traits::TRAP_STATE), tmp1);
+    cc.jmp(jh.trap_entry);
 }
 template <typename ARCH>
 template <typename T, typename>
@@ -310,8 +317,8 @@ void vm_impl<ARCH>::gen_set_tval(jit_holder& jh, T new_tval) {
    }
 template <typename ARCH>
 void vm_impl<ARCH>::gen_set_tval(jit_holder& jh, x86_reg_t _new_tval) {
-    if(std::holds_alternative<x86::Gp>(_new_tval)) {
-        x86::Gp new_tval = std::get<x86::Gp>(_new_tval);
+    if(nonstd::holds_alternative<x86::Gp>(_new_tval)) {
+        x86::Gp new_tval = nonstd::get<x86::Gp>(_new_tval);
        if(new_tval.size() < 8)
            new_tval = gen_ext_Gp(jh.cc, new_tval, 64, false);
        mov(jh.cc, jh.globals[TVAL], new_tval);
@@ -199,9 +199,6 @@ template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
    volatile CODE_WORD x = insn;
    insn = 2 * x;
 }
-
-template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
-
 // according to
 // https://stackoverflow.com/questions/8871204/count-number-of-1s-in-binary-representation
 #ifdef __GCC__
@@ -257,17 +254,21 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
    while(!this->core.should_stop() &&
            !(is_icount_limit_enabled(cond) && icount >= count_limit) &&
            !(is_fcount_limit_enabled(cond) && fetch_count >= count_limit)){
-        fetch_count++;
+        if(this->debugging_enabled())
+            this->tgt_adapter->check_continue(*PC);
+        pc.val=*PC;
        if(fetch_ins(pc, data)!=iss::Ok){
-            this->do_sync(POST_SYNC, std::numeric_limits<unsigned>::max());
-            pc.val = super::core.enter_trap(std::numeric_limits<uint64_t>::max(), pc.val, 0);
+            // Instruction fetch failed: notify before and after processing the spawn blocks,
+            // passing the maximum unsigned value as instruction id.
+            // NOTE(review): `sync_exec && PRE_SYNC` / `sync_exec && POST_SYNC` are logical ANDs with
+            // enumerators; if a bit test was intended this should be `&` - confirm against sync_type.
+            if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, std::numeric_limits<unsigned>::max());
+            process_spawn_blocks();
+            // the guard tests POST_SYNC, so the notification must announce POST_SYNC as well
+            // (PRE_SYNC was passed here, which reported the pre-phase twice and never the post-phase)
+            if(this->sync_exec && POST_SYNC) this->do_sync(POST_SYNC, std::numeric_limits<unsigned>::max());
+            pc.val = super::core.enter_trap(arch::traits<ARCH>::RV_CAUSE_FETCH_ACCESS<<16, pc.val, 0);
        } else {
            if (is_jump_to_self_enabled(cond) &&
                    (instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
            uint32_t inst_index = instr_decoder.decode_instr(instr);
            opcode_e inst_id = arch::traits<ARCH>::opcode_e::MAX_OPCODE;;
            if(inst_index <instr_descr.size())
-                inst_id = instr_descr.at(instr_decoder.decode_instr(instr)).op;
+                inst_id = instr_descr[inst_index].op;

            // pre execution stuff
            this->core.reg.last_branch = 0;
@@ -279,6 +280,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    <%}%>if(this->disass_enabled){
                        /* generate console output when executing the command */<%instr.disass.eachLine{%>
                        ${it}<%}%>
+                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers<%instr.usedVariables.each{ k,v->
                    if(v.isArray) {%>
@@ -310,11 +312,11 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                icount++;
                instret++;
            }
-            cycle++;
-            pc.val=*NEXT_PC;
-            this->core.reg.PC = this->core.reg.NEXT_PC;
+            *PC = *NEXT_PC;
            this->core.reg.trap_state =  this->core.reg.pending_trap;
        }
+        fetch_count++;
+        cycle++;
    }
    return pc;
 }
@@ -37,7 +37,9 @@
 #include <iss/llvm/vm_base.h>
 #include <util/logging.h>
 #include <iss/instruction_decoder.h>
-
+<%def fcsr = registers.find {it.name=='FCSR'}
+if(fcsr != null) {%>
+#include <vm/fp_functions.h><%}%>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
 #endif
@@ -83,7 +85,9 @@ protected:
    using vm_base<ARCH>::get_reg_ptr;

    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
-
+<%if(fcsr != null) {%>
+    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}   
+<%}%>
    template <typename T> inline ConstantInt *size(T type) {
        return ConstantInt::get(getContext(), APInt(32, type->getType()->getScalarSizeInBits()));
    }
@@ -97,7 +101,7 @@ protected:
        return super::gen_cond_assign(cond, this->gen_ext(trueVal, size), this->gen_ext(falseVal, size));
    }

-    std::tuple<continuation_e, BasicBlock *> gen_single_inst_behavior(virt_addr_t &, unsigned int &, BasicBlock *) override;
+    std::tuple<continuation_e, BasicBlock *> gen_single_inst_behavior(virt_addr_t &, BasicBlock *) override;

    void gen_leave_behavior(BasicBlock *leave_blk) override;
    void gen_raise_trap(uint16_t trap_id, uint16_t cause);
@@ -130,8 +134,10 @@ protected:
        auto mask = (1ULL<<W) - 1;
        auto sign_mask = 1ULL<<(W-1);
        return (from & mask) | ((from & sign_mask) ? ~mask : 0);
-    }   
-
+    }
+<%functions.each{ it.eachLine { %>
+    ${it}<%}%>
+<%}%>
 private:
    /****************************************************************************
     * start opcode definitions
@@ -198,7 +204,7 @@ private:
            };
            this->builder.CreateCall(this->mod->getFunction("print_disass"), args);
        }
-		this->gen_sync(iss::PRE_SYNC, instr_descr.size());
+        this->gen_sync(iss::PRE_SYNC, instr_descr.size());
        this->builder.CreateStore(this->builder.CreateLoad(this->get_typeptr(traits::NEXT_PC), get_reg_ptr(traits::NEXT_PC), true),
                                   get_reg_ptr(traits::PC), true);
        this->builder.CreateStore(
@@ -212,7 +218,7 @@ private:
        bb = this->leave_blk;
        this->gen_instr_epilogue(bb);
        this->builder.CreateBr(bb);
-        return std::make_tuple(BRANCH, nullptr);
+        return std::make_tuple(ILLEGAL_INSTR, nullptr);
    }    
 };

@@ -238,7 +244,7 @@ vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)

 template <typename ARCH>
 std::tuple<continuation_e, BasicBlock *>
-vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, BasicBlock *this_block) {
+vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, BasicBlock *this_block) {
    // we fetch at max 4 byte, alignment is 2
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
@@ -247,20 +253,13 @@ vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt,
    auto *const data = (uint8_t *)&instr;
    if(this->core.has_mmu())
        paddr = this->core.virt2phys(pc);
-    //TODO: re-add page handling
-//    if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
-//        auto res = this->core.read(paddr, 2, data);
-//        if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
-//        if ((instr & 0x3) == 0x3) { // this is a 32bit instruction
-//            res = this->core.read(this->core.v2p(pc + 2), 2, data + 2);
-//        }
-//    } else {
    auto res = this->core.read(paddr, 4, data);
-    if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
-//    }
-    if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
-    // curr pc on stack
-    ++inst_cnt;
+    if (res != iss::Ok) 
+        return std::make_tuple(ILLEGAL_FETCH, nullptr);
+    if (instr == 0x0000006f || (instr&0xffff)==0xa001){
+        this->builder.CreateBr(this->leave_blk);
+        return std::make_tuple(JUMP_TO_SELF, nullptr);
+        }
    uint32_t inst_index = instr_decoder.decode_instr(instr);
    compile_func f = nullptr;
    if(inst_index < instr_descr.size())
@@ -281,6 +280,7 @@ template <typename ARCH>
 void vm_impl<ARCH>::gen_raise_trap(uint16_t trap_id, uint16_t cause) {
    auto *TRAP_val = this->gen_const(32, 0x80 << 24 | (cause << 16) | trap_id);
    this->builder.CreateStore(TRAP_val, get_reg_ptr(traits::TRAP_STATE), true);
+    this->builder.CreateBr(this->trap_blk);
 }

 template <typename ARCH>
@@ -341,6 +341,10 @@ void vm_impl<ARCH>::gen_instr_epilogue(BasicBlock *bb) {
    auto* icount_val = this->builder.CreateAdd(
        this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::ICOUNT), get_reg_ptr(arch::traits<ARCH>::ICOUNT)), this->gen_const(64U, 1));
    this->builder.CreateStore(icount_val, get_reg_ptr(arch::traits<ARCH>::ICOUNT), false);
+    //increment cyclecount
+    auto* cycle_val = this->builder.CreateAdd(
+        this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::CYCLE), get_reg_ptr(arch::traits<ARCH>::CYCLE)), this->gen_const(64U, 1));
+    this->builder.CreateStore(cycle_val, get_reg_ptr(arch::traits<ARCH>::CYCLE), false);
 }

 } // namespace ${coreDef.name.toLowerCase()}
@@ -383,4 +387,4 @@ volatile std::array<bool, 2> dummy = {
 };
 }
 }
-// clang-format on
+// clang-format on
@@ -38,7 +38,9 @@
 #include <util/logging.h>
 #include <sstream>
 #include <iss/instruction_decoder.h>
-
+<%def fcsr = registers.find {it.name=='FCSR'}
+if(fcsr != null) {%>
+#include <vm/fp_functions.h><%}%>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
 #endif
@@ -81,16 +83,21 @@ protected:
    using vm_base<ARCH>::get_reg_ptr;

    using this_class = vm_impl<ARCH>;
-    using compile_ret_t = std::tuple<continuation_e>;
+    using compile_ret_t = continuation_e;
    using compile_func = compile_ret_t (this_class::*)(virt_addr_t &pc, code_word_t instr, tu_builder&);

    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
+<%
+if(fcsr != null) {%>
+    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}   
+<%}%>
+    void add_prologue(tu_builder& tu) override;

    void setup_module(std::string m) override {
        super::setup_module(m);
    }

-    compile_ret_t gen_single_inst_behavior(virt_addr_t &, unsigned int &, tu_builder&) override;
+    compile_ret_t gen_single_inst_behavior(virt_addr_t &, tu_builder&) override;

    void gen_trap_behavior(tu_builder& tu) override;

@@ -98,8 +105,6 @@ protected:

    void gen_leave_trap(tu_builder& tu, unsigned lvl);

-    void gen_wait(tu_builder& tu, unsigned type);
-
    inline void gen_set_tval(tu_builder& tu, uint64_t new_tval);

    inline void gen_set_tval(tu_builder& tu, value new_tval);
@@ -133,6 +138,9 @@ protected:
        return (from & mask) | ((from & sign_mask) ? ~mask : 0);
    }

+<%functions.each{ it.eachLine { %>
+    ${it}<%}%>
+<%}%>
 private:
    /****************************************************************************
     * start opcode definitions
@@ -163,10 +171,12 @@ private:
        <%}%>if(this->disass_enabled){
            /* generate console output when executing the command */<%instr.disass.eachLine{%>
            ${it}<%}%>
+            tu("print_disass(core_ptr, {:#x}, \"{}\");", pc.val, mnemonic);
        }
        auto cur_pc_val = tu.constant(pc.val, traits::reg_bit_widths[traits::PC]);
        pc=pc+ ${instr.length/8};
        gen_set_pc(tu, pc, traits::NEXT_PC);
+        tu("(*cycle)++;");
        tu.open_scope();
        this->gen_set_tval(tu, instr);
        <%instr.behavior.eachLine{%>${it}
@@ -187,11 +197,11 @@ private:
            tu("print_disass(core_ptr, {:#x}, \"{}\");", pc.val, std::string("illegal_instruction"));
        }
        pc = pc + ((instr & 3) == 3 ? 4 : 2);
-        gen_raise_trap(tu, 0, 2);     // illegal instruction trap
+        gen_raise_trap(tu, 0, static_cast<int32_t>(traits:: RV_CAUSE_ILLEGAL_INSTRUCTION));
        this->gen_set_tval(tu, instr);
        vm_impl::gen_sync(tu, iss::POST_SYNC, instr_descr.size());
        vm_impl::gen_trap_check(tu);
-        return BRANCH;
+        return ILLEGAL_INSTR;
    }
 };

@@ -216,28 +226,19 @@ vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
    }()) {}

 template <typename ARCH>
-std::tuple<continuation_e>
-vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) {
+continuation_e
+vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, tu_builder& tu) {
    // we fetch at max 4 byte, alignment is 2
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
    phys_addr_t paddr(pc);
    if(this->core.has_mmu())
        paddr = this->core.virt2phys(pc);
-    //TODO: re-add page handling
-//    if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
-//        auto res = this->core.read(paddr, 2, data);
-//        if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
-//        if ((insn & 0x3) == 0x3) { // this is a 32bit instruction
-//            res = this->core.read(this->core.v2p(pc + 2), 2, data + 2);
-//        }
-//    } else {
    auto res = this->core.read(paddr, 4, reinterpret_cast<uint8_t*>(&instr));
-    if (res != iss::Ok) throw trap_access(TRAP_ID, pc.val);
-//    }
-    if (instr == 0x0000006f || (instr&0xffff)==0xa001) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
-    // curr pc on stack
-    ++inst_cnt;
+    if (res != iss::Ok)
+        return ILLEGAL_FETCH;
+    if (instr == 0x0000006f || (instr&0xffff)==0xa001) 
+        return JUMP_TO_SELF;
    uint32_t inst_index = instr_decoder.decode_instr(instr);
    compile_func f = nullptr;
    if(inst_index < instr_descr.size())
@@ -258,9 +259,6 @@ template <typename ARCH> void vm_impl<ARCH>::gen_leave_trap(tu_builder& tu, unsi
    tu.store(traits::LAST_BRANCH, tu.constant(static_cast<int>(UNKNOWN_JUMP), 32));
 }

-template <typename ARCH> void vm_impl<ARCH>::gen_wait(tu_builder& tu, unsigned type) {
-}
-
 template <typename ARCH> void vm_impl<ARCH>::gen_set_tval(tu_builder& tu, uint64_t new_tval) {
    tu(fmt::format("tval = {};", new_tval));
 }
@@ -275,6 +273,41 @@ template <typename ARCH> void vm_impl<ARCH>::gen_trap_behavior(tu_builder& tu) {
    tu.store(traits::LAST_BRANCH, tu.constant(static_cast<int>(UNKNOWN_JUMP),32));
    tu("return *next_pc;");
 }
+template <typename ARCH> void vm_impl<ARCH>::add_prologue(tu_builder& tu){ // collects declaration text in a string stream and passes it to tu.add_prologue()
+    std::ostringstream os; // accumulates the prologue text
+    os << tu.add_reg_ptr("trap_state", arch::traits<ARCH>::TRAP_STATE, this->regs_base_ptr); // appends whatever add_reg_ptr() returns for this register (presumably a pointer declaration - confirm in tu_builder)
+    os << tu.add_reg_ptr("pending_trap", arch::traits<ARCH>::PENDING_TRAP, this->regs_base_ptr);
+    os << tu.add_reg_ptr("cycle", arch::traits<ARCH>::CYCLE, this->regs_base_ptr);
+<%if(fcsr != null) {%>
+    os << "uint32_t (*fget_flags)()=" << (uintptr_t)&fget_flags << ";\\n"; // this and the following lines (emitted only when fcsr != null): one function-pointer declaration per floating-point helper, initialised with the helper's address streamed as an integer
+    os << "uint32_t (*fadd_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fadd_s << ";\\n"; // NOTE(review): "\\n" presumably turns into "\n" once the template is expanded - confirm with the generator
+    os << "uint32_t (*fsub_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fsub_s << ";\\n";
+    os << "uint32_t (*fmul_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fmul_s << ";\\n";
+    os << "uint32_t (*fdiv_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fdiv_s << ";\\n";
+    os << "uint32_t (*fsqrt_s)(uint32_t v1, uint8_t mode)=" << (uintptr_t)&fsqrt_s << ";\\n";
+    os << "uint32_t (*fcmp_s)(uint32_t v1, uint32_t v2, uint32_t op)=" << (uintptr_t)&fcmp_s << ";\\n";
+    os << "uint32_t (*fcvt_s)(uint32_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_s << ";\\n";
+    os << "uint32_t (*fmadd_s)(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t op, uint8_t mode)=" << (uintptr_t)&fmadd_s << ";\\n";
+    os << "uint32_t (*fsel_s)(uint32_t v1, uint32_t v2, uint32_t op)=" << (uintptr_t)&fsel_s << ";\\n";
+    os << "uint32_t (*fclass_s)( uint32_t v1 )=" << (uintptr_t)&fclass_s << ";\\n";
+    os << "uint32_t (*fconv_d2f)(uint64_t v1, uint8_t mode)=" << (uintptr_t)&fconv_d2f << ";\\n";
+    os << "uint64_t (*fconv_f2d)(uint32_t v1, uint8_t mode)=" << (uintptr_t)&fconv_f2d << ";\\n";
+    os << "uint64_t (*fadd_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fadd_d << ";\\n";
+    os << "uint64_t (*fsub_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fsub_d << ";\\n";
+    os << "uint64_t (*fmul_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fmul_d << ";\\n";
+    os << "uint64_t (*fdiv_d)(uint64_t v1, uint64_t v2, uint8_t mode)=" << (uintptr_t)&fdiv_d << ";\\n";
+    os << "uint64_t (*fsqrt_d)(uint64_t v1, uint8_t mode)=" << (uintptr_t)&fsqrt_d << ";\\n";
+    os << "uint64_t (*fcmp_d)(uint64_t v1, uint64_t v2, uint32_t op)=" << (uintptr_t)&fcmp_d << ";\\n";
+    os << "uint64_t (*fcvt_d)(uint64_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_d << ";\\n";
+    os << "uint64_t (*fmadd_d)(uint64_t v1, uint64_t v2, uint64_t v3, uint32_t op, uint8_t mode)=" << (uintptr_t)&fmadd_d << ";\\n";
+    os << "uint64_t (*fsel_d)(uint64_t v1, uint64_t v2, uint32_t op)=" << (uintptr_t)&fsel_d << ";\\n";
+    os << "uint64_t (*fclass_d)(uint64_t v1  )=" << (uintptr_t)&fclass_d << ";\\n";
+    os << "uint64_t (*fcvt_32_64)(uint32_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_32_64 << ";\\n";
+    os << "uint32_t (*fcvt_64_32)(uint64_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_64_32 << ";\\n";
+    os << "uint32_t (*unbox_s)(uint64_t v)=" << (uintptr_t)&unbox_s << ";\\n";
+    <%}%>
+    tu.add_prologue(os.str()); // hand the assembled text to the builder
+}

 } // namespace ${coreDef.name.toLowerCase()}

@@ -37,10 +37,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef specialize_h
 #define specialize_h 1

-#include <stdbool.h>
-#include <stdint.h>
 #include "primitiveTypes.h"
 #include "softfloat.h"
+#include <stdbool.h>
+#include <stdint.h>

 /*----------------------------------------------------------------------------
 | Default value for 'softfloat_detectTininess'.
@@ -53,21 +53,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *----------------------------------------------------------------------------*/
 #define ui32_fromPosOverflow 0xFFFFFFFF
 #define ui32_fromNegOverflow 0xFFFFFFFF
-#define ui32_fromNaN         0xFFFFFFFF
-#define i32_fromPosOverflow  (-0x7FFFFFFF - 1)
-#define i32_fromNegOverflow  (-0x7FFFFFFF - 1)
-#define i32_fromNaN          (-0x7FFFFFFF - 1)
+#define ui32_fromNaN 0xFFFFFFFF
+#define i32_fromPosOverflow (-0x7FFFFFFF - 1)
+#define i32_fromNegOverflow (-0x7FFFFFFF - 1)
+#define i32_fromNaN (-0x7FFFFFFF - 1)

 /*----------------------------------------------------------------------------
 | The values to return on conversions to 64-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
-#define ui64_fromPosOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
-#define ui64_fromNegOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
-#define ui64_fromNaN         UINT64_C( 0xFFFFFFFFFFFFFFFF )
-#define i64_fromPosOverflow  (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
-#define i64_fromNegOverflow  (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
-#define i64_fromNaN          (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
+#define ui64_fromPosOverflow UINT64_C(0xFFFFFFFFFFFFFFFF)
+#define ui64_fromNegOverflow UINT64_C(0xFFFFFFFFFFFFFFFF)
+#define ui64_fromNaN UINT64_C(0xFFFFFFFFFFFFFFFF)
+#define i64_fromPosOverflow (-INT64_C(0x7FFFFFFFFFFFFFFF) - 1)
+#define i64_fromNegOverflow (-INT64_C(0x7FFFFFFFFFFFFFFF) - 1)
+#define i64_fromNaN (-INT64_C(0x7FFFFFFFFFFFFFFF) - 1)

 /*----------------------------------------------------------------------------
 | "Common NaN" structure, used to transfer NaN representations from one format
@@ -92,7 +92,7 @@ struct commonNaN {
 | 16-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF))
+#define softfloat_isSigNaNF16UI(uiA) ((((uiA)&0x7E00) == 0x7C00) && ((uiA)&0x01FF))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
@@ -100,13 +100,13 @@ struct commonNaN {
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr );
+void softfloat_f16UIToCommonNaN(uint_fast16_t uiA, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr );
+uint_fast16_t softfloat_commonNaNToF16UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 16-bit floating-
@@ -114,15 +114,14 @@ uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr );
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast16_t
- softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );
+uint_fast16_t softfloat_propagateNaNF16UI(uint_fast16_t uiA, uint_fast16_t uiB);

 /*----------------------------------------------------------------------------
 | Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
 | 16-bit brain floating-point (BF16) signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNBF16UI( uiA ) ((((uiA) & 0x7FC0) == 0x7F80) && ((uiA) & 0x003F))
+#define softfloat_isSigNaNBF16UI(uiA) ((((uiA)&0x7FC0) == 0x7F80) && ((uiA)&0x003F))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts
@@ -130,13 +129,13 @@ uint_fast16_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_bf16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr );
+void softfloat_bf16UIToCommonNaN(uint_fast16_t uiA, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr );
+uint_fast16_t softfloat_commonNaNToBF16UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 32-bit floating-point NaN.
@@ -148,7 +147,7 @@ uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr );
 | 32-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF32UI( uiA ) ((((uiA) & 0x7FC00000) == 0x7F800000) && ((uiA) & 0x003FFFFF))
+#define softfloat_isSigNaNF32UI(uiA) ((((uiA)&0x7FC00000) == 0x7F800000) && ((uiA)&0x003FFFFF))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 32-bit floating-point NaN, converts
@@ -156,13 +155,13 @@ uint_fast16_t softfloat_commonNaNToBF16UI( const struct commonNaN *aPtr );
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr );
+void softfloat_f32UIToCommonNaN(uint_fast32_t uiA, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );
+uint_fast32_t softfloat_commonNaNToF32UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 32-bit floating-
@@ -170,20 +169,20 @@ uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast32_t
- softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB );
+uint_fast32_t softfloat_propagateNaNF32UI(uint_fast32_t uiA, uint_fast32_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 64-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF64UI UINT64_C( 0xFFF8000000000000 )
+#define defaultNaNF64UI UINT64_C(0xFFF8000000000000)

 /*----------------------------------------------------------------------------
 | Returns true when 64-bit unsigned integer 'uiA' has the bit pattern of a
 | 64-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF64UI( uiA ) ((((uiA) & UINT64_C( 0x7FF8000000000000 )) == UINT64_C( 0x7FF0000000000000 )) && ((uiA) & UINT64_C( 0x0007FFFFFFFFFFFF )))
+#define softfloat_isSigNaNF64UI(uiA)                                                                                                       \
+    ((((uiA)&UINT64_C(0x7FF8000000000000)) == UINT64_C(0x7FF0000000000000)) && ((uiA)&UINT64_C(0x0007FFFFFFFFFFFF)))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 64-bit floating-point NaN, converts
@@ -191,13 +190,13 @@ uint_fast32_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr );
+void softfloat_f64UIToCommonNaN(uint_fast64_t uiA, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr );
+uint_fast64_t softfloat_commonNaNToF64UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 64-bit floating-
@@ -205,14 +204,13 @@ uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr );
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast64_t
- softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB );
+uint_fast64_t softfloat_propagateNaNF64UI(uint_fast64_t uiA, uint_fast64_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 80-bit extended floating-point NaN.
 *----------------------------------------------------------------------------*/
 #define defaultNaNExtF80UI64 0xFFFF
-#define defaultNaNExtF80UI0  UINT64_C( 0xC000000000000000 )
+#define defaultNaNExtF80UI0 UINT64_C(0xC000000000000000)

 /*----------------------------------------------------------------------------
 | Returns true when the 80-bit unsigned integer formed from concatenating
@@ -220,7 +218,8 @@ uint_fast64_t
 | floating-point signaling NaN.
 | Note:  This macro evaluates its arguments more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNExtF80UI( uiA64, uiA0 ) ((((uiA64) & 0x7FFF) == 0x7FFF) && ! ((uiA0) & UINT64_C( 0x4000000000000000 )) && ((uiA0) & UINT64_C( 0x3FFFFFFFFFFFFFFF )))
+#define softfloat_isSigNaNExtF80UI(uiA64, uiA0)                                                                                            \
+    ((((uiA64)&0x7FFF) == 0x7FFF) && !((uiA0)&UINT64_C(0x4000000000000000)) && ((uiA0)&UINT64_C(0x3FFFFFFFFFFFFFFF)))

 #ifdef SOFTFLOAT_FAST_INT64

@@ -236,16 +235,14 @@ uint_fast64_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_extF80UIToCommonNaN(
-     uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr );
+void softfloat_extF80UIToCommonNaN(uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and returns the bit pattern of this value as an unsigned
 | integer.
 *----------------------------------------------------------------------------*/
-struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );
+struct uint128 softfloat_commonNaNToExtF80UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting the unsigned integer formed from concatenating 'uiA64' and
@@ -256,19 +253,13 @@ struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );
 | result.  If either original floating-point value is a signaling NaN, the
 | invalid exception is raised.
 *----------------------------------------------------------------------------*/
-struct uint128
- softfloat_propagateNaNExtF80UI(
-     uint_fast16_t uiA64,
-     uint_fast64_t uiA0,
-     uint_fast16_t uiB64,
-     uint_fast64_t uiB0
- );
+struct uint128 softfloat_propagateNaNExtF80UI(uint_fast16_t uiA64, uint_fast64_t uiA0, uint_fast16_t uiB64, uint_fast64_t uiB0);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF128UI64 UINT64_C( 0xFFFF800000000000 )
-#define defaultNaNF128UI0  UINT64_C( 0 )
+#define defaultNaNF128UI64 UINT64_C(0xFFFF800000000000)
+#define defaultNaNF128UI0 UINT64_C(0)

 /*----------------------------------------------------------------------------
 | Returns true when the 128-bit unsigned integer formed from concatenating
@@ -276,7 +267,8 @@ struct uint128
 | point signaling NaN.
 | Note:  This macro evaluates its arguments more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF128UI( uiA64, uiA0 ) ((((uiA64) & UINT64_C( 0x7FFF800000000000 )) == UINT64_C( 0x7FFF000000000000 )) && ((uiA0) || ((uiA64) & UINT64_C( 0x00007FFFFFFFFFFF ))))
+#define softfloat_isSigNaNF128UI(uiA64, uiA0)                                                                                              \
+    ((((uiA64)&UINT64_C(0x7FFF800000000000)) == UINT64_C(0x7FFF000000000000)) && ((uiA0) || ((uiA64)&UINT64_C(0x00007FFFFFFFFFFF))))

 /*----------------------------------------------------------------------------
 | Assuming the unsigned integer formed from concatenating 'uiA64' and 'uiA0'
@@ -285,15 +277,13 @@ struct uint128
 | pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid exception
 | is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_f128UIToCommonNaN(
-     uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr );
+void softfloat_f128UIToCommonNaN(uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );
+struct uint128 softfloat_commonNaNToF128UI(const struct commonNaN*);

 /*----------------------------------------------------------------------------
 | Interpreting the unsigned integer formed from concatenating 'uiA64' and
@@ -304,13 +294,7 @@ struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );
 | If either original floating-point value is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-struct uint128
- softfloat_propagateNaNF128UI(
-     uint_fast64_t uiA64,
-     uint_fast64_t uiA0,
-     uint_fast64_t uiB64,
-     uint_fast64_t uiB0
- );
+struct uint128 softfloat_propagateNaNF128UI(uint_fast64_t uiA64, uint_fast64_t uiA0, uint_fast64_t uiB64, uint_fast64_t uiB0);

 #else

@@ -325,18 +309,14 @@ struct uint128
 | common NaN at the location pointed to by 'zPtr'.  If the NaN is a signaling
 | NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_extF80MToCommonNaN(
-     const struct extFloat80M *aSPtr, struct commonNaN *zPtr );
+void softfloat_extF80MToCommonNaN(const struct extFloat80M* aSPtr, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and stores this NaN at the location pointed to by
 | 'zSPtr'.
 *----------------------------------------------------------------------------*/
-void
- softfloat_commonNaNToExtF80M(
-     const struct commonNaN *aPtr, struct extFloat80M *zSPtr );
+void softfloat_commonNaNToExtF80M(const struct commonNaN* aPtr, struct extFloat80M* zSPtr);

 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 80-bit extended floating-point values
@@ -344,12 +324,7 @@ void
 | at the location pointed to by 'zSPtr'.  If either original floating-point
 | value is a signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_propagateNaNExtF80M(
-     const struct extFloat80M *aSPtr,
-     const struct extFloat80M *bSPtr,
-     struct extFloat80M *zSPtr
- );
+void softfloat_propagateNaNExtF80M(const struct extFloat80M* aSPtr, const struct extFloat80M* bSPtr, struct extFloat80M* zSPtr);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
@@ -357,7 +332,7 @@ void
 #define defaultNaNF128UI96 0xFFFF8000
 #define defaultNaNF128UI64 0
 #define defaultNaNF128UI32 0
-#define defaultNaNF128UI0  0
+#define defaultNaNF128UI0 0

 /*----------------------------------------------------------------------------
 | Assuming the 128-bit floating-point value pointed to by 'aWPtr' is a NaN,
@@ -367,8 +342,7 @@ void
 | four 32-bit elements that concatenate in the platform's normal endian order
 | to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void
- softfloat_f128MToCommonNaN( const uint32_t *aWPtr, struct commonNaN *zPtr );
+void softfloat_f128MToCommonNaN(const uint32_t* aWPtr, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
@@ -376,8 +350,7 @@ void
 | 'zWPtr' points to an array of four 32-bit elements that concatenate in the
 | platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void
- softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr );
+void softfloat_commonNaNToF128M(const struct commonNaN* aPtr, uint32_t* zWPtr);

 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 128-bit floating-point values pointed to by
@@ -387,11 +360,8 @@ void
 | and 'zWPtr' points to an array of four 32-bit elements that concatenate in
 | the platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void
- softfloat_propagateNaNF128M(
-     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr );
+void softfloat_propagateNaNF128M(const uint32_t* aWPtr, const uint32_t* bWPtr, uint32_t* zWPtr);

 #endif

 #endif
-
@@ -37,10 +37,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef specialize_h
 #define specialize_h 1

-#include <stdbool.h>
-#include <stdint.h>
 #include "primitiveTypes.h"
 #include "softfloat.h"
+#include <stdbool.h>
+#include <stdint.h>

 /*----------------------------------------------------------------------------
 | Default value for 'softfloat_detectTininess'.
@@ -53,21 +53,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *----------------------------------------------------------------------------*/
 #define ui32_fromPosOverflow 0xFFFFFFFF
 #define ui32_fromNegOverflow 0xFFFFFFFF
-#define ui32_fromNaN         0xFFFFFFFF
-#define i32_fromPosOverflow  (-0x7FFFFFFF - 1)
-#define i32_fromNegOverflow  (-0x7FFFFFFF - 1)
-#define i32_fromNaN          (-0x7FFFFFFF - 1)
+#define ui32_fromNaN 0xFFFFFFFF
+#define i32_fromPosOverflow (-0x7FFFFFFF - 1)
+#define i32_fromNegOverflow (-0x7FFFFFFF - 1)
+#define i32_fromNaN (-0x7FFFFFFF - 1)

 /*----------------------------------------------------------------------------
 | The values to return on conversions to 64-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
-#define ui64_fromPosOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
-#define ui64_fromNegOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
-#define ui64_fromNaN         UINT64_C( 0xFFFFFFFFFFFFFFFF )
-#define i64_fromPosOverflow  (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
-#define i64_fromNegOverflow  (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
-#define i64_fromNaN          (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
+#define ui64_fromPosOverflow UINT64_C(0xFFFFFFFFFFFFFFFF)
+#define ui64_fromNegOverflow UINT64_C(0xFFFFFFFFFFFFFFFF)
+#define ui64_fromNaN UINT64_C(0xFFFFFFFFFFFFFFFF)
+#define i64_fromPosOverflow (-INT64_C(0x7FFFFFFFFFFFFFFF) - 1)
+#define i64_fromNegOverflow (-INT64_C(0x7FFFFFFFFFFFFFFF) - 1)
+#define i64_fromNaN (-INT64_C(0x7FFFFFFFFFFFFFFF) - 1)

 /*----------------------------------------------------------------------------
 | "Common NaN" structure, used to transfer NaN representations from one format
@@ -92,7 +92,7 @@ struct commonNaN {
 | 16-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF))
+#define softfloat_isSigNaNF16UI(uiA) ((((uiA)&0x7E00) == 0x7C00) && ((uiA)&0x01FF))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
@@ -100,13 +100,13 @@ struct commonNaN {
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr );
+void softfloat_f16UIToCommonNaN(uint_fast16_t uiA, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr );
+uint_fast16_t softfloat_commonNaNToF16UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 16-bit floating-
@@ -114,8 +114,7 @@ uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr );
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast16_t
- softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );
+uint_fast16_t softfloat_propagateNaNF16UI(uint_fast16_t uiA, uint_fast16_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 32-bit floating-point NaN.
@@ -127,7 +126,7 @@ uint_fast16_t
 | 32-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF32UI( uiA ) ((((uiA) & 0x7FC00000) == 0x7F800000) && ((uiA) & 0x003FFFFF))
+#define softfloat_isSigNaNF32UI(uiA) ((((uiA)&0x7FC00000) == 0x7F800000) && ((uiA)&0x003FFFFF))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 32-bit floating-point NaN, converts
@@ -135,13 +134,13 @@ uint_fast16_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr );
+void softfloat_f32UIToCommonNaN(uint_fast32_t uiA, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );
+uint_fast32_t softfloat_commonNaNToF32UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 32-bit floating-
@@ -149,20 +148,20 @@ uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast32_t
- softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB );
+uint_fast32_t softfloat_propagateNaNF32UI(uint_fast32_t uiA, uint_fast32_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 64-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF64UI UINT64_C( 0xFFF8000000000000 )
+#define defaultNaNF64UI UINT64_C(0xFFF8000000000000)

 /*----------------------------------------------------------------------------
 | Returns true when 64-bit unsigned integer 'uiA' has the bit pattern of a
 | 64-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF64UI( uiA ) ((((uiA) & UINT64_C( 0x7FF8000000000000 )) == UINT64_C( 0x7FF0000000000000 )) && ((uiA) & UINT64_C( 0x0007FFFFFFFFFFFF )))
+#define softfloat_isSigNaNF64UI(uiA)                                                                                                       \
+    ((((uiA)&UINT64_C(0x7FF8000000000000)) == UINT64_C(0x7FF0000000000000)) && ((uiA)&UINT64_C(0x0007FFFFFFFFFFFF)))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 64-bit floating-point NaN, converts
@@ -170,13 +169,13 @@ uint_fast32_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr );
+void softfloat_f64UIToCommonNaN(uint_fast64_t uiA, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr );
+uint_fast64_t softfloat_commonNaNToF64UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 64-bit floating-
@@ -184,14 +183,13 @@ uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr );
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast64_t
- softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB );
+uint_fast64_t softfloat_propagateNaNF64UI(uint_fast64_t uiA, uint_fast64_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 80-bit extended floating-point NaN.
 *----------------------------------------------------------------------------*/
 #define defaultNaNExtF80UI64 0xFFFF
-#define defaultNaNExtF80UI0  UINT64_C( 0xC000000000000000 )
+#define defaultNaNExtF80UI0 UINT64_C(0xC000000000000000)

 /*----------------------------------------------------------------------------
 | Returns true when the 80-bit unsigned integer formed from concatenating
@@ -199,7 +197,8 @@ uint_fast64_t
 | floating-point signaling NaN.
 | Note:  This macro evaluates its arguments more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNExtF80UI( uiA64, uiA0 ) ((((uiA64) & 0x7FFF) == 0x7FFF) && ! ((uiA0) & UINT64_C( 0x4000000000000000 )) && ((uiA0) & UINT64_C( 0x3FFFFFFFFFFFFFFF )))
+#define softfloat_isSigNaNExtF80UI(uiA64, uiA0)                                                                                            \
+    ((((uiA64)&0x7FFF) == 0x7FFF) && !((uiA0)&UINT64_C(0x4000000000000000)) && ((uiA0)&UINT64_C(0x3FFFFFFFFFFFFFFF)))

 #ifdef SOFTFLOAT_FAST_INT64

@@ -215,16 +214,14 @@ uint_fast64_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_extF80UIToCommonNaN(
-     uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr );
+void softfloat_extF80UIToCommonNaN(uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and returns the bit pattern of this value as an unsigned
 | integer.
 *----------------------------------------------------------------------------*/
-struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );
+struct uint128 softfloat_commonNaNToExtF80UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting the unsigned integer formed from concatenating 'uiA64' and
@@ -235,19 +232,13 @@ struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );
 | result.  If either original floating-point value is a signaling NaN, the
 | invalid exception is raised.
 *----------------------------------------------------------------------------*/
-struct uint128
- softfloat_propagateNaNExtF80UI(
-     uint_fast16_t uiA64,
-     uint_fast64_t uiA0,
-     uint_fast16_t uiB64,
-     uint_fast64_t uiB0
- );
+struct uint128 softfloat_propagateNaNExtF80UI(uint_fast16_t uiA64, uint_fast64_t uiA0, uint_fast16_t uiB64, uint_fast64_t uiB0);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF128UI64 UINT64_C( 0xFFFF800000000000 )
-#define defaultNaNF128UI0  UINT64_C( 0 )
+#define defaultNaNF128UI64 UINT64_C(0xFFFF800000000000)
+#define defaultNaNF128UI0 UINT64_C(0)

 /*----------------------------------------------------------------------------
 | Returns true when the 128-bit unsigned integer formed from concatenating
@@ -255,7 +246,8 @@ struct uint128
 | point signaling NaN.
 | Note:  This macro evaluates its arguments more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF128UI( uiA64, uiA0 ) ((((uiA64) & UINT64_C( 0x7FFF800000000000 )) == UINT64_C( 0x7FFF000000000000 )) && ((uiA0) || ((uiA64) & UINT64_C( 0x00007FFFFFFFFFFF ))))
+#define softfloat_isSigNaNF128UI(uiA64, uiA0)                                                                                              \
+    ((((uiA64)&UINT64_C(0x7FFF800000000000)) == UINT64_C(0x7FFF000000000000)) && ((uiA0) || ((uiA64)&UINT64_C(0x00007FFFFFFFFFFF))))

 /*----------------------------------------------------------------------------
 | Assuming the unsigned integer formed from concatenating 'uiA64' and 'uiA0'
@@ -264,15 +256,13 @@ struct uint128
 | pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid exception
 | is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_f128UIToCommonNaN(
-     uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr );
+void softfloat_f128UIToCommonNaN(uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );
+struct uint128 softfloat_commonNaNToF128UI(const struct commonNaN*);

 /*----------------------------------------------------------------------------
 | Interpreting the unsigned integer formed from concatenating 'uiA64' and
@@ -283,13 +273,7 @@ struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );
 | If either original floating-point value is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-struct uint128
- softfloat_propagateNaNF128UI(
-     uint_fast64_t uiA64,
-     uint_fast64_t uiA0,
-     uint_fast64_t uiB64,
-     uint_fast64_t uiB0
- );
+struct uint128 softfloat_propagateNaNF128UI(uint_fast64_t uiA64, uint_fast64_t uiA0, uint_fast64_t uiB64, uint_fast64_t uiB0);

 #else

@@ -304,18 +288,14 @@ struct uint128
 | common NaN at the location pointed to by 'zPtr'.  If the NaN is a signaling
 | NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_extF80MToCommonNaN(
-     const struct extFloat80M *aSPtr, struct commonNaN *zPtr );
+void softfloat_extF80MToCommonNaN(const struct extFloat80M* aSPtr, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and stores this NaN at the location pointed to by
 | 'zSPtr'.
 *----------------------------------------------------------------------------*/
-void
- softfloat_commonNaNToExtF80M(
-     const struct commonNaN *aPtr, struct extFloat80M *zSPtr );
+void softfloat_commonNaNToExtF80M(const struct commonNaN* aPtr, struct extFloat80M* zSPtr);

 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 80-bit extended floating-point values
@@ -323,12 +303,7 @@ void
 | at the location pointed to by 'zSPtr'.  If either original floating-point
 | value is a signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_propagateNaNExtF80M(
-     const struct extFloat80M *aSPtr,
-     const struct extFloat80M *bSPtr,
-     struct extFloat80M *zSPtr
- );
+void softfloat_propagateNaNExtF80M(const struct extFloat80M* aSPtr, const struct extFloat80M* bSPtr, struct extFloat80M* zSPtr);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
@@ -336,7 +311,7 @@ void
 #define defaultNaNF128UI96 0xFFFF8000
 #define defaultNaNF128UI64 0
 #define defaultNaNF128UI32 0
-#define defaultNaNF128UI0  0
+#define defaultNaNF128UI0 0

 /*----------------------------------------------------------------------------
 | Assuming the 128-bit floating-point value pointed to by 'aWPtr' is a NaN,
@@ -346,8 +321,7 @@ void
 | four 32-bit elements that concatenate in the platform's normal endian order
 | to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void
- softfloat_f128MToCommonNaN( const uint32_t *aWPtr, struct commonNaN *zPtr );
+void softfloat_f128MToCommonNaN(const uint32_t* aWPtr, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
@@ -355,8 +329,7 @@ void
 | 'zWPtr' points to an array of four 32-bit elements that concatenate in the
 | platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void
- softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr );
+void softfloat_commonNaNToF128M(const struct commonNaN* aPtr, uint32_t* zWPtr);

 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 128-bit floating-point values pointed to by
@@ -366,11 +339,8 @@ void
 | and 'zWPtr' points to an array of four 32-bit elements that concatenate in
 | the platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void
- softfloat_propagateNaNF128M(
-     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr );
+void softfloat_propagateNaNF128M(const uint32_t* aWPtr, const uint32_t* bWPtr, uint32_t* zWPtr);

 #endif

 #endif
-
@@ -37,10 +37,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef specialize_h
 #define specialize_h 1

-#include <stdbool.h>
-#include <stdint.h>
 #include "primitiveTypes.h"
 #include "softfloat.h"
+#include <stdbool.h>
+#include <stdint.h>

 /*----------------------------------------------------------------------------
 | Default value for 'softfloat_detectTininess'.
@@ -53,27 +53,29 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *----------------------------------------------------------------------------*/
 #define ui32_fromPosOverflow 0xFFFFFFFF
 #define ui32_fromNegOverflow 0
-#define ui32_fromNaN         0
-#define i32_fromPosOverflow  0x7FFFFFFF
-#define i32_fromNegOverflow  (-0x7FFFFFFF - 1)
-#define i32_fromNaN          0
+#define ui32_fromNaN 0
+#define i32_fromPosOverflow 0x7FFFFFFF
+#define i32_fromNegOverflow (-0x7FFFFFFF - 1)
+#define i32_fromNaN 0

 /*----------------------------------------------------------------------------
 | The values to return on conversions to 64-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
-#define ui64_fromPosOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
+#define ui64_fromPosOverflow UINT64_C(0xFFFFFFFFFFFFFFFF)
 #define ui64_fromNegOverflow 0
-#define ui64_fromNaN         0
-#define i64_fromPosOverflow  INT64_C( 0x7FFFFFFFFFFFFFFF )
-#define i64_fromNegOverflow  (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
-#define i64_fromNaN          0
+#define ui64_fromNaN 0
+#define i64_fromPosOverflow INT64_C(0x7FFFFFFFFFFFFFFF)
+#define i64_fromNegOverflow (-INT64_C(0x7FFFFFFFFFFFFFFF) - 1)
+#define i64_fromNaN 0

 /*----------------------------------------------------------------------------
 | "Common NaN" structure, used to transfer NaN representations from one format
 | to another.
 *----------------------------------------------------------------------------*/
-struct commonNaN { char _unused; };
+struct commonNaN {
+    char _unused;
+};

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 16-bit floating-point NaN.
@@ -85,7 +87,7 @@ struct commonNaN { char _unused; };
 | 16-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF))
+#define softfloat_isSigNaNF16UI(uiA) ((((uiA)&0x7E00) == 0x7C00) && ((uiA)&0x01FF))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
@@ -93,13 +95,15 @@ struct commonNaN { char _unused; };
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_f16UIToCommonNaN(uiA, zPtr)                                                                                              \
+    if(!((uiA)&0x0200))                                                                                                                    \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-#define softfloat_commonNaNToF16UI( aPtr ) ((uint_fast16_t) defaultNaNF16UI)
+#define softfloat_commonNaNToF16UI(aPtr) ((uint_fast16_t)defaultNaNF16UI)

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 16-bit floating-
@@ -107,8 +111,7 @@ struct commonNaN { char _unused; };
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast16_t
- softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );
+uint_fast16_t softfloat_propagateNaNF16UI(uint_fast16_t uiA, uint_fast16_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 32-bit floating-point NaN.
@@ -120,7 +123,7 @@ uint_fast16_t
 | 32-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF32UI( uiA ) ((((uiA) & 0x7FC00000) == 0x7F800000) && ((uiA) & 0x003FFFFF))
+#define softfloat_isSigNaNF32UI(uiA) ((((uiA)&0x7FC00000) == 0x7F800000) && ((uiA)&0x003FFFFF))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 32-bit floating-point NaN, converts
@@ -128,13 +131,15 @@ uint_fast16_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_f32UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x00400000) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_f32UIToCommonNaN(uiA, zPtr)                                                                                              \
+    if(!((uiA)&0x00400000))                                                                                                                \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-#define softfloat_commonNaNToF32UI( aPtr ) ((uint_fast32_t) defaultNaNF32UI)
+#define softfloat_commonNaNToF32UI(aPtr) ((uint_fast32_t)defaultNaNF32UI)

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 32-bit floating-
@@ -142,20 +147,20 @@ uint_fast16_t
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast32_t
- softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB );
+uint_fast32_t softfloat_propagateNaNF32UI(uint_fast32_t uiA, uint_fast32_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 64-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF64UI UINT64_C( 0x7FF8000000000000 )
+#define defaultNaNF64UI UINT64_C(0x7FF8000000000000)

 /*----------------------------------------------------------------------------
 | Returns true when 64-bit unsigned integer 'uiA' has the bit pattern of a
 | 64-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF64UI( uiA ) ((((uiA) & UINT64_C( 0x7FF8000000000000 )) == UINT64_C( 0x7FF0000000000000 )) && ((uiA) & UINT64_C( 0x0007FFFFFFFFFFFF )))
+#define softfloat_isSigNaNF64UI(uiA)                                                                                                       \
+    ((((uiA)&UINT64_C(0x7FF8000000000000)) == UINT64_C(0x7FF0000000000000)) && ((uiA)&UINT64_C(0x0007FFFFFFFFFFFF)))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 64-bit floating-point NaN, converts
@@ -163,13 +168,15 @@ uint_fast32_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_f64UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & UINT64_C( 0x0008000000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_f64UIToCommonNaN(uiA, zPtr)                                                                                              \
+    if(!((uiA)&UINT64_C(0x0008000000000000)))                                                                                              \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-#define softfloat_commonNaNToF64UI( aPtr ) ((uint_fast64_t) defaultNaNF64UI)
+#define softfloat_commonNaNToF64UI(aPtr) ((uint_fast64_t)defaultNaNF64UI)

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 64-bit floating-
@@ -177,14 +184,13 @@ uint_fast32_t
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast64_t
- softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB );
+uint_fast64_t softfloat_propagateNaNF64UI(uint_fast64_t uiA, uint_fast64_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 80-bit extended floating-point NaN.
 *----------------------------------------------------------------------------*/
 #define defaultNaNExtF80UI64 0x7FFF
-#define defaultNaNExtF80UI0  UINT64_C( 0xC000000000000000 )
+#define defaultNaNExtF80UI0 UINT64_C(0xC000000000000000)

 /*----------------------------------------------------------------------------
 | Returns true when the 80-bit unsigned integer formed from concatenating
@@ -192,7 +198,8 @@ uint_fast64_t
 | floating-point signaling NaN.
 | Note:  This macro evaluates its arguments more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNExtF80UI( uiA64, uiA0 ) ((((uiA64) & 0x7FFF) == 0x7FFF) && ! ((uiA0) & UINT64_C( 0x4000000000000000 )) && ((uiA0) & UINT64_C( 0x3FFFFFFFFFFFFFFF )))
+#define softfloat_isSigNaNExtF80UI(uiA64, uiA0)                                                                                            \
+    ((((uiA64)&0x7FFF) == 0x7FFF) && !((uiA0)&UINT64_C(0x4000000000000000)) && ((uiA0)&UINT64_C(0x3FFFFFFFFFFFFFFF)))

 #ifdef SOFTFLOAT_FAST_INT64

@@ -208,24 +215,25 @@ uint_fast64_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_extF80UIToCommonNaN( uiA64, uiA0, zPtr ) if ( ! ((uiA0) & UINT64_C( 0x4000000000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_extF80UIToCommonNaN(uiA64, uiA0, zPtr)                                                                                   \
+    if(!((uiA0)&UINT64_C(0x4000000000000000)))                                                                                             \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and returns the bit pattern of this value as an unsigned
 | integer.
 *----------------------------------------------------------------------------*/
-#if defined INLINE && ! defined softfloat_commonNaNToExtF80UI
+#if defined INLINE && !defined softfloat_commonNaNToExtF80UI
 INLINE
-struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr )
-{
+struct uint128 softfloat_commonNaNToExtF80UI(const struct commonNaN* aPtr) {
    struct uint128 uiZ;
    uiZ.v64 = defaultNaNExtF80UI64;
-    uiZ.v0  = defaultNaNExtF80UI0;
+    uiZ.v0 = defaultNaNExtF80UI0;
    return uiZ;
 }
 #else
-struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );
+struct uint128 softfloat_commonNaNToExtF80UI(const struct commonNaN* aPtr);
 #endif

 /*----------------------------------------------------------------------------
@@ -237,19 +245,13 @@ struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );
 | result.  If either original floating-point value is a signaling NaN, the
 | invalid exception is raised.
 *----------------------------------------------------------------------------*/
-struct uint128
- softfloat_propagateNaNExtF80UI(
-     uint_fast16_t uiA64,
-     uint_fast64_t uiA0,
-     uint_fast16_t uiB64,
-     uint_fast64_t uiB0
- );
+struct uint128 softfloat_propagateNaNExtF80UI(uint_fast16_t uiA64, uint_fast64_t uiA0, uint_fast16_t uiB64, uint_fast64_t uiB0);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF128UI64 UINT64_C( 0x7FFF800000000000 )
-#define defaultNaNF128UI0  UINT64_C( 0 )
+#define defaultNaNF128UI64 UINT64_C(0x7FFF800000000000)
+#define defaultNaNF128UI0 UINT64_C(0)

 /*----------------------------------------------------------------------------
 | Returns true when the 128-bit unsigned integer formed from concatenating
@@ -257,7 +259,8 @@ struct uint128
 | point signaling NaN.
 | Note:  This macro evaluates its arguments more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF128UI( uiA64, uiA0 ) ((((uiA64) & UINT64_C( 0x7FFF800000000000 )) == UINT64_C( 0x7FFF000000000000 )) && ((uiA0) || ((uiA64) & UINT64_C( 0x00007FFFFFFFFFFF ))))
+#define softfloat_isSigNaNF128UI(uiA64, uiA0)                                                                                              \
+    ((((uiA64)&UINT64_C(0x7FFF800000000000)) == UINT64_C(0x7FFF000000000000)) && ((uiA0) || ((uiA64)&UINT64_C(0x00007FFFFFFFFFFF))))

 /*----------------------------------------------------------------------------
 | Assuming the unsigned integer formed from concatenating 'uiA64' and 'uiA0'
@@ -266,23 +269,24 @@ struct uint128
 | pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid exception
 | is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_f128UIToCommonNaN( uiA64, uiA0, zPtr ) if ( ! ((uiA64) & UINT64_C( 0x0000800000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_f128UIToCommonNaN(uiA64, uiA0, zPtr)                                                                                     \
+    if(!((uiA64)&UINT64_C(0x0000800000000000)))                                                                                            \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-#if defined INLINE && ! defined softfloat_commonNaNToF128UI
+#if defined INLINE && !defined softfloat_commonNaNToF128UI
 INLINE
-struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN *aPtr )
-{
+struct uint128 softfloat_commonNaNToF128UI(const struct commonNaN* aPtr) {
    struct uint128 uiZ;
    uiZ.v64 = defaultNaNF128UI64;
-    uiZ.v0  = defaultNaNF128UI0;
+    uiZ.v0 = defaultNaNF128UI0;
    return uiZ;
 }
 #else
-struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );
+struct uint128 softfloat_commonNaNToF128UI(const struct commonNaN*);
 #endif

 /*----------------------------------------------------------------------------
@@ -294,13 +298,7 @@ struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );
 | If either original floating-point value is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-struct uint128
- softfloat_propagateNaNF128UI(
-     uint_fast64_t uiA64,
-     uint_fast64_t uiA0,
-     uint_fast64_t uiB64,
-     uint_fast64_t uiB0
- );
+struct uint128 softfloat_propagateNaNF128UI(uint_fast64_t uiA64, uint_fast64_t uiA0, uint_fast64_t uiB64, uint_fast64_t uiB0);

 #else

@@ -315,26 +313,23 @@ struct uint128
 | common NaN at the location pointed to by 'zPtr'.  If the NaN is a signaling
 | NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_extF80MToCommonNaN( aSPtr, zPtr ) if ( ! ((aSPtr)->signif & UINT64_C( 0x4000000000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_extF80MToCommonNaN(aSPtr, zPtr)                                                                                          \
+    if(!((aSPtr)->signif & UINT64_C(0x4000000000000000)))                                                                                  \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and stores this NaN at the location pointed to by
 | 'zSPtr'.
 *----------------------------------------------------------------------------*/
-#if defined INLINE && ! defined softfloat_commonNaNToExtF80M
+#if defined INLINE && !defined softfloat_commonNaNToExtF80M
 INLINE
-void
- softfloat_commonNaNToExtF80M(
-     const struct commonNaN *aPtr, struct extFloat80M *zSPtr )
-{
+void softfloat_commonNaNToExtF80M(const struct commonNaN* aPtr, struct extFloat80M* zSPtr) {
    zSPtr->signExp = defaultNaNExtF80UI64;
-    zSPtr->signif  = defaultNaNExtF80UI0;
+    zSPtr->signif = defaultNaNExtF80UI0;
 }
 #else
-void
- softfloat_commonNaNToExtF80M(
-     const struct commonNaN *aPtr, struct extFloat80M *zSPtr );
+void softfloat_commonNaNToExtF80M(const struct commonNaN* aPtr, struct extFloat80M* zSPtr);
 #endif

 /*----------------------------------------------------------------------------
@@ -343,12 +338,7 @@ void
 | at the location pointed to by 'zSPtr'.  If either original floating-point
 | value is a signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_propagateNaNExtF80M(
-     const struct extFloat80M *aSPtr,
-     const struct extFloat80M *bSPtr,
-     struct extFloat80M *zSPtr
- );
+void softfloat_propagateNaNExtF80M(const struct extFloat80M* aSPtr, const struct extFloat80M* bSPtr, struct extFloat80M* zSPtr);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
@@ -356,7 +346,7 @@ void
 #define defaultNaNF128UI96 0x7FFF8000
 #define defaultNaNF128UI64 0
 #define defaultNaNF128UI32 0
-#define defaultNaNF128UI0  0
+#define defaultNaNF128UI0 0

 /*----------------------------------------------------------------------------
 | Assuming the 128-bit floating-point value pointed to by 'aWPtr' is a NaN,
@@ -366,7 +356,9 @@ void
 | four 32-bit elements that concatenate in the platform's normal endian order
 | to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-#define softfloat_f128MToCommonNaN( aWPtr, zPtr ) if ( ! ((aWPtr)[indexWordHi( 4 )] & UINT64_C( 0x0000800000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_f128MToCommonNaN(aWPtr, zPtr)                                                                                            \
+    if(!((aWPtr)[indexWordHi(4)] & UINT64_C(0x0000800000000000)))                                                                          \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
@@ -374,19 +366,16 @@ void
 | 'zWPtr' points to an array of four 32-bit elements that concatenate in the
 | platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-#if defined INLINE && ! defined softfloat_commonNaNToF128M
+#if defined INLINE && !defined softfloat_commonNaNToF128M
 INLINE
-void
- softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr )
-{
-    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
-    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
-    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
-    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;
+void softfloat_commonNaNToF128M(const struct commonNaN* aPtr, uint32_t* zWPtr) {
+    zWPtr[indexWord(4, 3)] = defaultNaNF128UI96;
+    zWPtr[indexWord(4, 2)] = defaultNaNF128UI64;
+    zWPtr[indexWord(4, 1)] = defaultNaNF128UI32;
+    zWPtr[indexWord(4, 0)] = defaultNaNF128UI0;
 }
 #else
-void
- softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr );
+void softfloat_commonNaNToF128M(const struct commonNaN* aPtr, uint32_t* zWPtr);
 #endif

 /*----------------------------------------------------------------------------
@@ -397,11 +386,8 @@ void
 | and 'zWPtr' points to an array of four 32-bit elements that concatenate in
 | the platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void
- softfloat_propagateNaNF128M(
-     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr );
+void softfloat_propagateNaNF128M(const uint32_t* aWPtr, const uint32_t* bWPtr, uint32_t* zWPtr);

 #endif

 #endif
-
@@ -37,10 +37,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef specialize_h
 #define specialize_h 1

-#include <stdbool.h>
-#include <stdint.h>
 #include "primitiveTypes.h"
 #include "softfloat.h"
+#include <stdbool.h>
+#include <stdint.h>

 /*----------------------------------------------------------------------------
 | Default value for 'softfloat_detectTininess'.
@@ -53,21 +53,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *----------------------------------------------------------------------------*/
 #define ui32_fromPosOverflow 0xFFFFFFFF
 #define ui32_fromNegOverflow 0
-#define ui32_fromNaN         0
-#define i32_fromPosOverflow  0x7FFFFFFF
-#define i32_fromNegOverflow  (-0x7FFFFFFF - 1)
-#define i32_fromNaN          0
+#define ui32_fromNaN 0
+#define i32_fromPosOverflow 0x7FFFFFFF
+#define i32_fromNegOverflow (-0x7FFFFFFF - 1)
+#define i32_fromNaN 0

 /*----------------------------------------------------------------------------
 | The values to return on conversions to 64-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
-#define ui64_fromPosOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
+#define ui64_fromPosOverflow UINT64_C(0xFFFFFFFFFFFFFFFF)
 #define ui64_fromNegOverflow 0
-#define ui64_fromNaN         0
-#define i64_fromPosOverflow  INT64_C( 0x7FFFFFFFFFFFFFFF )
-#define i64_fromNegOverflow  (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
-#define i64_fromNaN          0
+#define ui64_fromNaN 0
+#define i64_fromPosOverflow INT64_C(0x7FFFFFFFFFFFFFFF)
+#define i64_fromNegOverflow (-INT64_C(0x7FFFFFFFFFFFFFFF) - 1)
+#define i64_fromNaN 0

 /*----------------------------------------------------------------------------
 | "Common NaN" structure, used to transfer NaN representations from one format
@@ -92,7 +92,7 @@ struct commonNaN {
 | 16-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF))
+#define softfloat_isSigNaNF16UI(uiA) ((((uiA)&0x7E00) == 0x7C00) && ((uiA)&0x01FF))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
@@ -100,13 +100,13 @@ struct commonNaN {
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr );
+void softfloat_f16UIToCommonNaN(uint_fast16_t uiA, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr );
+uint_fast16_t softfloat_commonNaNToF16UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 16-bit floating-
@@ -114,8 +114,7 @@ uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr );
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast16_t
- softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );
+uint_fast16_t softfloat_propagateNaNF16UI(uint_fast16_t uiA, uint_fast16_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 32-bit floating-point NaN.
@@ -127,7 +126,7 @@ uint_fast16_t
 | 32-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF32UI( uiA ) ((((uiA) & 0x7FC00000) == 0x7F800000) && ((uiA) & 0x003FFFFF))
+#define softfloat_isSigNaNF32UI(uiA) ((((uiA)&0x7FC00000) == 0x7F800000) && ((uiA)&0x003FFFFF))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 32-bit floating-point NaN, converts
@@ -135,13 +134,13 @@ uint_fast16_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f32UIToCommonNaN( uint_fast32_t uiA, struct commonNaN *zPtr );
+void softfloat_f32UIToCommonNaN(uint_fast32_t uiA, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );
+uint_fast32_t softfloat_commonNaNToF32UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 32-bit floating-
@@ -149,20 +148,20 @@ uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast32_t
- softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB );
+uint_fast32_t softfloat_propagateNaNF32UI(uint_fast32_t uiA, uint_fast32_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 64-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF64UI UINT64_C( 0x7FF8000000000000 )
+#define defaultNaNF64UI UINT64_C(0x7FF8000000000000)

 /*----------------------------------------------------------------------------
 | Returns true when 64-bit unsigned integer 'uiA' has the bit pattern of a
 | 64-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF64UI( uiA ) ((((uiA) & UINT64_C( 0x7FF8000000000000 )) == UINT64_C( 0x7FF0000000000000 )) && ((uiA) & UINT64_C( 0x0007FFFFFFFFFFFF )))
+#define softfloat_isSigNaNF64UI(uiA)                                                                                                       \
+    ((((uiA)&UINT64_C(0x7FF8000000000000)) == UINT64_C(0x7FF0000000000000)) && ((uiA)&UINT64_C(0x0007FFFFFFFFFFFF)))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 64-bit floating-point NaN, converts
@@ -170,13 +169,13 @@ uint_fast32_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void softfloat_f64UIToCommonNaN( uint_fast64_t uiA, struct commonNaN *zPtr );
+void softfloat_f64UIToCommonNaN(uint_fast64_t uiA, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr );
+uint_fast64_t softfloat_commonNaNToF64UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 64-bit floating-
@@ -184,14 +183,13 @@ uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr );
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast64_t
- softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB );
+uint_fast64_t softfloat_propagateNaNF64UI(uint_fast64_t uiA, uint_fast64_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 80-bit extended floating-point NaN.
 *----------------------------------------------------------------------------*/
 #define defaultNaNExtF80UI64 0x7FFF
-#define defaultNaNExtF80UI0  UINT64_C( 0xC000000000000000 )
+#define defaultNaNExtF80UI0 UINT64_C(0xC000000000000000)

 /*----------------------------------------------------------------------------
 | Returns true when the 80-bit unsigned integer formed from concatenating
@@ -199,7 +197,8 @@ uint_fast64_t
 | floating-point signaling NaN.
 | Note:  This macro evaluates its arguments more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNExtF80UI( uiA64, uiA0 ) ((((uiA64) & 0x7FFF) == 0x7FFF) && ! ((uiA0) & UINT64_C( 0x4000000000000000 )) && ((uiA0) & UINT64_C( 0x3FFFFFFFFFFFFFFF )))
+#define softfloat_isSigNaNExtF80UI(uiA64, uiA0)                                                                                            \
+    ((((uiA64)&0x7FFF) == 0x7FFF) && !((uiA0)&UINT64_C(0x4000000000000000)) && ((uiA0)&UINT64_C(0x3FFFFFFFFFFFFFFF)))

 #ifdef SOFTFLOAT_FAST_INT64

@@ -215,16 +214,14 @@ uint_fast64_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_extF80UIToCommonNaN(
-     uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr );
+void softfloat_extF80UIToCommonNaN(uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and returns the bit pattern of this value as an unsigned
 | integer.
 *----------------------------------------------------------------------------*/
-struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );
+struct uint128 softfloat_commonNaNToExtF80UI(const struct commonNaN* aPtr);

 /*----------------------------------------------------------------------------
 | Interpreting the unsigned integer formed from concatenating 'uiA64' and
@@ -235,19 +232,13 @@ struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );
 | result.  If either original floating-point value is a signaling NaN, the
 | invalid exception is raised.
 *----------------------------------------------------------------------------*/
-struct uint128
- softfloat_propagateNaNExtF80UI(
-     uint_fast16_t uiA64,
-     uint_fast64_t uiA0,
-     uint_fast16_t uiB64,
-     uint_fast64_t uiB0
- );
+struct uint128 softfloat_propagateNaNExtF80UI(uint_fast16_t uiA64, uint_fast64_t uiA0, uint_fast16_t uiB64, uint_fast64_t uiB0);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF128UI64 UINT64_C( 0x7FFF800000000000 )
-#define defaultNaNF128UI0  UINT64_C( 0 )
+#define defaultNaNF128UI64 UINT64_C(0x7FFF800000000000)
+#define defaultNaNF128UI0 UINT64_C(0)

 /*----------------------------------------------------------------------------
 | Returns true when the 128-bit unsigned integer formed from concatenating
@@ -255,7 +246,8 @@ struct uint128
 | point signaling NaN.
 | Note:  This macro evaluates its arguments more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF128UI( uiA64, uiA0 ) ((((uiA64) & UINT64_C( 0x7FFF800000000000 )) == UINT64_C( 0x7FFF000000000000 )) && ((uiA0) || ((uiA64) & UINT64_C( 0x00007FFFFFFFFFFF ))))
+#define softfloat_isSigNaNF128UI(uiA64, uiA0)                                                                                              \
+    ((((uiA64)&UINT64_C(0x7FFF800000000000)) == UINT64_C(0x7FFF000000000000)) && ((uiA0) || ((uiA64)&UINT64_C(0x00007FFFFFFFFFFF))))

 /*----------------------------------------------------------------------------
 | Assuming the unsigned integer formed from concatenating 'uiA64' and 'uiA0'
@@ -264,15 +256,13 @@ struct uint128
 | pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid exception
 | is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_f128UIToCommonNaN(
-     uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr );
+void softfloat_f128UIToCommonNaN(uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );
+struct uint128 softfloat_commonNaNToF128UI(const struct commonNaN*);

 /*----------------------------------------------------------------------------
 | Interpreting the unsigned integer formed from concatenating 'uiA64' and
@@ -283,13 +273,7 @@ struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );
 | If either original floating-point value is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-struct uint128
- softfloat_propagateNaNF128UI(
-     uint_fast64_t uiA64,
-     uint_fast64_t uiA0,
-     uint_fast64_t uiB64,
-     uint_fast64_t uiB0
- );
+struct uint128 softfloat_propagateNaNF128UI(uint_fast64_t uiA64, uint_fast64_t uiA0, uint_fast64_t uiB64, uint_fast64_t uiB0);

 #else

@@ -304,18 +288,14 @@ struct uint128
 | common NaN at the location pointed to by 'zPtr'.  If the NaN is a signaling
 | NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_extF80MToCommonNaN(
-     const struct extFloat80M *aSPtr, struct commonNaN *zPtr );
+void softfloat_extF80MToCommonNaN(const struct extFloat80M* aSPtr, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and stores this NaN at the location pointed to by
 | 'zSPtr'.
 *----------------------------------------------------------------------------*/
-void
- softfloat_commonNaNToExtF80M(
-     const struct commonNaN *aPtr, struct extFloat80M *zSPtr );
+void softfloat_commonNaNToExtF80M(const struct commonNaN* aPtr, struct extFloat80M* zSPtr);

 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 80-bit extended floating-point values
@@ -323,12 +303,7 @@ void
 | at the location pointed to by 'zSPtr'.  If either original floating-point
 | value is a signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_propagateNaNExtF80M(
-     const struct extFloat80M *aSPtr,
-     const struct extFloat80M *bSPtr,
-     struct extFloat80M *zSPtr
- );
+void softfloat_propagateNaNExtF80M(const struct extFloat80M* aSPtr, const struct extFloat80M* bSPtr, struct extFloat80M* zSPtr);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
@@ -336,7 +311,7 @@ void
 #define defaultNaNF128UI96 0x7FFF8000
 #define defaultNaNF128UI64 0
 #define defaultNaNF128UI32 0
-#define defaultNaNF128UI0  0
+#define defaultNaNF128UI0 0

 /*----------------------------------------------------------------------------
 | Assuming the 128-bit floating-point value pointed to by 'aWPtr' is a NaN,
@@ -346,8 +321,7 @@ void
 | four 32-bit elements that concatenate in the platform's normal endian order
 | to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void
- softfloat_f128MToCommonNaN( const uint32_t *aWPtr, struct commonNaN *zPtr );
+void softfloat_f128MToCommonNaN(const uint32_t* aWPtr, struct commonNaN* zPtr);

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
@@ -355,8 +329,7 @@ void
 | 'zWPtr' points to an array of four 32-bit elements that concatenate in the
 | platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void
- softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr );
+void softfloat_commonNaNToF128M(const struct commonNaN* aPtr, uint32_t* zWPtr);

 /*----------------------------------------------------------------------------
 | Assuming at least one of the two 128-bit floating-point values pointed to by
@@ -366,11 +339,8 @@ void
 | and 'zWPtr' points to an array of four 32-bit elements that concatenate in
 | the platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void
- softfloat_propagateNaNF128M(
-     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr );
+void softfloat_propagateNaNF128M(const uint32_t* aWPtr, const uint32_t* bWPtr, uint32_t* zWPtr);

 #endif

 #endif
-
@@ -37,10 +37,10 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef specialize_h
 #define specialize_h 1

-#include <stdbool.h>
-#include <stdint.h>
 #include "primitiveTypes.h"
 #include "softfloat.h"
+#include <stdbool.h>
+#include <stdint.h>

 /*----------------------------------------------------------------------------
 | Default value for 'softfloat_detectTininess'.
@@ -53,27 +53,29 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *----------------------------------------------------------------------------*/
 #define ui32_fromPosOverflow 0xFFFFFFFF
 #define ui32_fromNegOverflow 0
-#define ui32_fromNaN         0xFFFFFFFF
-#define i32_fromPosOverflow  0x7FFFFFFF
-#define i32_fromNegOverflow  (-0x7FFFFFFF - 1)
-#define i32_fromNaN          0x7FFFFFFF
+#define ui32_fromNaN 0xFFFFFFFF
+#define i32_fromPosOverflow 0x7FFFFFFF
+#define i32_fromNegOverflow (-0x7FFFFFFF - 1)
+#define i32_fromNaN 0x7FFFFFFF

 /*----------------------------------------------------------------------------
 | The values to return on conversions to 64-bit integer formats that raise an
 | invalid exception.
 *----------------------------------------------------------------------------*/
-#define ui64_fromPosOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
+#define ui64_fromPosOverflow UINT64_C(0xFFFFFFFFFFFFFFFF)
 #define ui64_fromNegOverflow 0
-#define ui64_fromNaN         UINT64_C( 0xFFFFFFFFFFFFFFFF )
-#define i64_fromPosOverflow  INT64_C( 0x7FFFFFFFFFFFFFFF )
-#define i64_fromNegOverflow  (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
-#define i64_fromNaN          INT64_C( 0x7FFFFFFFFFFFFFFF )
+#define ui64_fromNaN UINT64_C(0xFFFFFFFFFFFFFFFF)
+#define i64_fromPosOverflow INT64_C(0x7FFFFFFFFFFFFFFF)
+#define i64_fromNegOverflow (-INT64_C(0x7FFFFFFFFFFFFFFF) - 1)
+#define i64_fromNaN INT64_C(0x7FFFFFFFFFFFFFFF)

 /*----------------------------------------------------------------------------
 | "Common NaN" structure, used to transfer NaN representations from one format
 | to another.
 *----------------------------------------------------------------------------*/
-struct commonNaN { char _unused; };
+struct commonNaN {
+    char _unused;
+};

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 16-bit floating-point NaN.
@@ -85,14 +87,14 @@ struct commonNaN { char _unused; };
 | 16-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF))
+#define softfloat_isSigNaNF16UI(uiA) ((((uiA)&0x7E00) == 0x7C00) && ((uiA)&0x01FF))

 /*----------------------------------------------------------------------------
 | Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
 | 16-bit brain floating-point (BF16) signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNBF16UI( uiA ) ((((uiA) & 0x7FC0) == 0x7F80) && ((uiA) & 0x003F))
+#define softfloat_isSigNaNBF16UI(uiA) ((((uiA)&0x7FC0) == 0x7F80) && ((uiA)&0x003F))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
@@ -100,7 +102,9 @@ struct commonNaN { char _unused; };
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_f16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0200) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_f16UIToCommonNaN(uiA, zPtr)                                                                                              \
+    do { /* expands to a single statement, so a caller's 'else' cannot pair with the 'if' below */                                         \
+        if(!((uiA)&0x0200)) softfloat_raiseFlags(softfloat_flag_invalid); /* quiet bit clear => signaling NaN */ } while(0)

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 16-bit BF16 floating-point NaN, converts
@@ -108,13 +112,15 @@ struct commonNaN { char _unused; };
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_bf16UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x0040) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_bf16UIToCommonNaN(uiA, zPtr)                                                                                             \
+    do { /* expands to a single statement, so a caller's 'else' cannot pair with the 'if' below */                                         \
+        if(!((uiA)&0x0040)) softfloat_raiseFlags(softfloat_flag_invalid); /* quiet bit clear => signaling NaN */ } while(0)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-#define softfloat_commonNaNToF16UI( aPtr ) ((uint_fast16_t) defaultNaNF16UI)
+#define softfloat_commonNaNToF16UI(aPtr) ((uint_fast16_t)defaultNaNF16UI)

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 16-bit floating-
@@ -122,8 +128,7 @@ struct commonNaN { char _unused; };
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast16_t
- softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );
+uint_fast16_t softfloat_propagateNaNF16UI(uint_fast16_t uiA, uint_fast16_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 16-bit BF16 floating-point NaN.
@@ -134,7 +139,7 @@ uint_fast16_t
 | Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-#define softfloat_commonNaNToBF16UI( aPtr ) ((uint_fast16_t) defaultNaNBF16UI)
+#define softfloat_commonNaNToBF16UI(aPtr) ((uint_fast16_t)defaultNaNBF16UI)

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 32-bit floating-point NaN.
@@ -146,7 +151,7 @@ uint_fast16_t
 | 32-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF32UI( uiA ) ((((uiA) & 0x7FC00000) == 0x7F800000) && ((uiA) & 0x003FFFFF))
+#define softfloat_isSigNaNF32UI(uiA) ((((uiA)&0x7FC00000) == 0x7F800000) && ((uiA)&0x003FFFFF))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 32-bit floating-point NaN, converts
@@ -154,13 +159,15 @@ uint_fast16_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_f32UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & 0x00400000) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_f32UIToCommonNaN(uiA, zPtr)                                                                                              \
+    do { /* expands to a single statement, so a caller's 'else' cannot pair with the 'if' below */                                         \
+        if(!((uiA)&0x00400000)) softfloat_raiseFlags(softfloat_flag_invalid); /* quiet bit clear => signaling NaN */ } while(0)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-#define softfloat_commonNaNToF32UI( aPtr ) ((uint_fast32_t) defaultNaNF32UI)
+#define softfloat_commonNaNToF32UI(aPtr) ((uint_fast32_t)defaultNaNF32UI)

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 32-bit floating-
@@ -168,20 +175,20 @@ uint_fast16_t
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast32_t
- softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB );
+uint_fast32_t softfloat_propagateNaNF32UI(uint_fast32_t uiA, uint_fast32_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 64-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF64UI UINT64_C( 0x7FF8000000000000 )
+#define defaultNaNF64UI UINT64_C(0x7FF8000000000000)

 /*----------------------------------------------------------------------------
 | Returns true when 64-bit unsigned integer 'uiA' has the bit pattern of a
 | 64-bit floating-point signaling NaN.
 | Note:  This macro evaluates its argument more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF64UI( uiA ) ((((uiA) & UINT64_C( 0x7FF8000000000000 )) == UINT64_C( 0x7FF0000000000000 )) && ((uiA) & UINT64_C( 0x0007FFFFFFFFFFFF )))
+#define softfloat_isSigNaNF64UI(uiA)                                                                                                       \
+    ((((uiA)&UINT64_C(0x7FF8000000000000)) == UINT64_C(0x7FF0000000000000)) && ((uiA)&UINT64_C(0x0007FFFFFFFFFFFF)))

 /*----------------------------------------------------------------------------
 | Assuming 'uiA' has the bit pattern of a 64-bit floating-point NaN, converts
@@ -189,13 +196,15 @@ uint_fast32_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_f64UIToCommonNaN( uiA, zPtr ) if ( ! ((uiA) & UINT64_C( 0x0008000000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_f64UIToCommonNaN(uiA, zPtr)                                                                                              \
+    do { /* expands to a single statement, so a caller's 'else' cannot pair with the 'if' below */                                         \
+        if(!((uiA)&UINT64_C(0x0008000000000000))) softfloat_raiseFlags(softfloat_flag_invalid); /* quiet bit clear => signaling NaN */ } while(0)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-#define softfloat_commonNaNToF64UI( aPtr ) ((uint_fast64_t) defaultNaNF64UI)
+#define softfloat_commonNaNToF64UI(aPtr) ((uint_fast64_t)defaultNaNF64UI)

 /*----------------------------------------------------------------------------
 | Interpreting 'uiA' and 'uiB' as the bit patterns of two 64-bit floating-
@@ -203,14 +212,13 @@ uint_fast32_t
 | the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
 | signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-uint_fast64_t
- softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB );
+uint_fast64_t softfloat_propagateNaNF64UI(uint_fast64_t uiA, uint_fast64_t uiB);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 80-bit extended floating-point NaN.
 *----------------------------------------------------------------------------*/
 #define defaultNaNExtF80UI64 0x7FFF
-#define defaultNaNExtF80UI0  UINT64_C( 0xC000000000000000 )
+#define defaultNaNExtF80UI0 UINT64_C(0xC000000000000000)

 /*----------------------------------------------------------------------------
 | Returns true when the 80-bit unsigned integer formed from concatenating
@@ -218,7 +226,8 @@ uint_fast64_t
 | floating-point signaling NaN.
 | Note:  This macro evaluates its arguments more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNExtF80UI( uiA64, uiA0 ) ((((uiA64) & 0x7FFF) == 0x7FFF) && ! ((uiA0) & UINT64_C( 0x4000000000000000 )) && ((uiA0) & UINT64_C( 0x3FFFFFFFFFFFFFFF )))
+#define softfloat_isSigNaNExtF80UI(uiA64, uiA0)                                                                                            \
+    ((((uiA64)&0x7FFF) == 0x7FFF) && !((uiA0)&UINT64_C(0x4000000000000000)) && ((uiA0)&UINT64_C(0x3FFFFFFFFFFFFFFF)))

 #ifdef SOFTFLOAT_FAST_INT64

@@ -234,24 +243,25 @@ uint_fast64_t
 | location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_extF80UIToCommonNaN( uiA64, uiA0, zPtr ) if ( ! ((uiA0) & UINT64_C( 0x4000000000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_extF80UIToCommonNaN(uiA64, uiA0, zPtr)                                                                                   \
+    do { /* expands to a single statement, so a caller's 'else' cannot pair with the 'if' below */                                         \
+        if(!((uiA0)&UINT64_C(0x4000000000000000))) softfloat_raiseFlags(softfloat_flag_invalid); /* quiet bit clear => signaling NaN */ } while(0)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and returns the bit pattern of this value as an unsigned
 | integer.
 *----------------------------------------------------------------------------*/
-#if defined INLINE && ! defined softfloat_commonNaNToExtF80UI
+#if defined INLINE && !defined softfloat_commonNaNToExtF80UI
 INLINE
-struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr )
-{
+struct uint128 softfloat_commonNaNToExtF80UI(const struct commonNaN* aPtr) {
    struct uint128 uiZ;
    uiZ.v64 = defaultNaNExtF80UI64;
-    uiZ.v0  = defaultNaNExtF80UI0;
+    uiZ.v0 = defaultNaNExtF80UI0;
    return uiZ;
 }
 #else
-struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );
+struct uint128 softfloat_commonNaNToExtF80UI(const struct commonNaN* aPtr);
 #endif

 /*----------------------------------------------------------------------------
@@ -263,19 +273,13 @@ struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );
 | result.  If either original floating-point value is a signaling NaN, the
 | invalid exception is raised.
 *----------------------------------------------------------------------------*/
-struct uint128
- softfloat_propagateNaNExtF80UI(
-     uint_fast16_t uiA64,
-     uint_fast64_t uiA0,
-     uint_fast16_t uiB64,
-     uint_fast64_t uiB0
- );
+struct uint128 softfloat_propagateNaNExtF80UI(uint_fast16_t uiA64, uint_fast64_t uiA0, uint_fast16_t uiB64, uint_fast64_t uiB0);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
 *----------------------------------------------------------------------------*/
-#define defaultNaNF128UI64 UINT64_C( 0x7FFF800000000000 )
-#define defaultNaNF128UI0  UINT64_C( 0 )
+#define defaultNaNF128UI64 UINT64_C(0x7FFF800000000000)
+#define defaultNaNF128UI0 UINT64_C(0)

 /*----------------------------------------------------------------------------
 | Returns true when the 128-bit unsigned integer formed from concatenating
@@ -283,7 +287,8 @@ struct uint128
 | point signaling NaN.
 | Note:  This macro evaluates its arguments more than once.
 *----------------------------------------------------------------------------*/
-#define softfloat_isSigNaNF128UI( uiA64, uiA0 ) ((((uiA64) & UINT64_C( 0x7FFF800000000000 )) == UINT64_C( 0x7FFF000000000000 )) && ((uiA0) || ((uiA64) & UINT64_C( 0x00007FFFFFFFFFFF ))))
+#define softfloat_isSigNaNF128UI(uiA64, uiA0)                                                                                              \
+    ((((uiA64)&UINT64_C(0x7FFF800000000000)) == UINT64_C(0x7FFF000000000000)) && ((uiA0) || ((uiA64)&UINT64_C(0x00007FFFFFFFFFFF))))

 /*----------------------------------------------------------------------------
 | Assuming the unsigned integer formed from concatenating 'uiA64' and 'uiA0'
@@ -292,23 +297,24 @@ struct uint128
 | pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid exception
 | is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_f128UIToCommonNaN( uiA64, uiA0, zPtr ) if ( ! ((uiA64) & UINT64_C( 0x0000800000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_f128UIToCommonNaN(uiA64, uiA0, zPtr)                                                                                     \
+    if(!((uiA64)&UINT64_C(0x0000800000000000)))                                                                                            \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
 | NaN, and returns the bit pattern of this value as an unsigned integer.
 *----------------------------------------------------------------------------*/
-#if defined INLINE && ! defined softfloat_commonNaNToF128UI
+#if defined INLINE && !defined softfloat_commonNaNToF128UI
 INLINE
-struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN *aPtr )
-{
+struct uint128 softfloat_commonNaNToF128UI(const struct commonNaN* aPtr) {
    struct uint128 uiZ;
    uiZ.v64 = defaultNaNF128UI64;
-    uiZ.v0  = defaultNaNF128UI0;
+    uiZ.v0 = defaultNaNF128UI0;
    return uiZ;
 }
 #else
-struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );
+struct uint128 softfloat_commonNaNToF128UI(const struct commonNaN*);
 #endif

 /*----------------------------------------------------------------------------
@@ -320,13 +326,7 @@ struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );
 | If either original floating-point value is a signaling NaN, the invalid
 | exception is raised.
 *----------------------------------------------------------------------------*/
-struct uint128
- softfloat_propagateNaNF128UI(
-     uint_fast64_t uiA64,
-     uint_fast64_t uiA0,
-     uint_fast64_t uiB64,
-     uint_fast64_t uiB0
- );
+struct uint128 softfloat_propagateNaNF128UI(uint_fast64_t uiA64, uint_fast64_t uiA0, uint_fast64_t uiB64, uint_fast64_t uiB0);

 #else

@@ -341,26 +341,23 @@ struct uint128
 | common NaN at the location pointed to by 'zPtr'.  If the NaN is a signaling
 | NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-#define softfloat_extF80MToCommonNaN( aSPtr, zPtr ) if ( ! ((aSPtr)->signif & UINT64_C( 0x4000000000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_extF80MToCommonNaN(aSPtr, zPtr)                                                                                          \
+    if(!((aSPtr)->signif & UINT64_C(0x4000000000000000)))                                                                                  \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
 | floating-point NaN, and stores this NaN at the location pointed to by
 | 'zSPtr'.
 *----------------------------------------------------------------------------*/
-#if defined INLINE && ! defined softfloat_commonNaNToExtF80M
+#if defined INLINE && !defined softfloat_commonNaNToExtF80M
 INLINE
-void
- softfloat_commonNaNToExtF80M(
-     const struct commonNaN *aPtr, struct extFloat80M *zSPtr )
-{
+void softfloat_commonNaNToExtF80M(const struct commonNaN* aPtr, struct extFloat80M* zSPtr) {
    zSPtr->signExp = defaultNaNExtF80UI64;
-    zSPtr->signif  = defaultNaNExtF80UI0;
+    zSPtr->signif = defaultNaNExtF80UI0;
 }
 #else
-void
- softfloat_commonNaNToExtF80M(
-     const struct commonNaN *aPtr, struct extFloat80M *zSPtr );
+void softfloat_commonNaNToExtF80M(const struct commonNaN* aPtr, struct extFloat80M* zSPtr);
 #endif

 /*----------------------------------------------------------------------------
@@ -369,12 +366,7 @@ void
 | at the location pointed to by 'zSPtr'.  If either original floating-point
 | value is a signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/
-void
- softfloat_propagateNaNExtF80M(
-     const struct extFloat80M *aSPtr,
-     const struct extFloat80M *bSPtr,
-     struct extFloat80M *zSPtr
- );
+void softfloat_propagateNaNExtF80M(const struct extFloat80M* aSPtr, const struct extFloat80M* bSPtr, struct extFloat80M* zSPtr);

 /*----------------------------------------------------------------------------
 | The bit pattern for a default generated 128-bit floating-point NaN.
@@ -382,7 +374,7 @@ void
 #define defaultNaNF128UI96 0x7FFF8000
 #define defaultNaNF128UI64 0
 #define defaultNaNF128UI32 0
-#define defaultNaNF128UI0  0
+#define defaultNaNF128UI0 0

 /*----------------------------------------------------------------------------
 | Assuming the 128-bit floating-point value pointed to by 'aWPtr' is a NaN,
@@ -392,7 +384,9 @@ void
 | four 32-bit elements that concatenate in the platform's normal endian order
 | to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-#define softfloat_f128MToCommonNaN( aWPtr, zPtr ) if ( ! ((aWPtr)[indexWordHi( 4 )] & UINT64_C( 0x0000800000000000 )) ) softfloat_raiseFlags( softfloat_flag_invalid )
+#define softfloat_f128MToCommonNaN(aWPtr, zPtr)                                                                                            \
+    if(!((aWPtr)[indexWordHi(4)] & 0x00008000)) /* quiet-NaN bit of the most-significant 32-bit word */                                    \
+    softfloat_raiseFlags(softfloat_flag_invalid)

 /*----------------------------------------------------------------------------
 | Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
@@ -400,19 +394,16 @@ void
 | 'zWPtr' points to an array of four 32-bit elements that concatenate in the
 | platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-#if defined INLINE && ! defined softfloat_commonNaNToF128M
+#if defined INLINE && !defined softfloat_commonNaNToF128M
 INLINE
-void
- softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr )
-{
-    zWPtr[indexWord( 4, 3 )] = defaultNaNF128UI96;
-    zWPtr[indexWord( 4, 2 )] = defaultNaNF128UI64;
-    zWPtr[indexWord( 4, 1 )] = defaultNaNF128UI32;
-    zWPtr[indexWord( 4, 0 )] = defaultNaNF128UI0;
+void softfloat_commonNaNToF128M(const struct commonNaN* aPtr, uint32_t* zWPtr) {
+    zWPtr[indexWord(4, 3)] = defaultNaNF128UI96;
+    zWPtr[indexWord(4, 2)] = defaultNaNF128UI64;
+    zWPtr[indexWord(4, 1)] = defaultNaNF128UI32;
+    zWPtr[indexWord(4, 0)] = defaultNaNF128UI0;
 }
 #else
-void
- softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr );
+void softfloat_commonNaNToF128M(const struct commonNaN* aPtr, uint32_t* zWPtr);
 #endif

 /*----------------------------------------------------------------------------
@@ -423,11 +414,8 @@ void
 | and 'zWPtr' points to an array of four 32-bit elements that concatenate in
 | the platform's normal endian order to form a 128-bit floating-point value.
 *----------------------------------------------------------------------------*/
-void
- softfloat_propagateNaNF128M(
-     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr );
+void softfloat_propagateNaNF128M(const uint32_t* aWPtr, const uint32_t* bWPtr, uint32_t* zWPtr);

 #endif

 #endif
-
@@ -37,255 +37,221 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef internals_h
 #define internals_h 1

-#include <stdbool.h>
-#include <stdint.h>
 #include "primitives.h"
 #include "softfloat_types.h"
+#include <stdbool.h>
+#include <stdint.h>

-union ui16_f16 { uint16_t ui; float16_t f; };
-union ui16_bf16 { uint16_t ui; bfloat16_t f; };
-union ui32_f32 { uint32_t ui; float32_t f; };
-union ui64_f64 { uint64_t ui; float64_t f; };
-
-#ifdef SOFTFLOAT_FAST_INT64
-union extF80M_extF80 { struct extFloat80M fM; extFloat80_t f; };
-union ui128_f128 { struct uint128 ui; float128_t f; };
-#endif
-
-enum {
-    softfloat_mulAdd_subC    = 1,
-    softfloat_mulAdd_subProd = 2
+union ui16_f16 {
+    uint16_t ui;
+    float16_t f;
+};
+union ui16_bf16 {
+    uint16_t ui;
+    bfloat16_t f;
+};
+union ui32_f32 {
+    uint32_t ui;
+    float32_t f;
+};
+union ui64_f64 {
+    uint64_t ui;
+    float64_t f;
 };

-/*----------------------------------------------------------------------------
-*----------------------------------------------------------------------------*/
-uint_fast32_t softfloat_roundToUI32( bool, uint_fast64_t, uint_fast8_t, bool );
-
 #ifdef SOFTFLOAT_FAST_INT64
-uint_fast64_t
- softfloat_roundToUI64(
-     bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool );
-#else
-uint_fast64_t softfloat_roundMToUI64( bool, uint32_t *, uint_fast8_t, bool );
+union extF80M_extF80 {
+    struct extFloat80M fM;
+    extFloat80_t f;
+};
+union ui128_f128 {
+    struct uint128 ui;
+    float128_t f;
+};
 #endif

-int_fast32_t softfloat_roundToI32( bool, uint_fast64_t, uint_fast8_t, bool );
+enum { softfloat_mulAdd_subC = 1, softfloat_mulAdd_subProd = 2 };
+
+/*----------------------------------------------------------------------------
+ *----------------------------------------------------------------------------*/
+uint_fast32_t softfloat_roundToUI32(bool, uint_fast64_t, uint_fast8_t, bool);

 #ifdef SOFTFLOAT_FAST_INT64
-int_fast64_t
- softfloat_roundToI64(
-     bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool );
+uint_fast64_t softfloat_roundToUI64(bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool);
 #else
-int_fast64_t softfloat_roundMToI64( bool, uint32_t *, uint_fast8_t, bool );
+uint_fast64_t softfloat_roundMToUI64(bool, uint32_t*, uint_fast8_t, bool);
+#endif
+
+int_fast32_t softfloat_roundToI32(bool, uint_fast64_t, uint_fast8_t, bool);
+
+#ifdef SOFTFLOAT_FAST_INT64
+int_fast64_t softfloat_roundToI64(bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool);
+#else
+int_fast64_t softfloat_roundMToI64(bool, uint32_t*, uint_fast8_t, bool);
 #endif

 /*----------------------------------------------------------------------------
-*----------------------------------------------------------------------------*/
-#define signF16UI( a ) ((bool) ((uint16_t) (a)>>15))
-#define expF16UI( a ) ((int_fast8_t) ((a)>>10) & 0x1F)
-#define fracF16UI( a ) ((a) & 0x03FF)
-#define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig))
+ *----------------------------------------------------------------------------*/
+#define signF16UI(a) ((bool)((uint16_t)(a) >> 15))
+#define expF16UI(a) ((int_fast8_t)((a) >> 10) & 0x1F)
+#define fracF16UI(a) ((a)&0x03FF)
+#define packToF16UI(sign, exp, sig) (((uint16_t)(sign) << 15) + ((uint16_t)(exp) << 10) + (sig))

-#define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF))
+#define isNaNF16UI(a) (((~(a)&0x7C00) == 0) && ((a)&0x03FF))

-struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; };
-struct exp8_sig16 softfloat_normSubnormalF16Sig( uint_fast16_t );
+struct exp8_sig16 {
+    int_fast8_t exp;
+    uint_fast16_t sig;
+};
+struct exp8_sig16 softfloat_normSubnormalF16Sig(uint_fast16_t);

-float16_t softfloat_roundPackToF16( bool, int_fast16_t, uint_fast16_t );
-float16_t softfloat_normRoundPackToF16( bool, int_fast16_t, uint_fast16_t );
+float16_t softfloat_roundPackToF16(bool, int_fast16_t, uint_fast16_t);
+float16_t softfloat_normRoundPackToF16(bool, int_fast16_t, uint_fast16_t);

-float16_t softfloat_addMagsF16( uint_fast16_t, uint_fast16_t );
-float16_t softfloat_subMagsF16( uint_fast16_t, uint_fast16_t );
-float16_t
- softfloat_mulAddF16(
-     uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t );
+float16_t softfloat_addMagsF16(uint_fast16_t, uint_fast16_t);
+float16_t softfloat_subMagsF16(uint_fast16_t, uint_fast16_t);
+float16_t softfloat_mulAddF16(uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t);

 /*----------------------------------------------------------------------------
-*----------------------------------------------------------------------------*/
-#define signBF16UI( a ) ((bool) ((uint16_t) (a)>>15))
-#define expBF16UI( a ) ((int_fast16_t) ((a)>>7) & 0xFF)
-#define fracBF16UI( a ) ((a) & 0x07F)
-#define packToBF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<7) + (sig))
+ *----------------------------------------------------------------------------*/
+#define signBF16UI(a) ((bool)((uint16_t)(a) >> 15))
+#define expBF16UI(a) ((int_fast16_t)((a) >> 7) & 0xFF)
+#define fracBF16UI(a) ((a)&0x07F)
+#define packToBF16UI(sign, exp, sig) (((uint16_t)(sign) << 15) + ((uint16_t)(exp) << 7) + (sig))

-#define isNaNBF16UI( a ) (((~(a) & 0x7FC0) == 0) && ((a) & 0x07F))
+#define isNaNBF16UI(a) (((~(a)&0x7FC0) == 0) && ((a)&0x07F))

-bfloat16_t softfloat_roundPackToBF16( bool, int_fast16_t, uint_fast16_t );
-struct exp8_sig16 softfloat_normSubnormalBF16Sig( uint_fast16_t );
+bfloat16_t softfloat_roundPackToBF16(bool, int_fast16_t, uint_fast16_t);
+struct exp8_sig16 softfloat_normSubnormalBF16Sig(uint_fast16_t);

 /*----------------------------------------------------------------------------
-*----------------------------------------------------------------------------*/
-#define signF32UI( a ) ((bool) ((uint32_t) (a)>>31))
-#define expF32UI( a ) ((int_fast16_t) ((a)>>23) & 0xFF)
-#define fracF32UI( a ) ((a) & 0x007FFFFF)
-#define packToF32UI( sign, exp, sig ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<23) + (sig))
+ *----------------------------------------------------------------------------*/
+#define signF32UI(a) ((bool)((uint32_t)(a) >> 31))
+#define expF32UI(a) ((int_fast16_t)((a) >> 23) & 0xFF)
+#define fracF32UI(a) ((a)&0x007FFFFF)
+#define packToF32UI(sign, exp, sig) (((uint32_t)(sign) << 31) + ((uint32_t)(exp) << 23) + (sig))

-#define isNaNF32UI( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF))
+#define isNaNF32UI(a) (((~(a)&0x7F800000) == 0) && ((a)&0x007FFFFF))

-struct exp16_sig32 { int_fast16_t exp; uint_fast32_t sig; };
-struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t );
+struct exp16_sig32 {
+    int_fast16_t exp;
+    uint_fast32_t sig;
+};
+struct exp16_sig32 softfloat_normSubnormalF32Sig(uint_fast32_t);

-float32_t softfloat_roundPackToF32( bool, int_fast16_t, uint_fast32_t );
-float32_t softfloat_normRoundPackToF32( bool, int_fast16_t, uint_fast32_t );
+float32_t softfloat_roundPackToF32(bool, int_fast16_t, uint_fast32_t);
+float32_t softfloat_normRoundPackToF32(bool, int_fast16_t, uint_fast32_t);

-float32_t softfloat_addMagsF32( uint_fast32_t, uint_fast32_t );
-float32_t softfloat_subMagsF32( uint_fast32_t, uint_fast32_t );
-float32_t
- softfloat_mulAddF32(
-     uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast8_t );
+float32_t softfloat_addMagsF32(uint_fast32_t, uint_fast32_t);
+float32_t softfloat_subMagsF32(uint_fast32_t, uint_fast32_t);
+float32_t softfloat_mulAddF32(uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast8_t);

 /*----------------------------------------------------------------------------
-*----------------------------------------------------------------------------*/
-#define signF64UI( a ) ((bool) ((uint64_t) (a)>>63))
-#define expF64UI( a ) ((int_fast16_t) ((a)>>52) & 0x7FF)
-#define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))
-#define packToF64UI( sign, exp, sig ) ((uint64_t) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<52) + (sig)))
+ *----------------------------------------------------------------------------*/
+#define signF64UI(a) ((bool)((uint64_t)(a) >> 63))
+#define expF64UI(a) ((int_fast16_t)((a) >> 52) & 0x7FF)
+#define fracF64UI(a) ((a)&UINT64_C(0x000FFFFFFFFFFFFF))
+#define packToF64UI(sign, exp, sig) ((uint64_t)(((uint_fast64_t)(sign) << 63) + ((uint_fast64_t)(exp) << 52) + (sig)))

-#define isNaNF64UI( a ) (((~(a) & UINT64_C( 0x7FF0000000000000 )) == 0) && ((a) & UINT64_C( 0x000FFFFFFFFFFFFF )))
+#define isNaNF64UI(a) (((~(a)&UINT64_C(0x7FF0000000000000)) == 0) && ((a)&UINT64_C(0x000FFFFFFFFFFFFF)))

-struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };
-struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t );
+struct exp16_sig64 {
+    int_fast16_t exp;
+    uint_fast64_t sig;
+};
+struct exp16_sig64 softfloat_normSubnormalF64Sig(uint_fast64_t);

-float64_t softfloat_roundPackToF64( bool, int_fast16_t, uint_fast64_t );
-float64_t softfloat_normRoundPackToF64( bool, int_fast16_t, uint_fast64_t );
+float64_t softfloat_roundPackToF64(bool, int_fast16_t, uint_fast64_t);
+float64_t softfloat_normRoundPackToF64(bool, int_fast16_t, uint_fast64_t);

-float64_t softfloat_addMagsF64( uint_fast64_t, uint_fast64_t, bool );
-float64_t softfloat_subMagsF64( uint_fast64_t, uint_fast64_t, bool );
-float64_t
- softfloat_mulAddF64(
-     uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast8_t );
+float64_t softfloat_addMagsF64(uint_fast64_t, uint_fast64_t, bool);
+float64_t softfloat_subMagsF64(uint_fast64_t, uint_fast64_t, bool);
+float64_t softfloat_mulAddF64(uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast8_t);

 /*----------------------------------------------------------------------------
-*----------------------------------------------------------------------------*/
-#define signExtF80UI64( a64 ) ((bool) ((uint16_t) (a64)>>15))
-#define expExtF80UI64( a64 ) ((a64) & 0x7FFF)
-#define packToExtF80UI64( sign, exp ) ((uint_fast16_t) (sign)<<15 | (exp))
+ *----------------------------------------------------------------------------*/
+#define signExtF80UI64(a64) ((bool)((uint16_t)(a64) >> 15))
+#define expExtF80UI64(a64) ((a64)&0x7FFF)
+#define packToExtF80UI64(sign, exp) ((uint_fast16_t)(sign) << 15 | (exp))

-#define isNaNExtF80UI( a64, a0 ) ((((a64) & 0x7FFF) == 0x7FFF) && ((a0) & UINT64_C( 0x7FFFFFFFFFFFFFFF )))
+#define isNaNExtF80UI(a64, a0) ((((a64)&0x7FFF) == 0x7FFF) && ((a0)&UINT64_C(0x7FFFFFFFFFFFFFFF)))

 #ifdef SOFTFLOAT_FAST_INT64

 /*----------------------------------------------------------------------------
-*----------------------------------------------------------------------------*/
+ *----------------------------------------------------------------------------*/

-struct exp32_sig64 { int_fast32_t exp; uint64_t sig; };
-struct exp32_sig64 softfloat_normSubnormalExtF80Sig( uint_fast64_t );
+struct exp32_sig64 {
+    int_fast32_t exp;
+    uint64_t sig;
+};
+struct exp32_sig64 softfloat_normSubnormalExtF80Sig(uint_fast64_t);

-extFloat80_t
- softfloat_roundPackToExtF80(
-     bool, int_fast32_t, uint_fast64_t, uint_fast64_t, uint_fast8_t );
-extFloat80_t
- softfloat_normRoundPackToExtF80(
-     bool, int_fast32_t, uint_fast64_t, uint_fast64_t, uint_fast8_t );
+extFloat80_t softfloat_roundPackToExtF80(bool, int_fast32_t, uint_fast64_t, uint_fast64_t, uint_fast8_t);
+extFloat80_t softfloat_normRoundPackToExtF80(bool, int_fast32_t, uint_fast64_t, uint_fast64_t, uint_fast8_t);

-extFloat80_t
- softfloat_addMagsExtF80(
-     uint_fast16_t, uint_fast64_t, uint_fast16_t, uint_fast64_t, bool );
-extFloat80_t
- softfloat_subMagsExtF80(
-     uint_fast16_t, uint_fast64_t, uint_fast16_t, uint_fast64_t, bool );
+extFloat80_t softfloat_addMagsExtF80(uint_fast16_t, uint_fast64_t, uint_fast16_t, uint_fast64_t, bool);
+extFloat80_t softfloat_subMagsExtF80(uint_fast16_t, uint_fast64_t, uint_fast16_t, uint_fast64_t, bool);

 /*----------------------------------------------------------------------------
-*----------------------------------------------------------------------------*/
-#define signF128UI64( a64 ) ((bool) ((uint64_t) (a64)>>63))
-#define expF128UI64( a64 ) ((int_fast32_t) ((a64)>>48) & 0x7FFF)
-#define fracF128UI64( a64 ) ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))
-#define packToF128UI64( sign, exp, sig64 ) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<48) + (sig64))
+ *----------------------------------------------------------------------------*/
+#define signF128UI64(a64) ((bool)((uint64_t)(a64) >> 63))
+#define expF128UI64(a64) ((int_fast32_t)((a64) >> 48) & 0x7FFF)
+#define fracF128UI64(a64) ((a64)&UINT64_C(0x0000FFFFFFFFFFFF))
+#define packToF128UI64(sign, exp, sig64) (((uint_fast64_t)(sign) << 63) + ((uint_fast64_t)(exp) << 48) + (sig64))

-#define isNaNF128UI( a64, a0 ) (((~(a64) & UINT64_C( 0x7FFF000000000000 )) == 0) && (a0 || ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))))
+#define isNaNF128UI(a64, a0) (((~(a64)&UINT64_C(0x7FFF000000000000)) == 0) && ((a0) || ((a64)&UINT64_C(0x0000FFFFFFFFFFFF)))) /* exponent all ones and fraction nonzero; evaluates 'a64' twice */

-struct exp32_sig128 { int_fast32_t exp; struct uint128 sig; };
-struct exp32_sig128
- softfloat_normSubnormalF128Sig( uint_fast64_t, uint_fast64_t );
+struct exp32_sig128 {
+    int_fast32_t exp;
+    struct uint128 sig;
+};
+struct exp32_sig128 softfloat_normSubnormalF128Sig(uint_fast64_t, uint_fast64_t);

-float128_t
- softfloat_roundPackToF128(
-     bool, int_fast32_t, uint_fast64_t, uint_fast64_t, uint_fast64_t );
-float128_t
- softfloat_normRoundPackToF128(
-     bool, int_fast32_t, uint_fast64_t, uint_fast64_t );
+float128_t softfloat_roundPackToF128(bool, int_fast32_t, uint_fast64_t, uint_fast64_t, uint_fast64_t);
+float128_t softfloat_normRoundPackToF128(bool, int_fast32_t, uint_fast64_t, uint_fast64_t);

-float128_t
- softfloat_addMagsF128(
-     uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, bool );
-float128_t
- softfloat_subMagsF128(
-     uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, bool );
-float128_t
- softfloat_mulAddF128(
-     uint_fast64_t,
-     uint_fast64_t,
-     uint_fast64_t,
-     uint_fast64_t,
-     uint_fast64_t,
-     uint_fast64_t,
-     uint_fast8_t
- );
+float128_t softfloat_addMagsF128(uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, bool);
+float128_t softfloat_subMagsF128(uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, bool);
+float128_t softfloat_mulAddF128(uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast8_t);

 #else

 /*----------------------------------------------------------------------------
-*----------------------------------------------------------------------------*/
+ *----------------------------------------------------------------------------*/

-bool
- softfloat_tryPropagateNaNExtF80M(
-     const struct extFloat80M *,
-     const struct extFloat80M *,
-     struct extFloat80M *
- );
-void softfloat_invalidExtF80M( struct extFloat80M * );
+bool softfloat_tryPropagateNaNExtF80M(const struct extFloat80M*, const struct extFloat80M*, struct extFloat80M*);
+void softfloat_invalidExtF80M(struct extFloat80M*);

-int softfloat_normExtF80SigM( uint64_t * );
+int softfloat_normExtF80SigM(uint64_t*);

-void
- softfloat_roundPackMToExtF80M(
-     bool, int32_t, uint32_t *, uint_fast8_t, struct extFloat80M * );
-void
- softfloat_normRoundPackMToExtF80M(
-     bool, int32_t, uint32_t *, uint_fast8_t, struct extFloat80M * );
+void softfloat_roundPackMToExtF80M(bool, int32_t, uint32_t*, uint_fast8_t, struct extFloat80M*);
+void softfloat_normRoundPackMToExtF80M(bool, int32_t, uint32_t*, uint_fast8_t, struct extFloat80M*);

-void
- softfloat_addExtF80M(
-     const struct extFloat80M *,
-     const struct extFloat80M *,
-     struct extFloat80M *,
-     bool
- );
+void softfloat_addExtF80M(const struct extFloat80M*, const struct extFloat80M*, struct extFloat80M*, bool);

-int
- softfloat_compareNonnormExtF80M(
-     const struct extFloat80M *, const struct extFloat80M * );
+int softfloat_compareNonnormExtF80M(const struct extFloat80M*, const struct extFloat80M*);

 /*----------------------------------------------------------------------------
-*----------------------------------------------------------------------------*/
-#define signF128UI96( a96 ) ((bool) ((uint32_t) (a96)>>31))
-#define expF128UI96( a96 ) ((int32_t) ((a96)>>16) & 0x7FFF)
-#define fracF128UI96( a96 ) ((a96) & 0x0000FFFF)
-#define packToF128UI96( sign, exp, sig96 ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<16) + (sig96))
+ *----------------------------------------------------------------------------*/
+#define signF128UI96(a96) ((bool)((uint32_t)(a96) >> 31))
+#define expF128UI96(a96) ((int32_t)((a96) >> 16) & 0x7FFF)
+#define fracF128UI96(a96) ((a96)&0x0000FFFF)
+#define packToF128UI96(sign, exp, sig96) (((uint32_t)(sign) << 31) + ((uint32_t)(exp) << 16) + (sig96))

-bool softfloat_isNaNF128M( const uint32_t * );
+bool softfloat_isNaNF128M(const uint32_t*);

-bool
- softfloat_tryPropagateNaNF128M(
-     const uint32_t *, const uint32_t *, uint32_t * );
-void softfloat_invalidF128M( uint32_t * );
+bool softfloat_tryPropagateNaNF128M(const uint32_t*, const uint32_t*, uint32_t*);
+void softfloat_invalidF128M(uint32_t*);

-int softfloat_shiftNormSigF128M( const uint32_t *, uint_fast8_t, uint32_t * );
+int softfloat_shiftNormSigF128M(const uint32_t*, uint_fast8_t, uint32_t*);

-void softfloat_roundPackMToF128M( bool, int32_t, uint32_t *, uint32_t * );
-void softfloat_normRoundPackMToF128M( bool, int32_t, uint32_t *, uint32_t * );
+void softfloat_roundPackMToF128M(bool, int32_t, uint32_t*, uint32_t*);
+void softfloat_normRoundPackMToF128M(bool, int32_t, uint32_t*, uint32_t*);

-void
- softfloat_addF128M( const uint32_t *, const uint32_t *, uint32_t *, bool );
-void
- softfloat_mulAddF128M(
-     const uint32_t *,
-     const uint32_t *,
-     const uint32_t *,
-     uint32_t *,
-     uint_fast8_t
- );
+void softfloat_addF128M(const uint32_t*, const uint32_t*, uint32_t*, bool);
+void softfloat_mulAddF128M(const uint32_t*, const uint32_t*, const uint32_t*, uint32_t*, uint_fast8_t);

 #endif

 #endif
-
@@ -42,13 +42,27 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifdef SOFTFLOAT_FAST_INT64

 #ifdef LITTLEENDIAN
-struct uint128 { uint64_t v0, v64; };
-struct uint64_extra { uint64_t extra, v; };
-struct uint128_extra { uint64_t extra; struct uint128 v; };
+struct uint128 {
+    uint64_t v0, v64;
+};
+struct uint64_extra {
+    uint64_t extra, v;
+};
+struct uint128_extra {
+    uint64_t extra;
+    struct uint128 v;
+};
 #else
-struct uint128 { uint64_t v64, v0; };
-struct uint64_extra { uint64_t v, extra; };
-struct uint128_extra { struct uint128 v; uint64_t extra; };
+struct uint128 {
+    uint64_t v64, v0;
+};
+struct uint64_extra {
+    uint64_t v, extra;
+};
+struct uint128_extra {
+    struct uint128 v;
+    uint64_t extra;
+};
 #endif

 #endif
@@ -59,27 +73,28 @@ struct uint128_extra { struct uint128 v; uint64_t extra; };
 *----------------------------------------------------------------------------*/
 #ifdef LITTLEENDIAN
 #define wordIncr 1
-#define indexWord( total, n ) (n)
-#define indexWordHi( total ) ((total) - 1)
-#define indexWordLo( total ) 0
-#define indexMultiword( total, m, n ) (n)
-#define indexMultiwordHi( total, n ) ((total) - (n))
-#define indexMultiwordLo( total, n ) 0
-#define indexMultiwordHiBut( total, n ) (n)
-#define indexMultiwordLoBut( total, n ) 0
-#define INIT_UINTM4( v3, v2, v1, v0 ) { v0, v1, v2, v3 }
+#define indexWord(total, n) (n)
+#define indexWordHi(total) ((total)-1)
+#define indexWordLo(total) 0
+#define indexMultiword(total, m, n) (n)
+#define indexMultiwordHi(total, n) ((total) - (n))
+#define indexMultiwordLo(total, n) 0
+#define indexMultiwordHiBut(total, n) (n)
+#define indexMultiwordLoBut(total, n) 0
+#define INIT_UINTM4(v3, v2, v1, v0)                                                                                                        \
+    { v0, v1, v2, v3 }
 #else
 #define wordIncr -1
-#define indexWord( total, n ) ((total) - 1 - (n))
-#define indexWordHi( total ) 0
-#define indexWordLo( total ) ((total) - 1)
-#define indexMultiword( total, m, n ) ((total) - 1 - (m))
-#define indexMultiwordHi( total, n ) 0
-#define indexMultiwordLo( total, n ) ((total) - (n))
-#define indexMultiwordHiBut( total, n ) 0
-#define indexMultiwordLoBut( total, n ) (n)
-#define INIT_UINTM4( v3, v2, v1, v0 ) { v3, v2, v1, v0 }
+#define indexWord(total, n) ((total)-1 - (n))
+#define indexWordHi(total) 0
+#define indexWordLo(total) ((total)-1)
+#define indexMultiword(total, m, n) ((total)-1 - (m))
+#define indexMultiwordHi(total, n) 0
+#define indexMultiwordLo(total, n) ((total) - (n))
+#define indexMultiwordHiBut(total, n) 0
+#define indexMultiwordLoBut(total, n) (n)
+#define INIT_UINTM4(v3, v2, v1, v0)                                                                                                        \
+    { v3, v2, v1, v0 }
 #endif

 #endif
-
@@ -37,9 +37,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifndef primitives_h
 #define primitives_h 1

+#include "primitiveTypes.h"
 #include <stdbool.h>
 #include <stdint.h>
-#include "primitiveTypes.h"

 #ifndef softfloat_shortShiftRightJam64
 /*----------------------------------------------------------------------------
@@ -50,10 +50,9 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
-uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist )
-    { return a>>dist | ((a & (((uint_fast64_t) 1<<dist) - 1)) != 0); }
+uint64_t softfloat_shortShiftRightJam64(uint64_t a, uint_fast8_t dist) { return a >> dist | ((a & (((uint_fast64_t)1 << dist) - 1)) != 0); }
 #else
-uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist );
+uint64_t softfloat_shortShiftRightJam64(uint64_t a, uint_fast8_t dist);
 #endif
 #endif

@@ -68,13 +67,11 @@ uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist );
 | is zero or nonzero.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
-INLINE uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist )
-{
-    return
-        (dist < 31) ? a>>dist | ((uint32_t) (a<<(-dist & 31)) != 0) : (a != 0);
+INLINE uint32_t softfloat_shiftRightJam32(uint32_t a, uint_fast16_t dist) {
+    return (dist < 31) ? a >> dist | ((uint32_t)(a << (-dist & 31)) != 0) : (a != 0);
 }
 #else
-uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist );
+uint32_t softfloat_shiftRightJam32(uint32_t a, uint_fast16_t dist);
 #endif
 #endif

@@ -89,13 +86,11 @@ uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist );
 | is zero or nonzero.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
-INLINE uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist )
-{
-    return
-        (dist < 63) ? a>>dist | ((uint64_t) (a<<(-dist & 63)) != 0) : (a != 0);
+INLINE uint64_t softfloat_shiftRightJam64(uint64_t a, uint_fast32_t dist) {
+    return (dist < 63) ? a >> dist | ((uint64_t)(a << (-dist & 63)) != 0) : (a != 0);
 }
 #else
-uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist );
+uint64_t softfloat_shiftRightJam64(uint64_t a, uint_fast32_t dist);
 #endif
 #endif

@@ -112,10 +107,9 @@ extern const uint_least8_t softfloat_countLeadingZeros8[256];
 | 'a'.  If 'a' is zero, 16 is returned.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
-INLINE uint_fast8_t softfloat_countLeadingZeros16( uint16_t a )
-{
+INLINE uint_fast8_t softfloat_countLeadingZeros16(uint16_t a) {
    uint_fast8_t count = 8;
-    if ( 0x100 <= a ) {
+    if(0x100 <= a) {
        count = 0;
        a >>= 8;
    }
@@ -123,7 +117,7 @@ INLINE uint_fast8_t softfloat_countLeadingZeros16( uint16_t a )
    return count;
 }
 #else
-uint_fast8_t softfloat_countLeadingZeros16( uint16_t a );
+uint_fast8_t softfloat_countLeadingZeros16(uint16_t a);
 #endif
 #endif

@@ -133,22 +127,21 @@ uint_fast8_t softfloat_countLeadingZeros16( uint16_t a );
 | 'a'.  If 'a' is zero, 32 is returned.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
-INLINE uint_fast8_t softfloat_countLeadingZeros32( uint32_t a )
-{
+INLINE uint_fast8_t softfloat_countLeadingZeros32(uint32_t a) {
    uint_fast8_t count = 0;
-    if ( a < 0x10000 ) {
+    if(a < 0x10000) {
        count = 16;
        a <<= 16;
    }
-    if ( a < 0x1000000 ) {
+    if(a < 0x1000000) {
        count += 8;
        a <<= 8;
    }
-    count += softfloat_countLeadingZeros8[a>>24];
+    count += softfloat_countLeadingZeros8[a >> 24];
    return count;
 }
 #else
-uint_fast8_t softfloat_countLeadingZeros32( uint32_t a );
+uint_fast8_t softfloat_countLeadingZeros32(uint32_t a);
 #endif
 #endif

@@ -157,7 +150,7 @@ uint_fast8_t softfloat_countLeadingZeros32( uint32_t a );
 | Returns the number of leading 0 bits before the most-significant 1 bit of
 | 'a'.  If 'a' is zero, 64 is returned.
 *----------------------------------------------------------------------------*/
-uint_fast8_t softfloat_countLeadingZeros64( uint64_t a );
+uint_fast8_t softfloat_countLeadingZeros64(uint64_t a);
 #endif

 extern const uint16_t softfloat_approxRecip_1k0s[16];
@@ -176,9 +169,9 @@ extern const uint16_t softfloat_approxRecip_1k1s[16];
 | (units in the last place).
 *----------------------------------------------------------------------------*/
 #ifdef SOFTFLOAT_FAST_DIV64TO32
-#define softfloat_approxRecip32_1( a ) ((uint32_t) (UINT64_C( 0x7FFFFFFFFFFFFFFF ) / (uint32_t) (a)))
+#define softfloat_approxRecip32_1(a) ((uint32_t)(UINT64_C(0x7FFFFFFFFFFFFFFF) / (uint32_t)(a)))
 #else
-uint32_t softfloat_approxRecip32_1( uint32_t a );
+uint32_t softfloat_approxRecip32_1(uint32_t a);
 #endif
 #endif

@@ -204,7 +197,7 @@ extern const uint16_t softfloat_approxRecipSqrt_1k1s[16];
 | returned is also always within the range 0.5 to 1; thus, the most-
 | significant bit of the result is always set.
 *----------------------------------------------------------------------------*/
-uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a );
+uint32_t softfloat_approxRecipSqrt32_1(unsigned int oddExpA, uint32_t a);
 #endif

 #ifdef SOFTFLOAT_FAST_INT64
@@ -222,10 +215,9 @@ uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a );
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (1 <= INLINE_LEVEL)
 INLINE
-bool softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
-    { return (a64 == b64) && (a0 == b0); }
+bool softfloat_eq128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0) { return (a64 == b64) && (a0 == b0); }
 #else
-bool softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
+bool softfloat_eq128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0);
 #endif
 #endif

@@ -237,10 +229,9 @@ bool softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
-bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
-    { return (a64 < b64) || ((a64 == b64) && (a0 <= b0)); }
+bool softfloat_le128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0) { return (a64 < b64) || ((a64 == b64) && (a0 <= b0)); }
 #else
-bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
+bool softfloat_le128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0);
 #endif
 #endif

@@ -252,10 +243,9 @@ bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
-bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
-    { return (a64 < b64) || ((a64 == b64) && (a0 < b0)); }
+bool softfloat_lt128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0) { return (a64 < b64) || ((a64 == b64) && (a0 < b0)); }
 #else
-bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
+bool softfloat_lt128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0);
 #endif
 #endif

@@ -266,17 +256,14 @@ bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
-struct uint128
- softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
-{
+struct uint128 softfloat_shortShiftLeft128(uint64_t a64, uint64_t a0, uint_fast8_t dist) {
    struct uint128 z;
-    z.v64 = a64<<dist | a0>>(-dist & 63);
-    z.v0 = a0<<dist;
+    z.v64 = a64 << dist | a0 >> (-dist & 63);
+    z.v0 = a0 << dist;
    return z;
 }
 #else
-struct uint128
- softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist );
+struct uint128 softfloat_shortShiftLeft128(uint64_t a64, uint64_t a0, uint_fast8_t dist);
 #endif
 #endif

@@ -287,17 +274,14 @@ struct uint128
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
-struct uint128
- softfloat_shortShiftRight128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
-{
+struct uint128 softfloat_shortShiftRight128(uint64_t a64, uint64_t a0, uint_fast8_t dist) {
    struct uint128 z;
-    z.v64 = a64>>dist;
-    z.v0 = a64<<(-dist & 63) | a0>>dist;
+    z.v64 = a64 >> dist;
+    z.v0 = a64 << (-dist & 63) | a0 >> dist;
    return z;
 }
 #else
-struct uint128
- softfloat_shortShiftRight128( uint64_t a64, uint64_t a0, uint_fast8_t dist );
+struct uint128 softfloat_shortShiftRight128(uint64_t a64, uint64_t a0, uint_fast8_t dist);
 #endif
 #endif

@@ -308,19 +292,14 @@ struct uint128
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
-struct uint64_extra
- softfloat_shortShiftRightJam64Extra(
-     uint64_t a, uint64_t extra, uint_fast8_t dist )
-{
+struct uint64_extra softfloat_shortShiftRightJam64Extra(uint64_t a, uint64_t extra, uint_fast8_t dist) {
    struct uint64_extra z;
-    z.v = a>>dist;
-    z.extra = a<<(-dist & 63) | (extra != 0);
+    z.v = a >> dist;
+    z.extra = a << (-dist & 63) | (extra != 0);
    return z;
 }
 #else
-struct uint64_extra
- softfloat_shortShiftRightJam64Extra(
-     uint64_t a, uint64_t extra, uint_fast8_t dist );
+struct uint64_extra softfloat_shortShiftRightJam64Extra(uint64_t a, uint64_t extra, uint_fast8_t dist);
 #endif
 #endif

@@ -334,22 +313,15 @@ struct uint64_extra
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
 INLINE
-struct uint128
- softfloat_shortShiftRightJam128(
-     uint64_t a64, uint64_t a0, uint_fast8_t dist )
-{
+struct uint128 softfloat_shortShiftRightJam128(uint64_t a64, uint64_t a0, uint_fast8_t dist) {
    uint_fast8_t negDist = -dist;
    struct uint128 z;
-    z.v64 = a64>>dist;
-    z.v0 =
-        a64<<(negDist & 63) | a0>>dist
-            | ((uint64_t) (a0<<(negDist & 63)) != 0);
+    z.v64 = a64 >> dist;
+    z.v0 = a64 << (negDist & 63) | a0 >> dist | ((uint64_t)(a0 << (negDist & 63)) != 0);
    return z;
 }
 #else
-struct uint128
- softfloat_shortShiftRightJam128(
-     uint64_t a64, uint64_t a0, uint_fast8_t dist );
+struct uint128 softfloat_shortShiftRightJam128(uint64_t a64, uint64_t a0, uint_fast8_t dist);
 #endif
 #endif

@@ -360,21 +332,16 @@ struct uint128
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
 INLINE
-struct uint128_extra
- softfloat_shortShiftRightJam128Extra(
-     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist )
-{
+struct uint128_extra softfloat_shortShiftRightJam128Extra(uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist) {
    uint_fast8_t negDist = -dist;
    struct uint128_extra z;
-    z.v.v64 = a64>>dist;
-    z.v.v0 = a64<<(negDist & 63) | a0>>dist;
-    z.extra = a0<<(negDist & 63) | (extra != 0);
+    z.v.v64 = a64 >> dist;
+    z.v.v0 = a64 << (negDist & 63) | a0 >> dist;
+    z.extra = a0 << (negDist & 63) | (extra != 0);
    return z;
 }
 #else
-struct uint128_extra
- softfloat_shortShiftRightJam128Extra(
-     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist );
+struct uint128_extra softfloat_shortShiftRightJam128Extra(uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist);
 #endif
 #endif

@@ -397,14 +364,11 @@ struct uint128_extra
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (4 <= INLINE_LEVEL)
 INLINE
-struct uint64_extra
- softfloat_shiftRightJam64Extra(
-     uint64_t a, uint64_t extra, uint_fast32_t dist )
-{
+struct uint64_extra softfloat_shiftRightJam64Extra(uint64_t a, uint64_t extra, uint_fast32_t dist) {
    struct uint64_extra z;
-    if ( dist < 64 ) {
-        z.v = a>>dist;
-        z.extra = a<<(-dist & 63);
+    if(dist < 64) {
+        z.v = a >> dist;
+        z.extra = a << (-dist & 63);
    } else {
        z.v = 0;
        z.extra = (dist == 64) ? a : (a != 0);
@@ -413,9 +377,7 @@ struct uint64_extra
    return z;
 }
 #else
-struct uint64_extra
- softfloat_shiftRightJam64Extra(
-     uint64_t a, uint64_t extra, uint_fast32_t dist );
+struct uint64_extra softfloat_shiftRightJam64Extra(uint64_t a, uint64_t extra, uint_fast32_t dist);
 #endif
 #endif

@@ -430,8 +392,7 @@ struct uint64_extra
 | greater than 128, the result will be either 0 or 1, depending on whether the
 | original 128 bits are all zeros.
 *----------------------------------------------------------------------------*/
-struct uint128
- softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist );
+struct uint128 softfloat_shiftRightJam128(uint64_t a64, uint64_t a0, uint_fast32_t dist);
 #endif

 #ifndef softfloat_shiftRightJam128Extra
@@ -452,9 +413,7 @@ struct uint128
 | is modified as described above and returned in the 'extra' field of the
 | result.)
 *----------------------------------------------------------------------------*/
-struct uint128_extra
- softfloat_shiftRightJam128Extra(
-     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast32_t dist );
+struct uint128_extra softfloat_shiftRightJam128Extra(uint64_t a64, uint64_t a0, uint64_t extra, uint_fast32_t dist);
 #endif

 #ifndef softfloat_shiftRightJam256M
@@ -470,9 +429,7 @@ struct uint128_extra
 | is greater than 256, the stored result will be either 0 or 1, depending on
 | whether the original 256 bits are all zeros.
 *----------------------------------------------------------------------------*/
-void
- softfloat_shiftRightJam256M(
-     const uint64_t *aPtr, uint_fast32_t dist, uint64_t *zPtr );
+void softfloat_shiftRightJam256M(const uint64_t* aPtr, uint_fast32_t dist, uint64_t* zPtr);
 #endif

 #ifndef softfloat_add128
@@ -483,17 +440,14 @@ void
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
-struct uint128
- softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
-{
+struct uint128 softfloat_add128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0) {
    struct uint128 z;
    z.v0 = a0 + b0;
    z.v64 = a64 + b64 + (z.v0 < a0);
    return z;
 }
 #else
-struct uint128
- softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
+struct uint128 softfloat_add128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0);
 #endif
 #endif

@@ -505,9 +459,7 @@ struct uint128
 | an array of four 64-bit elements that concatenate in the platform's normal
 | endian order to form a 256-bit integer.
 *----------------------------------------------------------------------------*/
-void
- softfloat_add256M(
-     const uint64_t *aPtr, const uint64_t *bPtr, uint64_t *zPtr );
+void softfloat_add256M(const uint64_t* aPtr, const uint64_t* bPtr, uint64_t* zPtr);
 #endif

 #ifndef softfloat_sub128
@@ -518,9 +470,7 @@ void
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
-struct uint128
- softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
-{
+struct uint128 softfloat_sub128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0) {
    struct uint128 z;
    z.v0 = a0 - b0;
    z.v64 = a64 - b64;
@@ -528,8 +478,7 @@ struct uint128
    return z;
 }
 #else
-struct uint128
- softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
+struct uint128 softfloat_sub128(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0);
 #endif
 #endif

@@ -542,9 +491,7 @@ struct uint128
 | 64-bit elements that concatenate in the platform's normal endian order to
 | form a 256-bit integer.
 *----------------------------------------------------------------------------*/
-void
- softfloat_sub256M(
-     const uint64_t *aPtr, const uint64_t *bPtr, uint64_t *zPtr );
+void softfloat_sub256M(const uint64_t* aPtr, const uint64_t* bPtr, uint64_t* zPtr);
 #endif

 #ifndef softfloat_mul64ByShifted32To128
@@ -552,17 +499,16 @@ void
 | Returns the 128-bit product of 'a', 'b', and 2^32.
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
-INLINE struct uint128 softfloat_mul64ByShifted32To128( uint64_t a, uint32_t b )
-{
+INLINE struct uint128 softfloat_mul64ByShifted32To128(uint64_t a, uint32_t b) {
    uint_fast64_t mid;
    struct uint128 z;
-    mid = (uint_fast64_t) (uint32_t) a * b;
-    z.v0 = mid<<32;
-    z.v64 = (uint_fast64_t) (uint32_t) (a>>32) * b + (mid>>32);
+    mid = (uint_fast64_t)(uint32_t)a * b;
+    z.v0 = mid << 32;
+    z.v64 = (uint_fast64_t)(uint32_t)(a >> 32) * b + (mid >> 32);
    return z;
 }
 #else
-struct uint128 softfloat_mul64ByShifted32To128( uint64_t a, uint32_t b );
+struct uint128 softfloat_mul64ByShifted32To128(uint64_t a, uint32_t b);
 #endif
 #endif

@@ -570,7 +516,7 @@ struct uint128 softfloat_mul64ByShifted32To128( uint64_t a, uint32_t b );
 /*----------------------------------------------------------------------------
 | Returns the 128-bit product of 'a' and 'b'.
 *----------------------------------------------------------------------------*/
-struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b );
+struct uint128 softfloat_mul64To128(uint64_t a, uint64_t b);
 #endif

 #ifndef softfloat_mul128By32
@@ -581,19 +527,18 @@ struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b );
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (4 <= INLINE_LEVEL)
 INLINE
-struct uint128 softfloat_mul128By32( uint64_t a64, uint64_t a0, uint32_t b )
-{
+struct uint128 softfloat_mul128By32(uint64_t a64, uint64_t a0, uint32_t b) {
    struct uint128 z;
    uint_fast64_t mid;
    uint_fast32_t carry;
    z.v0 = a0 * b;
-    mid = (uint_fast64_t) (uint32_t) (a0>>32) * b;
-    carry = (uint32_t) ((uint_fast32_t) (z.v0>>32) - (uint_fast32_t) mid);
-    z.v64 = a64 * b + (uint_fast32_t) ((mid + carry)>>32);
+    mid = (uint_fast64_t)(uint32_t)(a0 >> 32) * b;
+    carry = (uint32_t)((uint_fast32_t)(z.v0 >> 32) - (uint_fast32_t)mid);
+    z.v64 = a64 * b + (uint_fast32_t)((mid + carry) >> 32);
    return z;
 }
 #else
-struct uint128 softfloat_mul128By32( uint64_t a64, uint64_t a0, uint32_t b );
+struct uint128 softfloat_mul128By32(uint64_t a64, uint64_t a0, uint32_t b);
 #endif
 #endif

@@ -605,9 +550,7 @@ struct uint128 softfloat_mul128By32( uint64_t a64, uint64_t a0, uint32_t b );
 | Argument 'zPtr' points to an array of four 64-bit elements that concatenate
 | in the platform's normal endian order to form a 256-bit integer.
 *----------------------------------------------------------------------------*/
-void
- softfloat_mul128To256M(
-     uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0, uint64_t *zPtr );
+void softfloat_mul128To256M(uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0, uint64_t* zPtr);
 #endif

 #else
@@ -626,7 +569,7 @@ void
 | Each of 'aPtr' and 'bPtr' points to an array of three 32-bit elements that
 | concatenate in the platform's normal endian order to form a 96-bit integer.
 *----------------------------------------------------------------------------*/
-int_fast8_t softfloat_compare96M( const uint32_t *aPtr, const uint32_t *bPtr );
+int_fast8_t softfloat_compare96M(const uint32_t* aPtr, const uint32_t* bPtr);
 #endif

 #ifndef softfloat_compare128M
@@ -638,8 +581,7 @@ int_fast8_t softfloat_compare96M( const uint32_t *aPtr, const uint32_t *bPtr );
 | Each of 'aPtr' and 'bPtr' points to an array of four 32-bit elements that
 | concatenate in the platform's normal endian order to form a 128-bit integer.
 *----------------------------------------------------------------------------*/
-int_fast8_t
- softfloat_compare128M( const uint32_t *aPtr, const uint32_t *bPtr );
+int_fast8_t softfloat_compare128M(const uint32_t* aPtr, const uint32_t* bPtr);
 #endif

 #ifndef softfloat_shortShiftLeft64To96M
@@ -652,19 +594,14 @@ int_fast8_t
 *----------------------------------------------------------------------------*/
 #if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
 INLINE
-void
- softfloat_shortShiftLeft64To96M(
-     uint64_t a, uint_fast8_t dist, uint32_t *zPtr )
-{
-    zPtr[indexWord( 3, 0 )] = (uint32_t) a<<dist;
+void softfloat_shortShiftLeft64To96M(uint64_t a, uint_fast8_t dist, uint32_t* zPtr) {
+    zPtr[indexWord(3, 0)] = (uint32_t)a << dist;
    a >>= 32 - dist;
-    zPtr[indexWord( 3, 2 )] = a>>32;
-    zPtr[indexWord( 3, 1 )] = a;
+    zPtr[indexWord(3, 2)] = a >> 32;
+    zPtr[indexWord(3, 1)] = a;
 }
 #else
-void
- softfloat_shortShiftLeft64To96M(
-     uint64_t a, uint_fast8_t dist, uint32_t *zPtr );
+void softfloat_shortShiftLeft64To96M(uint64_t a, uint_fast8_t dist, uint32_t* zPtr);
 #endif
 #endif

@@ -678,13 +615,7 @@ void
 | that concatenate in the platform's normal endian order to form an N-bit
 | integer.
 *----------------------------------------------------------------------------*/
-void
- softfloat_shortShiftLeftM(
-     uint_fast8_t size_words,
-     const uint32_t *aPtr,
-     uint_fast8_t dist,
-     uint32_t *zPtr
- );
+void softfloat_shortShiftLeftM(uint_fast8_t size_words, const uint32_t* aPtr, uint_fast8_t dist, uint32_t* zPtr);
 #endif

 #ifndef softfloat_shortShiftLeft96M
@@ -692,7 +623,7 @@ void
 | This function or macro is the same as 'softfloat_shortShiftLeftM' with
 | 'size_words' = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftLeft96M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 3, aPtr, dist, zPtr )
+#define softfloat_shortShiftLeft96M(aPtr, dist, zPtr) softfloat_shortShiftLeftM(3, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shortShiftLeft128M
@@ -700,7 +631,7 @@ void
 | This function or macro is the same as 'softfloat_shortShiftLeftM' with
 | 'size_words' = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftLeft128M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 4, aPtr, dist, zPtr )
+#define softfloat_shortShiftLeft128M(aPtr, dist, zPtr) softfloat_shortShiftLeftM(4, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shortShiftLeft160M
@@ -708,7 +639,7 @@ void
 | This function or macro is the same as 'softfloat_shortShiftLeftM' with
 | 'size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftLeft160M( aPtr, dist, zPtr ) softfloat_shortShiftLeftM( 5, aPtr, dist, zPtr )
+#define softfloat_shortShiftLeft160M(aPtr, dist, zPtr) softfloat_shortShiftLeftM(5, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shiftLeftM
@@ -722,13 +653,7 @@ void
 |   The value of 'dist' can be arbitrarily large.  In particular, if 'dist' is
 | greater than N, the stored result will be 0.
 *----------------------------------------------------------------------------*/
-void
- softfloat_shiftLeftM(
-     uint_fast8_t size_words,
-     const uint32_t *aPtr,
-     uint32_t dist,
-     uint32_t *zPtr
- );
+void softfloat_shiftLeftM(uint_fast8_t size_words, const uint32_t* aPtr, uint32_t dist, uint32_t* zPtr);
 #endif

 #ifndef softfloat_shiftLeft96M
@@ -736,7 +661,7 @@ void
 | This function or macro is the same as 'softfloat_shiftLeftM' with
 | 'size_words' = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftLeft96M( aPtr, dist, zPtr ) softfloat_shiftLeftM( 3, aPtr, dist, zPtr )
+#define softfloat_shiftLeft96M(aPtr, dist, zPtr) softfloat_shiftLeftM(3, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shiftLeft128M
@@ -744,7 +669,7 @@ void
 | This function or macro is the same as 'softfloat_shiftLeftM' with
 | 'size_words' = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftLeft128M( aPtr, dist, zPtr ) softfloat_shiftLeftM( 4, aPtr, dist, zPtr )
+#define softfloat_shiftLeft128M(aPtr, dist, zPtr) softfloat_shiftLeftM(4, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shiftLeft160M
@@ -752,7 +677,7 @@ void
 | This function or macro is the same as 'softfloat_shiftLeftM' with
 | 'size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftLeft160M( aPtr, dist, zPtr ) softfloat_shiftLeftM( 5, aPtr, dist, zPtr )
+#define softfloat_shiftLeft160M(aPtr, dist, zPtr) softfloat_shiftLeftM(5, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shortShiftRightM
@@ -765,13 +690,7 @@ void
 | that concatenate in the platform's normal endian order to form an N-bit
 | integer.
 *----------------------------------------------------------------------------*/
-void
- softfloat_shortShiftRightM(
-     uint_fast8_t size_words,
-     const uint32_t *aPtr,
-     uint_fast8_t dist,
-     uint32_t *zPtr
- );
+void softfloat_shortShiftRightM(uint_fast8_t size_words, const uint32_t* aPtr, uint_fast8_t dist, uint32_t* zPtr);
 #endif

 #ifndef softfloat_shortShiftRight128M
@@ -779,7 +698,7 @@ void
 | This function or macro is the same as 'softfloat_shortShiftRightM' with
 | 'size_words' = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftRight128M( aPtr, dist, zPtr ) softfloat_shortShiftRightM( 4, aPtr, dist, zPtr )
+#define softfloat_shortShiftRight128M(aPtr, dist, zPtr) softfloat_shortShiftRightM(4, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shortShiftRight160M
@@ -787,7 +706,7 @@ void
 | This function or macro is the same as 'softfloat_shortShiftRightM' with
 | 'size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftRight160M( aPtr, dist, zPtr ) softfloat_shortShiftRightM( 5, aPtr, dist, zPtr )
+#define softfloat_shortShiftRight160M(aPtr, dist, zPtr) softfloat_shortShiftRightM(5, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shortShiftRightJamM
@@ -801,9 +720,7 @@ void
 | to a 'size_words'-long array of 32-bit elements that concatenate in the
 | platform's normal endian order to form an N-bit integer.
 *----------------------------------------------------------------------------*/
-void
- softfloat_shortShiftRightJamM(
-     uint_fast8_t, const uint32_t *, uint_fast8_t, uint32_t * );
+void softfloat_shortShiftRightJamM(uint_fast8_t, const uint32_t*, uint_fast8_t, uint32_t*);
 #endif

 #ifndef softfloat_shortShiftRightJam160M
@@ -811,7 +728,7 @@ void
 | This function or macro is the same as 'softfloat_shortShiftRightJamM' with
 | 'size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_shortShiftRightJam160M( aPtr, dist, zPtr ) softfloat_shortShiftRightJamM( 5, aPtr, dist, zPtr )
+#define softfloat_shortShiftRightJam160M(aPtr, dist, zPtr) softfloat_shortShiftRightJamM(5, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shiftRightM
@@ -825,13 +742,7 @@ void
 |   The value of 'dist' can be arbitrarily large.  In particular, if 'dist' is
 | greater than N, the stored result will be 0.
 *----------------------------------------------------------------------------*/
-void
- softfloat_shiftRightM(
-     uint_fast8_t size_words,
-     const uint32_t *aPtr,
-     uint32_t dist,
-     uint32_t *zPtr
- );
+void softfloat_shiftRightM(uint_fast8_t size_words, const uint32_t* aPtr, uint32_t dist, uint32_t* zPtr);
 #endif

 #ifndef softfloat_shiftRight96M
@@ -839,7 +750,7 @@ void
 | This function or macro is the same as 'softfloat_shiftRightM' with
 | 'size_words' = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftRight96M( aPtr, dist, zPtr ) softfloat_shiftRightM( 3, aPtr, dist, zPtr )
+#define softfloat_shiftRight96M(aPtr, dist, zPtr) softfloat_shiftRightM(3, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shiftRightJamM
@@ -856,13 +767,7 @@ void
 | is greater than N, the stored result will be either 0 or 1, depending on
 | whether the original N bits are all zeros.
 *----------------------------------------------------------------------------*/
-void
- softfloat_shiftRightJamM(
-     uint_fast8_t size_words,
-     const uint32_t *aPtr,
-     uint32_t dist,
-     uint32_t *zPtr
- );
+void softfloat_shiftRightJamM(uint_fast8_t size_words, const uint32_t* aPtr, uint32_t dist, uint32_t* zPtr);
 #endif

 #ifndef softfloat_shiftRightJam96M
@@ -870,7 +775,7 @@ void
 | This function or macro is the same as 'softfloat_shiftRightJamM' with
 | 'size_words' = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftRightJam96M( aPtr, dist, zPtr ) softfloat_shiftRightJamM( 3, aPtr, dist, zPtr )
+#define softfloat_shiftRightJam96M(aPtr, dist, zPtr) softfloat_shiftRightJamM(3, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shiftRightJam128M
@@ -878,7 +783,7 @@ void
 | This function or macro is the same as 'softfloat_shiftRightJamM' with
 | 'size_words' = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftRightJam128M( aPtr, dist, zPtr ) softfloat_shiftRightJamM( 4, aPtr, dist, zPtr )
+#define softfloat_shiftRightJam128M(aPtr, dist, zPtr) softfloat_shiftRightJamM(4, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_shiftRightJam160M
@@ -886,7 +791,7 @@ void
 | This function or macro is the same as 'softfloat_shiftRightJamM' with
 | 'size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_shiftRightJam160M( aPtr, dist, zPtr ) softfloat_shiftRightJamM( 5, aPtr, dist, zPtr )
+#define softfloat_shiftRightJam160M(aPtr, dist, zPtr) softfloat_shiftRightJamM(5, aPtr, dist, zPtr)
 #endif

 #ifndef softfloat_addM
@@ -898,13 +803,7 @@ void
 | elements that concatenate in the platform's normal endian order to form an
 | N-bit integer.
 *----------------------------------------------------------------------------*/
-void
- softfloat_addM(
-     uint_fast8_t size_words,
-     const uint32_t *aPtr,
-     const uint32_t *bPtr,
-     uint32_t *zPtr
- );
+void softfloat_addM(uint_fast8_t size_words, const uint32_t* aPtr, const uint32_t* bPtr, uint32_t* zPtr);
 #endif

 #ifndef softfloat_add96M
@@ -912,7 +811,7 @@ void
 | This function or macro is the same as 'softfloat_addM' with 'size_words'
 | = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_add96M( aPtr, bPtr, zPtr ) softfloat_addM( 3, aPtr, bPtr, zPtr )
+#define softfloat_add96M(aPtr, bPtr, zPtr) softfloat_addM(3, aPtr, bPtr, zPtr)
 #endif

 #ifndef softfloat_add128M
@@ -920,7 +819,7 @@ void
 | This function or macro is the same as 'softfloat_addM' with 'size_words'
 | = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_add128M( aPtr, bPtr, zPtr ) softfloat_addM( 4, aPtr, bPtr, zPtr )
+#define softfloat_add128M(aPtr, bPtr, zPtr) softfloat_addM(4, aPtr, bPtr, zPtr)
 #endif

 #ifndef softfloat_add160M
@@ -928,7 +827,7 @@ void
 | This function or macro is the same as 'softfloat_addM' with 'size_words'
 | = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_add160M( aPtr, bPtr, zPtr ) softfloat_addM( 5, aPtr, bPtr, zPtr )
+#define softfloat_add160M(aPtr, bPtr, zPtr) softfloat_addM(5, aPtr, bPtr, zPtr)
 #endif

 #ifndef softfloat_addCarryM
@@ -940,14 +839,7 @@ void
 | points to a 'size_words'-long array of 32-bit elements that concatenate in
 | the platform's normal endian order to form an N-bit integer.
 *----------------------------------------------------------------------------*/
-uint_fast8_t
- softfloat_addCarryM(
-     uint_fast8_t size_words,
-     const uint32_t *aPtr,
-     const uint32_t *bPtr,
-     uint_fast8_t carry,
-     uint32_t *zPtr
- );
+uint_fast8_t softfloat_addCarryM(uint_fast8_t size_words, const uint32_t* aPtr, const uint32_t* bPtr, uint_fast8_t carry, uint32_t* zPtr);
 #endif

 #ifndef softfloat_addComplCarryM
@@ -956,14 +848,8 @@ uint_fast8_t
 | the value of the unsigned integer pointed to by 'bPtr' is bit-wise complemented
 | before the addition.
 *----------------------------------------------------------------------------*/
-uint_fast8_t
- softfloat_addComplCarryM(
-     uint_fast8_t size_words,
-     const uint32_t *aPtr,
-     const uint32_t *bPtr,
-     uint_fast8_t carry,
-     uint32_t *zPtr
- );
+uint_fast8_t softfloat_addComplCarryM(uint_fast8_t size_words, const uint32_t* aPtr, const uint32_t* bPtr, uint_fast8_t carry,
+                                      uint32_t* zPtr);
 #endif

 #ifndef softfloat_addComplCarry96M
@@ -971,7 +857,7 @@ uint_fast8_t
 | This function or macro is the same as 'softfloat_addComplCarryM' with
 | 'size_words' = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_addComplCarry96M( aPtr, bPtr, carry, zPtr ) softfloat_addComplCarryM( 3, aPtr, bPtr, carry, zPtr )
+#define softfloat_addComplCarry96M(aPtr, bPtr, carry, zPtr) softfloat_addComplCarryM(3, aPtr, bPtr, carry, zPtr)
 #endif

 #ifndef softfloat_negXM
@@ -981,7 +867,7 @@ uint_fast8_t
 | points to a 'size_words'-long array of 32-bit elements that concatenate in
 | the platform's normal endian order to form an N-bit integer.
 *----------------------------------------------------------------------------*/
-void softfloat_negXM( uint_fast8_t size_words, uint32_t *zPtr );
+void softfloat_negXM(uint_fast8_t size_words, uint32_t* zPtr);
 #endif

 #ifndef softfloat_negX96M
@@ -989,7 +875,7 @@ void softfloat_negXM( uint_fast8_t size_words, uint32_t *zPtr );
 | This function or macro is the same as 'softfloat_negXM' with 'size_words'
 | = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_negX96M( zPtr ) softfloat_negXM( 3, zPtr )
+#define softfloat_negX96M(zPtr) softfloat_negXM(3, zPtr)
 #endif

 #ifndef softfloat_negX128M
@@ -997,7 +883,7 @@ void softfloat_negXM( uint_fast8_t size_words, uint32_t *zPtr );
 | This function or macro is the same as 'softfloat_negXM' with 'size_words'
 | = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_negX128M( zPtr ) softfloat_negXM( 4, zPtr )
+#define softfloat_negX128M(zPtr) softfloat_negXM(4, zPtr)
 #endif

 #ifndef softfloat_negX160M
@@ -1005,7 +891,7 @@ void softfloat_negXM( uint_fast8_t size_words, uint32_t *zPtr );
 | This function or macro is the same as 'softfloat_negXM' with 'size_words'
 | = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_negX160M( zPtr ) softfloat_negXM( 5, zPtr )
+#define softfloat_negX160M(zPtr) softfloat_negXM(5, zPtr)
 #endif

 #ifndef softfloat_negX256M
@@ -1013,7 +899,7 @@ void softfloat_negXM( uint_fast8_t size_words, uint32_t *zPtr );
 | This function or macro is the same as 'softfloat_negXM' with 'size_words'
 | = 8 (N = 256).
 *----------------------------------------------------------------------------*/
-#define softfloat_negX256M( zPtr ) softfloat_negXM( 8, zPtr )
+#define softfloat_negX256M(zPtr) softfloat_negXM(8, zPtr)
 #endif

 #ifndef softfloat_sub1XM
@@ -1024,7 +910,7 @@ void softfloat_negXM( uint_fast8_t size_words, uint32_t *zPtr );
 | elements that concatenate in the platform's normal endian order to form an
 | N-bit integer.
 *----------------------------------------------------------------------------*/
-void softfloat_sub1XM( uint_fast8_t size_words, uint32_t *zPtr );
+void softfloat_sub1XM(uint_fast8_t size_words, uint32_t* zPtr);
 #endif

 #ifndef softfloat_sub1X96M
@@ -1032,7 +918,7 @@ void softfloat_sub1XM( uint_fast8_t size_words, uint32_t *zPtr );
 | This function or macro is the same as 'softfloat_sub1XM' with 'size_words'
 | = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_sub1X96M( zPtr ) softfloat_sub1XM( 3, zPtr )
+#define softfloat_sub1X96M(zPtr) softfloat_sub1XM(3, zPtr)
 #endif

 #ifndef softfloat_sub1X160M
@@ -1040,7 +926,7 @@ void softfloat_sub1XM( uint_fast8_t size_words, uint32_t *zPtr );
 | This function or macro is the same as 'softfloat_sub1XM' with 'size_words'
 | = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_sub1X160M( zPtr ) softfloat_sub1XM( 5, zPtr )
+#define softfloat_sub1X160M(zPtr) softfloat_sub1XM(5, zPtr)
 #endif

 #ifndef softfloat_subM
@@ -1052,13 +938,7 @@ void softfloat_sub1XM( uint_fast8_t size_words, uint32_t *zPtr );
 | array of 32-bit elements that concatenate in the platform's normal endian
 | order to form an N-bit integer.
 *----------------------------------------------------------------------------*/
-void
- softfloat_subM(
-     uint_fast8_t size_words,
-     const uint32_t *aPtr,
-     const uint32_t *bPtr,
-     uint32_t *zPtr
- );
+void softfloat_subM(uint_fast8_t size_words, const uint32_t* aPtr, const uint32_t* bPtr, uint32_t* zPtr);
 #endif

 #ifndef softfloat_sub96M
@@ -1066,7 +946,7 @@ void
 | This function or macro is the same as 'softfloat_subM' with 'size_words'
 | = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_sub96M( aPtr, bPtr, zPtr ) softfloat_subM( 3, aPtr, bPtr, zPtr )
+#define softfloat_sub96M(aPtr, bPtr, zPtr) softfloat_subM(3, aPtr, bPtr, zPtr)
 #endif

 #ifndef softfloat_sub128M
@@ -1074,7 +954,7 @@ void
 | This function or macro is the same as 'softfloat_subM' with 'size_words'
 | = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_sub128M( aPtr, bPtr, zPtr ) softfloat_subM( 4, aPtr, bPtr, zPtr )
+#define softfloat_sub128M(aPtr, bPtr, zPtr) softfloat_subM(4, aPtr, bPtr, zPtr)
 #endif

 #ifndef softfloat_sub160M
@@ -1082,7 +962,7 @@ void
 | This function or macro is the same as 'softfloat_subM' with 'size_words'
 | = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_sub160M( aPtr, bPtr, zPtr ) softfloat_subM( 5, aPtr, bPtr, zPtr )
+#define softfloat_sub160M(aPtr, bPtr, zPtr) softfloat_subM(5, aPtr, bPtr, zPtr)
 #endif

 #ifndef softfloat_mul64To128M
@@ -1092,7 +972,7 @@ void
 | elements that concatenate in the platform's normal endian order to form a
 | 128-bit integer.
 *----------------------------------------------------------------------------*/
-void softfloat_mul64To128M( uint64_t a, uint64_t b, uint32_t *zPtr );
+void softfloat_mul64To128M(uint64_t a, uint64_t b, uint32_t* zPtr);
 #endif

 #ifndef softfloat_mul128MTo256M
@@ -1104,9 +984,7 @@ void softfloat_mul64To128M( uint64_t a, uint64_t b, uint32_t *zPtr );
 | Argument 'zPtr' points to an array of eight 32-bit elements that concatenate
 | to form a 256-bit integer.
 *----------------------------------------------------------------------------*/
-void
- softfloat_mul128MTo256M(
-     const uint32_t *aPtr, const uint32_t *bPtr, uint32_t *zPtr );
+void softfloat_mul128MTo256M(const uint32_t* aPtr, const uint32_t* bPtr, uint32_t* zPtr);
 #endif

 #ifndef softfloat_remStepMBy32
@@ -1119,15 +997,8 @@ void
 | to a 'size_words'-long array of 32-bit elements that concatenate in the
 | platform's normal endian order to form an N-bit integer.
 *----------------------------------------------------------------------------*/
-void
- softfloat_remStepMBy32(
-     uint_fast8_t size_words,
-     const uint32_t *remPtr,
-     uint_fast8_t dist,
-     const uint32_t *bPtr,
-     uint32_t q,
-     uint32_t *zPtr
- );
+void softfloat_remStepMBy32(uint_fast8_t size_words, const uint32_t* remPtr, uint_fast8_t dist, const uint32_t* bPtr, uint32_t q,
+                            uint32_t* zPtr);
 #endif

 #ifndef softfloat_remStep96MBy32
@@ -1135,7 +1006,7 @@ void
 | This function or macro is the same as 'softfloat_remStepMBy32' with
 | 'size_words' = 3 (N = 96).
 *----------------------------------------------------------------------------*/
-#define softfloat_remStep96MBy32( remPtr, dist, bPtr, q, zPtr ) softfloat_remStepMBy32( 3, remPtr, dist, bPtr, q, zPtr )
+#define softfloat_remStep96MBy32(remPtr, dist, bPtr, q, zPtr) softfloat_remStepMBy32(3, remPtr, dist, bPtr, q, zPtr)
 #endif

 #ifndef softfloat_remStep128MBy32
@@ -1143,7 +1014,7 @@ void
 | This function or macro is the same as 'softfloat_remStepMBy32' with
 | 'size_words' = 4 (N = 128).
 *----------------------------------------------------------------------------*/
-#define softfloat_remStep128MBy32( remPtr, dist, bPtr, q, zPtr ) softfloat_remStepMBy32( 4, remPtr, dist, bPtr, q, zPtr )
+#define softfloat_remStep128MBy32(remPtr, dist, bPtr, q, zPtr) softfloat_remStepMBy32(4, remPtr, dist, bPtr, q, zPtr)
 #endif

 #ifndef softfloat_remStep160MBy32
@@ -1151,10 +1022,9 @@ void
 | This function or macro is the same as 'softfloat_remStepMBy32' with
 | 'size_words' = 5 (N = 160).
 *----------------------------------------------------------------------------*/
-#define softfloat_remStep160MBy32( remPtr, dist, bPtr, q, zPtr ) softfloat_remStepMBy32( 5, remPtr, dist, bPtr, q, zPtr )
+#define softfloat_remStep160MBy32(remPtr, dist, bPtr, q, zPtr) softfloat_remStepMBy32(5, remPtr, dist, bPtr, q, zPtr)
 #endif

 #endif

 #endif
-
@@ -34,7 +34,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 =============================================================================*/

-
 /*============================================================================
 | Note:  If SoftFloat is made available as a general library for programs to
 | use, it is strongly recommended that a platform-specific version of this
@@ -42,13 +41,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | eliminates all dependencies on compile-time macros.
 *============================================================================*/

-
 #ifndef softfloat_h
 #define softfloat_h 1

+#include "softfloat_types.h"
 #include <stdbool.h>
 #include <stdint.h>
-#include "softfloat_types.h"

 #ifndef THREAD_LOCAL
 #define THREAD_LOCAL
@@ -58,10 +56,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | Software floating-point underflow tininess-detection mode.
 *----------------------------------------------------------------------------*/
 extern THREAD_LOCAL uint_fast8_t softfloat_detectTininess;
-enum {
-    softfloat_tininess_beforeRounding = 0,
-    softfloat_tininess_afterRounding  = 1
-};
+enum { softfloat_tininess_beforeRounding = 0, softfloat_tininess_afterRounding = 1 };

 /*----------------------------------------------------------------------------
 | Software floating-point rounding mode.  (Mode "odd" is supported only if
@@ -69,12 +64,12 @@ enum {
 *----------------------------------------------------------------------------*/
 extern THREAD_LOCAL uint_fast8_t softfloat_roundingMode;
 enum {
-    softfloat_round_near_even   = 0,
-    softfloat_round_minMag      = 1,
-    softfloat_round_min         = 2,
-    softfloat_round_max         = 3,
+    softfloat_round_near_even = 0,
+    softfloat_round_minMag = 1,
+    softfloat_round_min = 2,
+    softfloat_round_max = 3,
    softfloat_round_near_maxMag = 4,
-    softfloat_round_odd         = 6
+    softfloat_round_odd = 6
 };

 /*----------------------------------------------------------------------------
@@ -82,169 +77,169 @@ enum {
 *----------------------------------------------------------------------------*/
 extern THREAD_LOCAL uint_fast8_t softfloat_exceptionFlags;
 typedef enum {
-    softfloat_flag_inexact   =  1,
-    softfloat_flag_underflow =  2,
-    softfloat_flag_overflow  =  4,
-    softfloat_flag_infinite  =  8,
-    softfloat_flag_invalid   = 16
+    softfloat_flag_inexact = 1,
+    softfloat_flag_underflow = 2,
+    softfloat_flag_overflow = 4,
+    softfloat_flag_infinite = 8,
+    softfloat_flag_invalid = 16
 } exceptionFlag_t;

 /*----------------------------------------------------------------------------
 | Routine to raise any or all of the software floating-point exception flags.
 *----------------------------------------------------------------------------*/
-void softfloat_raiseFlags( uint_fast8_t );
+void softfloat_raiseFlags(uint_fast8_t);

 /*----------------------------------------------------------------------------
 | Integer-to-floating-point conversion routines.
 *----------------------------------------------------------------------------*/
-float16_t ui32_to_f16( uint32_t );
-float32_t ui32_to_f32( uint32_t );
-float64_t ui32_to_f64( uint32_t );
+float16_t ui32_to_f16(uint32_t);
+float32_t ui32_to_f32(uint32_t);
+float64_t ui32_to_f64(uint32_t);
 #ifdef SOFTFLOAT_FAST_INT64
-extFloat80_t ui32_to_extF80( uint32_t );
-float128_t ui32_to_f128( uint32_t );
+extFloat80_t ui32_to_extF80(uint32_t);
+float128_t ui32_to_f128(uint32_t);
 #endif
-void ui32_to_extF80M( uint32_t, extFloat80_t * );
-void ui32_to_f128M( uint32_t, float128_t * );
-float16_t ui64_to_f16( uint64_t );
-float32_t ui64_to_f32( uint64_t );
-float64_t ui64_to_f64( uint64_t );
+void ui32_to_extF80M(uint32_t, extFloat80_t*);
+void ui32_to_f128M(uint32_t, float128_t*);
+float16_t ui64_to_f16(uint64_t);
+float32_t ui64_to_f32(uint64_t);
+float64_t ui64_to_f64(uint64_t);
 #ifdef SOFTFLOAT_FAST_INT64
-extFloat80_t ui64_to_extF80( uint64_t );
-float128_t ui64_to_f128( uint64_t );
+extFloat80_t ui64_to_extF80(uint64_t);
+float128_t ui64_to_f128(uint64_t);
 #endif
-void ui64_to_extF80M( uint64_t, extFloat80_t * );
-void ui64_to_f128M( uint64_t, float128_t * );
-float16_t i32_to_f16( int32_t );
-float32_t i32_to_f32( int32_t );
-float64_t i32_to_f64( int32_t );
+void ui64_to_extF80M(uint64_t, extFloat80_t*);
+void ui64_to_f128M(uint64_t, float128_t*);
+float16_t i32_to_f16(int32_t);
+float32_t i32_to_f32(int32_t);
+float64_t i32_to_f64(int32_t);
 #ifdef SOFTFLOAT_FAST_INT64
-extFloat80_t i32_to_extF80( int32_t );
-float128_t i32_to_f128( int32_t );
+extFloat80_t i32_to_extF80(int32_t);
+float128_t i32_to_f128(int32_t);
 #endif
-void i32_to_extF80M( int32_t, extFloat80_t * );
-void i32_to_f128M( int32_t, float128_t * );
-float16_t i64_to_f16( int64_t );
-float32_t i64_to_f32( int64_t );
-float64_t i64_to_f64( int64_t );
+void i32_to_extF80M(int32_t, extFloat80_t*);
+void i32_to_f128M(int32_t, float128_t*);
+float16_t i64_to_f16(int64_t);
+float32_t i64_to_f32(int64_t);
+float64_t i64_to_f64(int64_t);
 #ifdef SOFTFLOAT_FAST_INT64
-extFloat80_t i64_to_extF80( int64_t );
-float128_t i64_to_f128( int64_t );
+extFloat80_t i64_to_extF80(int64_t);
+float128_t i64_to_f128(int64_t);
 #endif
-void i64_to_extF80M( int64_t, extFloat80_t * );
-void i64_to_f128M( int64_t, float128_t * );
+void i64_to_extF80M(int64_t, extFloat80_t*);
+void i64_to_f128M(int64_t, float128_t*);

 /*----------------------------------------------------------------------------
 | 16-bit (half-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
-uint_fast32_t f16_to_ui32( float16_t, uint_fast8_t, bool );
-uint_fast64_t f16_to_ui64( float16_t, uint_fast8_t, bool );
-int_fast32_t f16_to_i32( float16_t, uint_fast8_t, bool );
-int_fast64_t f16_to_i64( float16_t, uint_fast8_t, bool );
-uint_fast32_t f16_to_ui32_r_minMag( float16_t, bool );
-uint_fast64_t f16_to_ui64_r_minMag( float16_t, bool );
-int_fast32_t f16_to_i32_r_minMag( float16_t, bool );
-int_fast64_t f16_to_i64_r_minMag( float16_t, bool );
-float32_t f16_to_f32( float16_t );
-float64_t f16_to_f64( float16_t );
+uint_fast32_t f16_to_ui32(float16_t, uint_fast8_t, bool);
+uint_fast64_t f16_to_ui64(float16_t, uint_fast8_t, bool);
+int_fast32_t f16_to_i32(float16_t, uint_fast8_t, bool);
+int_fast64_t f16_to_i64(float16_t, uint_fast8_t, bool);
+uint_fast32_t f16_to_ui32_r_minMag(float16_t, bool);
+uint_fast64_t f16_to_ui64_r_minMag(float16_t, bool);
+int_fast32_t f16_to_i32_r_minMag(float16_t, bool);
+int_fast64_t f16_to_i64_r_minMag(float16_t, bool);
+float32_t f16_to_f32(float16_t);
+float64_t f16_to_f64(float16_t);
 #ifdef SOFTFLOAT_FAST_INT64
-extFloat80_t f16_to_extF80( float16_t );
-float128_t f16_to_f128( float16_t );
+extFloat80_t f16_to_extF80(float16_t);
+float128_t f16_to_f128(float16_t);
 #endif
-void f16_to_extF80M( float16_t, extFloat80_t * );
-void f16_to_f128M( float16_t, float128_t * );
-float16_t f16_roundToInt( float16_t, uint_fast8_t, bool );
-float16_t f16_add( float16_t, float16_t );
-float16_t f16_sub( float16_t, float16_t );
-float16_t f16_mul( float16_t, float16_t );
-float16_t f16_mulAdd( float16_t, float16_t, float16_t );
-float16_t f16_div( float16_t, float16_t );
-float16_t f16_rem( float16_t, float16_t );
-float16_t f16_sqrt( float16_t );
-bool f16_eq( float16_t, float16_t );
-bool f16_le( float16_t, float16_t );
-bool f16_lt( float16_t, float16_t );
-bool f16_eq_signaling( float16_t, float16_t );
-bool f16_le_quiet( float16_t, float16_t );
-bool f16_lt_quiet( float16_t, float16_t );
-bool f16_isSignalingNaN( float16_t );
+void f16_to_extF80M(float16_t, extFloat80_t*);
+void f16_to_f128M(float16_t, float128_t*);
+float16_t f16_roundToInt(float16_t, uint_fast8_t, bool);
+float16_t f16_add(float16_t, float16_t);
+float16_t f16_sub(float16_t, float16_t);
+float16_t f16_mul(float16_t, float16_t);
+float16_t f16_mulAdd(float16_t, float16_t, float16_t);
+float16_t f16_div(float16_t, float16_t);
+float16_t f16_rem(float16_t, float16_t);
+float16_t f16_sqrt(float16_t);
+bool f16_eq(float16_t, float16_t);
+bool f16_le(float16_t, float16_t);
+bool f16_lt(float16_t, float16_t);
+bool f16_eq_signaling(float16_t, float16_t);
+bool f16_le_quiet(float16_t, float16_t);
+bool f16_lt_quiet(float16_t, float16_t);
+bool f16_isSignalingNaN(float16_t);

 /*----------------------------------------------------------------------------
 | 16-bit (brain float 16) floating-point operations.
 *----------------------------------------------------------------------------*/
-float32_t bf16_to_f32( bfloat16_t );
-bfloat16_t f32_to_bf16( float32_t );
-bool bf16_isSignalingNaN( bfloat16_t );
+float32_t bf16_to_f32(bfloat16_t);
+bfloat16_t f32_to_bf16(float32_t);
+bool bf16_isSignalingNaN(bfloat16_t);

 /*----------------------------------------------------------------------------
 | 32-bit (single-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
-uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool );
-uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool );
-int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool );
-int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool );
-uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool );
-uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );
-int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
-int_fast64_t f32_to_i64_r_minMag( float32_t, bool );
-float16_t f32_to_f16( float32_t );
-float64_t f32_to_f64( float32_t );
+uint_fast32_t f32_to_ui32(float32_t, uint_fast8_t, bool);
+uint_fast64_t f32_to_ui64(float32_t, uint_fast8_t, bool);
+int_fast32_t f32_to_i32(float32_t, uint_fast8_t, bool);
+int_fast64_t f32_to_i64(float32_t, uint_fast8_t, bool);
+uint_fast32_t f32_to_ui32_r_minMag(float32_t, bool);
+uint_fast64_t f32_to_ui64_r_minMag(float32_t, bool);
+int_fast32_t f32_to_i32_r_minMag(float32_t, bool);
+int_fast64_t f32_to_i64_r_minMag(float32_t, bool);
+float16_t f32_to_f16(float32_t);
+float64_t f32_to_f64(float32_t);
 #ifdef SOFTFLOAT_FAST_INT64
-extFloat80_t f32_to_extF80( float32_t );
-float128_t f32_to_f128( float32_t );
+extFloat80_t f32_to_extF80(float32_t);
+float128_t f32_to_f128(float32_t);
 #endif
-void f32_to_extF80M( float32_t, extFloat80_t * );
-void f32_to_f128M( float32_t, float128_t * );
-float32_t f32_roundToInt( float32_t, uint_fast8_t, bool );
-float32_t f32_add( float32_t, float32_t );
-float32_t f32_sub( float32_t, float32_t );
-float32_t f32_mul( float32_t, float32_t );
-float32_t f32_mulAdd( float32_t, float32_t, float32_t );
-float32_t f32_div( float32_t, float32_t );
-float32_t f32_rem( float32_t, float32_t );
-float32_t f32_sqrt( float32_t );
-bool f32_eq( float32_t, float32_t );
-bool f32_le( float32_t, float32_t );
-bool f32_lt( float32_t, float32_t );
-bool f32_eq_signaling( float32_t, float32_t );
-bool f32_le_quiet( float32_t, float32_t );
-bool f32_lt_quiet( float32_t, float32_t );
-bool f32_isSignalingNaN( float32_t );
+void f32_to_extF80M(float32_t, extFloat80_t*);
+void f32_to_f128M(float32_t, float128_t*);
+float32_t f32_roundToInt(float32_t, uint_fast8_t, bool);
+float32_t f32_add(float32_t, float32_t);
+float32_t f32_sub(float32_t, float32_t);
+float32_t f32_mul(float32_t, float32_t);
+float32_t f32_mulAdd(float32_t, float32_t, float32_t);
+float32_t f32_div(float32_t, float32_t);
+float32_t f32_rem(float32_t, float32_t);
+float32_t f32_sqrt(float32_t);
+bool f32_eq(float32_t, float32_t);
+bool f32_le(float32_t, float32_t);
+bool f32_lt(float32_t, float32_t);
+bool f32_eq_signaling(float32_t, float32_t);
+bool f32_le_quiet(float32_t, float32_t);
+bool f32_lt_quiet(float32_t, float32_t);
+bool f32_isSignalingNaN(float32_t);

 /*----------------------------------------------------------------------------
 | 64-bit (double-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
-uint_fast32_t f64_to_ui32( float64_t, uint_fast8_t, bool );
-uint_fast64_t f64_to_ui64( float64_t, uint_fast8_t, bool );
-int_fast32_t f64_to_i32( float64_t, uint_fast8_t, bool );
-int_fast64_t f64_to_i64( float64_t, uint_fast8_t, bool );
-uint_fast32_t f64_to_ui32_r_minMag( float64_t, bool );
-uint_fast64_t f64_to_ui64_r_minMag( float64_t, bool );
-int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
-int_fast64_t f64_to_i64_r_minMag( float64_t, bool );
-float16_t f64_to_f16( float64_t );
-float32_t f64_to_f32( float64_t );
+uint_fast32_t f64_to_ui32(float64_t, uint_fast8_t, bool);
+uint_fast64_t f64_to_ui64(float64_t, uint_fast8_t, bool);
+int_fast32_t f64_to_i32(float64_t, uint_fast8_t, bool);
+int_fast64_t f64_to_i64(float64_t, uint_fast8_t, bool);
+uint_fast32_t f64_to_ui32_r_minMag(float64_t, bool);
+uint_fast64_t f64_to_ui64_r_minMag(float64_t, bool);
+int_fast32_t f64_to_i32_r_minMag(float64_t, bool);
+int_fast64_t f64_to_i64_r_minMag(float64_t, bool);
+float16_t f64_to_f16(float64_t);
+float32_t f64_to_f32(float64_t);
 #ifdef SOFTFLOAT_FAST_INT64
-extFloat80_t f64_to_extF80( float64_t );
-float128_t f64_to_f128( float64_t );
+extFloat80_t f64_to_extF80(float64_t);
+float128_t f64_to_f128(float64_t);
 #endif
-void f64_to_extF80M( float64_t, extFloat80_t * );
-void f64_to_f128M( float64_t, float128_t * );
-float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
-float64_t f64_add( float64_t, float64_t );
-float64_t f64_sub( float64_t, float64_t );
-float64_t f64_mul( float64_t, float64_t );
-float64_t f64_mulAdd( float64_t, float64_t, float64_t );
-float64_t f64_div( float64_t, float64_t );
-float64_t f64_rem( float64_t, float64_t );
-float64_t f64_sqrt( float64_t );
-bool f64_eq( float64_t, float64_t );
-bool f64_le( float64_t, float64_t );
-bool f64_lt( float64_t, float64_t );
-bool f64_eq_signaling( float64_t, float64_t );
-bool f64_le_quiet( float64_t, float64_t );
-bool f64_lt_quiet( float64_t, float64_t );
-bool f64_isSignalingNaN( float64_t );
+void f64_to_extF80M(float64_t, extFloat80_t*);
+void f64_to_f128M(float64_t, float128_t*);
+float64_t f64_roundToInt(float64_t, uint_fast8_t, bool);
+float64_t f64_add(float64_t, float64_t);
+float64_t f64_sub(float64_t, float64_t);
+float64_t f64_mul(float64_t, float64_t);
+float64_t f64_mulAdd(float64_t, float64_t, float64_t);
+float64_t f64_div(float64_t, float64_t);
+float64_t f64_rem(float64_t, float64_t);
+float64_t f64_sqrt(float64_t);
+bool f64_eq(float64_t, float64_t);
+bool f64_le(float64_t, float64_t);
+bool f64_lt(float64_t, float64_t);
+bool f64_eq_signaling(float64_t, float64_t);
+bool f64_le_quiet(float64_t, float64_t);
+bool f64_lt_quiet(float64_t, float64_t);
+bool f64_isSignalingNaN(float64_t);

 /*----------------------------------------------------------------------------
 | Rounding precision for 80-bit extended double-precision floating-point.
@@ -256,124 +251,118 @@ extern THREAD_LOCAL uint_fast8_t extF80_roundingPrecision;
 | 80-bit extended double-precision floating-point operations.
 *----------------------------------------------------------------------------*/
 #ifdef SOFTFLOAT_FAST_INT64
-uint_fast32_t extF80_to_ui32( extFloat80_t, uint_fast8_t, bool );
-uint_fast64_t extF80_to_ui64( extFloat80_t, uint_fast8_t, bool );
-int_fast32_t extF80_to_i32( extFloat80_t, uint_fast8_t, bool );
-int_fast64_t extF80_to_i64( extFloat80_t, uint_fast8_t, bool );
-uint_fast32_t extF80_to_ui32_r_minMag( extFloat80_t, bool );
-uint_fast64_t extF80_to_ui64_r_minMag( extFloat80_t, bool );
-int_fast32_t extF80_to_i32_r_minMag( extFloat80_t, bool );
-int_fast64_t extF80_to_i64_r_minMag( extFloat80_t, bool );
-float16_t extF80_to_f16( extFloat80_t );
-float32_t extF80_to_f32( extFloat80_t );
-float64_t extF80_to_f64( extFloat80_t );
-float128_t extF80_to_f128( extFloat80_t );
-extFloat80_t extF80_roundToInt( extFloat80_t, uint_fast8_t, bool );
-extFloat80_t extF80_add( extFloat80_t, extFloat80_t );
-extFloat80_t extF80_sub( extFloat80_t, extFloat80_t );
-extFloat80_t extF80_mul( extFloat80_t, extFloat80_t );
-extFloat80_t extF80_div( extFloat80_t, extFloat80_t );
-extFloat80_t extF80_rem( extFloat80_t, extFloat80_t );
-extFloat80_t extF80_sqrt( extFloat80_t );
-bool extF80_eq( extFloat80_t, extFloat80_t );
-bool extF80_le( extFloat80_t, extFloat80_t );
-bool extF80_lt( extFloat80_t, extFloat80_t );
-bool extF80_eq_signaling( extFloat80_t, extFloat80_t );
-bool extF80_le_quiet( extFloat80_t, extFloat80_t );
-bool extF80_lt_quiet( extFloat80_t, extFloat80_t );
-bool extF80_isSignalingNaN( extFloat80_t );
+uint_fast32_t extF80_to_ui32(extFloat80_t, uint_fast8_t, bool);
+uint_fast64_t extF80_to_ui64(extFloat80_t, uint_fast8_t, bool);
+int_fast32_t extF80_to_i32(extFloat80_t, uint_fast8_t, bool);
+int_fast64_t extF80_to_i64(extFloat80_t, uint_fast8_t, bool);
+uint_fast32_t extF80_to_ui32_r_minMag(extFloat80_t, bool);
+uint_fast64_t extF80_to_ui64_r_minMag(extFloat80_t, bool);
+int_fast32_t extF80_to_i32_r_minMag(extFloat80_t, bool);
+int_fast64_t extF80_to_i64_r_minMag(extFloat80_t, bool);
+float16_t extF80_to_f16(extFloat80_t);
+float32_t extF80_to_f32(extFloat80_t);
+float64_t extF80_to_f64(extFloat80_t);
+float128_t extF80_to_f128(extFloat80_t);
+extFloat80_t extF80_roundToInt(extFloat80_t, uint_fast8_t, bool);
+extFloat80_t extF80_add(extFloat80_t, extFloat80_t);
+extFloat80_t extF80_sub(extFloat80_t, extFloat80_t);
+extFloat80_t extF80_mul(extFloat80_t, extFloat80_t);
+extFloat80_t extF80_div(extFloat80_t, extFloat80_t);
+extFloat80_t extF80_rem(extFloat80_t, extFloat80_t);
+extFloat80_t extF80_sqrt(extFloat80_t);
+bool extF80_eq(extFloat80_t, extFloat80_t);
+bool extF80_le(extFloat80_t, extFloat80_t);
+bool extF80_lt(extFloat80_t, extFloat80_t);
+bool extF80_eq_signaling(extFloat80_t, extFloat80_t);
+bool extF80_le_quiet(extFloat80_t, extFloat80_t);
+bool extF80_lt_quiet(extFloat80_t, extFloat80_t);
+bool extF80_isSignalingNaN(extFloat80_t);
 #endif
-uint_fast32_t extF80M_to_ui32( const extFloat80_t *, uint_fast8_t, bool );
-uint_fast64_t extF80M_to_ui64( const extFloat80_t *, uint_fast8_t, bool );
-int_fast32_t extF80M_to_i32( const extFloat80_t *, uint_fast8_t, bool );
-int_fast64_t extF80M_to_i64( const extFloat80_t *, uint_fast8_t, bool );
-uint_fast32_t extF80M_to_ui32_r_minMag( const extFloat80_t *, bool );
-uint_fast64_t extF80M_to_ui64_r_minMag( const extFloat80_t *, bool );
-int_fast32_t extF80M_to_i32_r_minMag( const extFloat80_t *, bool );
-int_fast64_t extF80M_to_i64_r_minMag( const extFloat80_t *, bool );
-float16_t extF80M_to_f16( const extFloat80_t * );
-float32_t extF80M_to_f32( const extFloat80_t * );
-float64_t extF80M_to_f64( const extFloat80_t * );
-void extF80M_to_f128M( const extFloat80_t *, float128_t * );
-void
- extF80M_roundToInt(
-     const extFloat80_t *, uint_fast8_t, bool, extFloat80_t * );
-void extF80M_add( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
-void extF80M_sub( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
-void extF80M_mul( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
-void extF80M_div( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
-void extF80M_rem( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
-void extF80M_sqrt( const extFloat80_t *, extFloat80_t * );
-bool extF80M_eq( const extFloat80_t *, const extFloat80_t * );
-bool extF80M_le( const extFloat80_t *, const extFloat80_t * );
-bool extF80M_lt( const extFloat80_t *, const extFloat80_t * );
-bool extF80M_eq_signaling( const extFloat80_t *, const extFloat80_t * );
-bool extF80M_le_quiet( const extFloat80_t *, const extFloat80_t * );
-bool extF80M_lt_quiet( const extFloat80_t *, const extFloat80_t * );
-bool extF80M_isSignalingNaN( const extFloat80_t * );
+uint_fast32_t extF80M_to_ui32(const extFloat80_t*, uint_fast8_t, bool);
+uint_fast64_t extF80M_to_ui64(const extFloat80_t*, uint_fast8_t, bool);
+int_fast32_t extF80M_to_i32(const extFloat80_t*, uint_fast8_t, bool);
+int_fast64_t extF80M_to_i64(const extFloat80_t*, uint_fast8_t, bool);
+uint_fast32_t extF80M_to_ui32_r_minMag(const extFloat80_t*, bool);
+uint_fast64_t extF80M_to_ui64_r_minMag(const extFloat80_t*, bool);
+int_fast32_t extF80M_to_i32_r_minMag(const extFloat80_t*, bool);
+int_fast64_t extF80M_to_i64_r_minMag(const extFloat80_t*, bool);
+float16_t extF80M_to_f16(const extFloat80_t*);
+float32_t extF80M_to_f32(const extFloat80_t*);
+float64_t extF80M_to_f64(const extFloat80_t*);
+void extF80M_to_f128M(const extFloat80_t*, float128_t*);
+void extF80M_roundToInt(const extFloat80_t*, uint_fast8_t, bool, extFloat80_t*);
+void extF80M_add(const extFloat80_t*, const extFloat80_t*, extFloat80_t*);
+void extF80M_sub(const extFloat80_t*, const extFloat80_t*, extFloat80_t*);
+void extF80M_mul(const extFloat80_t*, const extFloat80_t*, extFloat80_t*);
+void extF80M_div(const extFloat80_t*, const extFloat80_t*, extFloat80_t*);
+void extF80M_rem(const extFloat80_t*, const extFloat80_t*, extFloat80_t*);
+void extF80M_sqrt(const extFloat80_t*, extFloat80_t*);
+bool extF80M_eq(const extFloat80_t*, const extFloat80_t*);
+bool extF80M_le(const extFloat80_t*, const extFloat80_t*);
+bool extF80M_lt(const extFloat80_t*, const extFloat80_t*);
+bool extF80M_eq_signaling(const extFloat80_t*, const extFloat80_t*);
+bool extF80M_le_quiet(const extFloat80_t*, const extFloat80_t*);
+bool extF80M_lt_quiet(const extFloat80_t*, const extFloat80_t*);
+bool extF80M_isSignalingNaN(const extFloat80_t*);

 /*----------------------------------------------------------------------------
 | 128-bit (quadruple-precision) floating-point operations.
 *----------------------------------------------------------------------------*/
 #ifdef SOFTFLOAT_FAST_INT64
-uint_fast32_t f128_to_ui32( float128_t, uint_fast8_t, bool );
-uint_fast64_t f128_to_ui64( float128_t, uint_fast8_t, bool );
-int_fast32_t f128_to_i32( float128_t, uint_fast8_t, bool );
-int_fast64_t f128_to_i64( float128_t, uint_fast8_t, bool );
-uint_fast32_t f128_to_ui32_r_minMag( float128_t, bool );
-uint_fast64_t f128_to_ui64_r_minMag( float128_t, bool );
-int_fast32_t f128_to_i32_r_minMag( float128_t, bool );
-int_fast64_t f128_to_i64_r_minMag( float128_t, bool );
-float16_t f128_to_f16( float128_t );
-float32_t f128_to_f32( float128_t );
-float64_t f128_to_f64( float128_t );
-extFloat80_t f128_to_extF80( float128_t );
-float128_t f128_roundToInt( float128_t, uint_fast8_t, bool );
-float128_t f128_add( float128_t, float128_t );
-float128_t f128_sub( float128_t, float128_t );
-float128_t f128_mul( float128_t, float128_t );
-float128_t f128_mulAdd( float128_t, float128_t, float128_t );
-float128_t f128_div( float128_t, float128_t );
-float128_t f128_rem( float128_t, float128_t );
-float128_t f128_sqrt( float128_t );
-bool f128_eq( float128_t, float128_t );
-bool f128_le( float128_t, float128_t );
-bool f128_lt( float128_t, float128_t );
-bool f128_eq_signaling( float128_t, float128_t );
-bool f128_le_quiet( float128_t, float128_t );
-bool f128_lt_quiet( float128_t, float128_t );
-bool f128_isSignalingNaN( float128_t );
+uint_fast32_t f128_to_ui32(float128_t, uint_fast8_t, bool);
+uint_fast64_t f128_to_ui64(float128_t, uint_fast8_t, bool);
+int_fast32_t f128_to_i32(float128_t, uint_fast8_t, bool);
+int_fast64_t f128_to_i64(float128_t, uint_fast8_t, bool);
+uint_fast32_t f128_to_ui32_r_minMag(float128_t, bool);
+uint_fast64_t f128_to_ui64_r_minMag(float128_t, bool);
+int_fast32_t f128_to_i32_r_minMag(float128_t, bool);
+int_fast64_t f128_to_i64_r_minMag(float128_t, bool);
+float16_t f128_to_f16(float128_t);
+float32_t f128_to_f32(float128_t);
+float64_t f128_to_f64(float128_t);
+extFloat80_t f128_to_extF80(float128_t);
+float128_t f128_roundToInt(float128_t, uint_fast8_t, bool);
+float128_t f128_add(float128_t, float128_t);
+float128_t f128_sub(float128_t, float128_t);
+float128_t f128_mul(float128_t, float128_t);
+float128_t f128_mulAdd(float128_t, float128_t, float128_t);
+float128_t f128_div(float128_t, float128_t);
+float128_t f128_rem(float128_t, float128_t);
+float128_t f128_sqrt(float128_t);
+bool f128_eq(float128_t, float128_t);
+bool f128_le(float128_t, float128_t);
+bool f128_lt(float128_t, float128_t);
+bool f128_eq_signaling(float128_t, float128_t);
+bool f128_le_quiet(float128_t, float128_t);
+bool f128_lt_quiet(float128_t, float128_t);
+bool f128_isSignalingNaN(float128_t);
 #endif
-uint_fast32_t f128M_to_ui32( const float128_t *, uint_fast8_t, bool );
-uint_fast64_t f128M_to_ui64( const float128_t *, uint_fast8_t, bool );
-int_fast32_t f128M_to_i32( const float128_t *, uint_fast8_t, bool );
-int_fast64_t f128M_to_i64( const float128_t *, uint_fast8_t, bool );
-uint_fast32_t f128M_to_ui32_r_minMag( const float128_t *, bool );
-uint_fast64_t f128M_to_ui64_r_minMag( const float128_t *, bool );
-int_fast32_t f128M_to_i32_r_minMag( const float128_t *, bool );
-int_fast64_t f128M_to_i64_r_minMag( const float128_t *, bool );
-float16_t f128M_to_f16( const float128_t * );
-float32_t f128M_to_f32( const float128_t * );
-float64_t f128M_to_f64( const float128_t * );
-void f128M_to_extF80M( const float128_t *, extFloat80_t * );
-void f128M_roundToInt( const float128_t *, uint_fast8_t, bool, float128_t * );
-void f128M_add( const float128_t *, const float128_t *, float128_t * );
-void f128M_sub( const float128_t *, const float128_t *, float128_t * );
-void f128M_mul( const float128_t *, const float128_t *, float128_t * );
-void
- f128M_mulAdd(
-     const float128_t *, const float128_t *, const float128_t *, float128_t *
- );
-void f128M_div( const float128_t *, const float128_t *, float128_t * );
-void f128M_rem( const float128_t *, const float128_t *, float128_t * );
-void f128M_sqrt( const float128_t *, float128_t * );
-bool f128M_eq( const float128_t *, const float128_t * );
-bool f128M_le( const float128_t *, const float128_t * );
-bool f128M_lt( const float128_t *, const float128_t * );
-bool f128M_eq_signaling( const float128_t *, const float128_t * );
-bool f128M_le_quiet( const float128_t *, const float128_t * );
-bool f128M_lt_quiet( const float128_t *, const float128_t * );
-bool f128M_isSignalingNaN( const float128_t * );
+uint_fast32_t f128M_to_ui32(const float128_t*, uint_fast8_t, bool);
+uint_fast64_t f128M_to_ui64(const float128_t*, uint_fast8_t, bool);
+int_fast32_t f128M_to_i32(const float128_t*, uint_fast8_t, bool);
+int_fast64_t f128M_to_i64(const float128_t*, uint_fast8_t, bool);
+uint_fast32_t f128M_to_ui32_r_minMag(const float128_t*, bool);
+uint_fast64_t f128M_to_ui64_r_minMag(const float128_t*, bool);
+int_fast32_t f128M_to_i32_r_minMag(const float128_t*, bool);
+int_fast64_t f128M_to_i64_r_minMag(const float128_t*, bool);
+float16_t f128M_to_f16(const float128_t*);
+float32_t f128M_to_f32(const float128_t*);
+float64_t f128M_to_f64(const float128_t*);
+void f128M_to_extF80M(const float128_t*, extFloat80_t*);
+void f128M_roundToInt(const float128_t*, uint_fast8_t, bool, float128_t*);
+void f128M_add(const float128_t*, const float128_t*, float128_t*);
+void f128M_sub(const float128_t*, const float128_t*, float128_t*);
+void f128M_mul(const float128_t*, const float128_t*, float128_t*);
+void f128M_mulAdd(const float128_t*, const float128_t*, const float128_t*, float128_t*);
+void f128M_div(const float128_t*, const float128_t*, float128_t*);
+void f128M_rem(const float128_t*, const float128_t*, float128_t*);
+void f128M_sqrt(const float128_t*, float128_t*);
+bool f128M_eq(const float128_t*, const float128_t*);
+bool f128M_le(const float128_t*, const float128_t*);
+bool f128M_lt(const float128_t*, const float128_t*);
+bool f128M_eq_signaling(const float128_t*, const float128_t*);
+bool f128M_le_quiet(const float128_t*, const float128_t*);
+bool f128M_lt_quiet(const float128_t*, const float128_t*);
+bool f128M_isSignalingNaN(const float128_t*);

 #endif
-
@@ -47,11 +47,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | the types below may, if desired, be defined as aliases for the native types
 | (typically 'float' and 'double', and possibly 'long double').
 *----------------------------------------------------------------------------*/
-typedef struct { uint16_t v; } float16_t;
-typedef struct { uint16_t v; } bfloat16_t;
-typedef struct { uint32_t v; } float32_t;
-typedef struct { uint64_t v; } float64_t;
-typedef struct { uint64_t v[2]; } float128_t;
+typedef struct {
+    uint16_t v;
+} float16_t;
+typedef struct {
+    uint16_t v;
+} bfloat16_t;
+typedef struct {
+    uint32_t v;
+} float32_t;
+typedef struct {
+    uint64_t v;
+} float64_t;
+typedef struct {
+    uint64_t v[2];
+} float128_t;

 /*----------------------------------------------------------------------------
 | The format of an 80-bit extended floating-point number in memory.  This
@@ -59,9 +69,15 @@ typedef struct { uint64_t v[2]; } float128_t;
 | named 'signif'.
 *----------------------------------------------------------------------------*/
 #ifdef LITTLEENDIAN
-struct extFloat80M { uint64_t signif; uint16_t signExp; };
+struct extFloat80M {
+    uint64_t signif;
+    uint16_t signExp;
+};
 #else
-struct extFloat80M { uint16_t signExp; uint64_t signif; };
+struct extFloat80M {
+    uint16_t signExp;
+    uint64_t signif;
+};
 #endif

 /*----------------------------------------------------------------------------
@@ -79,4 +95,3 @@ struct extFloat80M { uint16_t signExp; uint64_t signif; };
 typedef struct extFloat80M extFloat80_t;

 #endif
-
@@ -0,0 +1,42 @@
+#ifdef _MSC_VER
+#define _SCL_SECURE_NO_WARNINGS
+#define ELFIO_NO_INTTYPES
+#endif
+
+#include <elfio/elfio_dump.hpp>
+#include <iostream>
+
+/**
+ * Dumps the contents of an ELF file to stdout.
+ *
+ * Expects exactly one argument, the path of the ELF file. Prints the ELF header, the section and segment headers,
+ * the symbol tables, notes, modinfo, dynamic tags and the section and segment data.
+ *
+ * @return 0 on success, 1 if the argument count is wrong or the file could not be loaded
+ */
+int main(int argc, char** argv) {
+    if(argc != 2) {
+        // diagnostics go to stderr so that they never end up in the middle of a dump written to stdout
+        std::cerr << "Usage: elfdump <file_name>\n";
+        return 1;
+    }
+
+    ELFIO::elfio reader;
+
+    if(!reader.load(argv[1])) {
+        std::cerr << "File " << argv[1] << " is not found or it is not an ELF file\n";
+        return 1;
+    }
+
+    ELFIO::dump::header(std::cout, reader);
+    ELFIO::dump::section_headers(std::cout, reader);
+    ELFIO::dump::segment_headers(std::cout, reader);
+    ELFIO::dump::symbol_tables(std::cout, reader);
+    ELFIO::dump::notes(std::cout, reader);
+    ELFIO::dump::modinfo(std::cout, reader);
+    ELFIO::dump::dynamic_tags(std::cout, reader);
+    ELFIO::dump::section_datas(std::cout, reader);
+    ELFIO::dump::segment_datas(std::cout, reader);
+
+    return 0;
+}
@@ -51,8 +51,8 @@ public:
    virtual ~hwl() = default;

 protected:
-    iss::status read_custom_csr_reg(unsigned addr, reg_t& val) override;
-    iss::status write_custom_csr_reg(unsigned addr, reg_t val) override;
+    iss::status read_custom_csr(unsigned addr, reg_t& val) override;
+    iss::status write_custom_csr(unsigned addr, reg_t val) override;
 };

 template <typename BASE>
@@ -68,7 +68,7 @@ inline hwl<BASE>::hwl(feature_config cfg)
    }
 }

-template <typename BASE> inline iss::status iss::arch::hwl<BASE>::read_custom_csr_reg(unsigned addr, reg_t& val) {
+template <typename BASE> inline iss::status iss::arch::hwl<BASE>::read_custom_csr(unsigned addr, reg_t& val) {
    switch(addr) {
    case 0x800:
        val = this->reg.lpstart0;
@@ -92,7 +92,7 @@ template <typename BASE> inline iss::status iss::arch::hwl<BASE>::read_custom_cs
    return iss::Ok;
 }

-template <typename BASE> inline iss::status iss::arch::hwl<BASE>::write_custom_csr_reg(unsigned addr, reg_t val) {
+template <typename BASE> inline iss::status iss::arch::hwl<BASE>::write_custom_csr(unsigned addr, reg_t val) {
    switch(addr) {
    case 0x800:
        this->reg.lpstart0 = val;
@@ -35,11 +35,15 @@
 #ifndef _RISCV_HART_COMMON
 #define _RISCV_HART_COMMON

+#include "iss/vm_types.h"
+#include <array>
 #include <cstdint>
 #include <elfio/elfio.hpp>
 #include <fmt/format.h>
 #include <iss/arch_if.h>
 #include <iss/log_categories.h>
+#include <limits>
+#include <sstream>
 #include <string>
 #include <unordered_map>
 #include <util/logging.h>
@@ -55,8 +59,6 @@
 namespace iss {
 namespace arch {

-enum { tohost_dflt = 0xF0001000, fromhost_dflt = 0xF0001040 };
-
 enum features_e { FEAT_NONE, FEAT_PMP = 1, FEAT_EXT_N = 2, FEAT_CLIC = 4, FEAT_DEBUG = 8, FEAT_TCM = 16 };

 enum riscv_csr {
@@ -312,58 +314,132 @@ inline void write_reg_uint32(uint64_t offs, uint32_t& reg, const uint8_t* const
 }
 struct riscv_hart_common {
    riscv_hart_common(){};
-    ~riscv_hart_common(){};
+    /// Writes text still pending in io_buf to the log when the object is destroyed.
+    ~riscv_hart_common() {
+        auto const pending = io_buf.str();
+        if(!pending.empty()) {
+            CPPLOG(INFO) << "tohost send '" << pending << "'";
+        }
+    };
+    /// named symbols collected by read_elf_file and their values
    std::unordered_map<std::string, uint64_t> symbol_table;
+    /// entry point reported by the ELF file most recently accepted by read_elf_file
+    uint64_t entry_address{0};
+    /// value of the 'tohost' symbol, the maximum of uint64_t as long as no ELF file provided it
+    uint64_t tohost = std::numeric_limits<uint64_t>::max();
+    /// value of the 'fromhost' symbol, the maximum of uint64_t as long as no ELF file provided it
+    uint64_t fromhost = std::numeric_limits<uint64_t>::max();
+    /// accumulates the characters of SYS_WRITE requests until a line is complete
+    std::stringstream io_buf;

-    std::unordered_map<std::string, uint64_t> get_sym_table(std::string name) {
-        if(!symbol_table.empty())
-            return symbol_table;
-        FILE* fp = fopen(name.c_str(), "r");
-        if(fp) {
-            std::array<char, 5> buf;
-            auto n = fread(buf.data(), 1, 4, fp);
-            fclose(fp);
-            if(n != 4)
-                throw std::runtime_error("input file has insufficient size");
-            buf[4] = 0;
-            if(strcmp(buf.data() + 1, "ELF") == 0) {
-                // Create elfio reader
-                ELFIO::elfio reader;
-                // Load ELF data
-                if(!reader.load(name))
-                    throw std::runtime_error("could not process elf file");
-                // check elf properties
-                if(reader.get_type() != ET_EXEC)
-                    throw std::runtime_error("wrong elf type in file");
-                if(reader.get_machine() != EM_RISCV)
-                    throw std::runtime_error("wrong elf machine in file");
-                const auto sym_sec = reader.sections[".symtab"];
-                if(SHT_SYMTAB == sym_sec->get_type() || SHT_DYNSYM == sym_sec->get_type()) {
-                    ELFIO::symbol_section_accessor symbols(reader, sym_sec);
-                    auto sym_no = symbols.get_symbols_num();
-                    std::string name;
-                    ELFIO::Elf64_Addr value = 0;
-                    ELFIO::Elf_Xword size = 0;
-                    unsigned char bind = 0;
-                    unsigned char type = 0;
-                    ELFIO::Elf_Half section = 0;
-                    unsigned char other = 0;
-                    for(auto i = 0U; i < sym_no; ++i) {
-                        symbols.get_symbol(i, name, value, size, bind, type, section, other);
-                        if(name != "") {
-                            this->symbol_table[name] = value;
+    /**
+     * Reads a RISC-V ELF executable, hands its loadable segments to cb and collects its symbols.
+     *
+     * Updates entry_address and symbol_table as well as tohost and fromhost if the symbol table defines them.
+     *
+     * @param name path of the ELF file
+     * @param expected_elf_class ELF class the file has to have (compared against ELFIO::elfio::get_class())
+     * @param cb called as cb(physical address, file size, data) for each non-empty PT_LOAD segment; a result
+     *        other than iss::Ok is logged and loading continues
+     * @return false if ELFIO cannot load the file or it is not a RISC-V executable of the expected class,
+     *         true otherwise
+     */
+    bool read_elf_file(std::string name, uint8_t expected_elf_class,
+                       std::function<iss::status(uint64_t, uint64_t, const uint8_t* const)> cb) {
+        // Create elfio reader
+        ELFIO::elfio reader;
+        // Load ELF data
+        if(reader.load(name)) {
+            // check elf properties
+            if(reader.get_class() != expected_elf_class)
+                return false;
+            if(reader.get_type() != ELFIO::ET_EXEC)
+                return false;
+            if(reader.get_machine() != ELFIO::EM_RISCV)
+                return false;
+            entry_address = reader.get_entry();
+            for(const auto& pseg : reader.segments) {
+                const auto fsize = pseg->get_file_size(); // 0x42c/0x0
+                const auto seg_data = pseg->get_data();
+                const auto type = pseg->get_type();
+                if(type == ELFIO::PT_LOAD && fsize > 0) {
+                    auto res = cb(pseg->get_physical_address(), fsize, reinterpret_cast<const uint8_t* const>(seg_data));
+                    if(res != iss::Ok)
+                        CPPLOG(ERR) << "problem writing " << fsize << "bytes to 0x" << std::hex << pseg->get_physical_address();
+                }
+            }
+            // sections[] yields a null pointer if the file has no .symtab (e.g. a stripped executable)
+            const auto sym_sec = reader.sections[".symtab"];
+            if(sym_sec != nullptr && (ELFIO::SHT_SYMTAB == sym_sec->get_type() || ELFIO::SHT_DYNSYM == sym_sec->get_type())) {
+                ELFIO::symbol_section_accessor symbols(reader, sym_sec);
+                auto sym_no = symbols.get_symbols_num();
+                std::string sym_name;
+                ELFIO::Elf64_Addr value = 0;
+                ELFIO::Elf_Xword size = 0;
+                unsigned char bind = 0;
+                unsigned char type = 0;
+                ELFIO::Elf_Half section = 0;
+                unsigned char other = 0;
+                for(auto i = 0U; i < sym_no; ++i) {
+                    symbols.get_symbol(i, sym_name, value, size, bind, type, section, other);
+                    if(!sym_name.empty()) {
+                        this->symbol_table[sym_name] = value;
 #ifndef NDEBUG
-                            CPPLOG(DEBUG) << "Found Symbol " << name;
+                        CPPLOG(DEBUG) << "Found Symbol " << sym_name;
 #endif
-                        }
                    }
                }
-                return symbol_table;
+                // tohost and fromhost keep their current value if the symbol table does not define them
+                const auto tohost_it = symbol_table.find("tohost");
+                if(tohost_it != symbol_table.end())
+                    tohost = tohost_it->second;
+                const auto fromhost_it = symbol_table.find("fromhost");
+                if(fromhost_it != symbol_table.end())
+                    fromhost = fromhost_it->second;
            }
-            throw std::runtime_error(fmt::format("memory load file {} is not a valid elf file", name));
-        } else
-            throw std::runtime_error(fmt::format("memory load file not found, check if {} is a valid file", name));
+            return true;
+        }
+        return false;
    };
+    /**
+     * Executes a SYS_WRITE request by sending the text it refers to to the log.
+     *
+     * The text is accumulated in io_buf, each newline or NUL character causes the accumulated line to be logged.
+     * Finally the value 1 is written as a single byte to fromhost unless fromhost still has its initial value.
+     *
+     * @param aif interface used to read the text and to write the response
+     * @param loaded_payload the request: [1] file descriptor (ignored), [2] address of the text, [3] length of the text
+     * @param mem_type address space passed to aif for both accesses
+     * @return iss::Err if reading the text or writing to fromhost fails, iss::Ok otherwise
+     */
+    iss::status execute_sys_write(arch_if* aif, const std::array<uint64_t, 8>& loaded_payload, unsigned mem_type) {
+        // loaded_payload[1] holds the fd, we disregard it and just log the text
+        uint64_t buf_ptr = loaded_payload[2];
+        uint64_t len = loaded_payload[3];
+        std::vector<char> buf(len);
+        auto const rd_res =
+            aif->read(address_type::PHYSICAL, access_type::DEBUG_READ, mem_type, buf_ptr, len, reinterpret_cast<uint8_t*>(buf.data()));
+        if(rd_res != iss::Ok) {
+            CPPLOG(ERR) << "SYS_WRITE buffer read went wrong";
+            return iss::Err;
+        }
+        for(size_t i = 0; i < len; i++) {
+            if(buf[i] == '\n' || buf[i] == '\0') {
+                CPPLOG(INFO) << "tohost send '" << io_buf.str() << "'";
+                io_buf.str("");
+            } else
+                io_buf << buf[i];
+        }
+
+        // Not sure what the correct return value should be
+        uint8_t ret_val = 1;
+        if(fromhost != std::numeric_limits<uint64_t>::max() &&
+           aif->write(address_type::PHYSICAL, access_type::DEBUG_WRITE, mem_type, fromhost, 1, &ret_val) != iss::Ok) {
+            CPPLOG(ERR) << "Fromhost write went wrong";
+            return iss::Err;
+        }
+        return iss::Ok;
+    }
 };

 } // namespace arch
@@ -41,6 +41,11 @@
 #include "iss/vm_if.h"
 #include "iss/vm_types.h"
 #include "riscv_hart_common.h"
+#include "util/logging.h"
+#include <algorithm>
+#include <cstdint>
+#include <elfio/elf_types.hpp>
+#include <limits>
 #include <stdexcept>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
@@ -278,7 +283,7 @@ public:

    void disass_output(uint64_t pc, const std::string instr) override {
        NSCLOG(INFO, LOGCAT) << fmt::format("0x{:016x}    {:40} [s:0x{:x};c:{}]", pc, instr, (reg_t)state.mstatus,
-                                            this->reg.icount + cycle_offset);
+                                            this->reg.cycle + cycle_offset);
    };

    iss::instrumentation_if* get_instrumentation_if() override { return &instr_if; }
@@ -311,7 +316,7 @@ protected:

        uint64_t get_pendig_traps() override { return arch.reg.trap_state; }

-        uint64_t get_total_cycles() override { return arch.reg.icount + arch.cycle_offset; }
+        uint64_t get_total_cycles() override { return arch.reg.cycle + arch.cycle_offset; }

        void update_last_instr_cycles(unsigned cycles) override { arch.cycle_offset += cycles - 1; }

@@ -321,7 +326,7 @@ protected:

        unsigned get_reg_size(unsigned num) override { return traits<BASE>::reg_bit_widths[num]; }

-        std::unordered_map<std::string, uint64_t> get_symbol_table(std::string name) override { return arch.get_sym_table(name); }
+        std::unordered_map<std::string, uint64_t> const& get_symbol_table(std::string name) override { return arch.symbol_table; }

        riscv_hart_m_p<BASE, FEAT, LOGCAT>& arch;
    };
@@ -343,9 +348,6 @@ protected:
    int64_t instret_offset{0};
    uint64_t minstret_csr{0};
    reg_t fault_data;
-    uint64_t tohost = tohost_dflt;
-    uint64_t fromhost = fromhost_dflt;
-    bool tohost_lower_written = false;
    riscv_instrumentation_if instr_if;

    semihosting_cb_t<reg_t> semihosting_cb;
@@ -355,7 +357,6 @@ protected:
    using csr_page_type = typename csr_type::page_type;
    mem_type mem;
    csr_type csr;
-    std::stringstream uart_buf;
    std::unordered_map<reg_t, uint64_t> ptw;
    std::unordered_map<uint64_t, uint8_t> atomic_reservation;
    std::unordered_map<unsigned, rd_csr_f> csr_rd_cb;
@@ -377,8 +378,8 @@ protected:

    std::vector<uint8_t> tcm;

-    iss::status read_csr_reg(unsigned addr, reg_t& val);
-    iss::status write_csr_reg(unsigned addr, reg_t val);
+    iss::status read_plain(unsigned addr, reg_t& val);
+    iss::status write_plain(unsigned addr, reg_t val);
    iss::status read_null(unsigned addr, reg_t& val);
    iss::status write_null(unsigned addr, reg_t val) { return iss::status::Ok; }
    iss::status read_cycle(unsigned addr, reg_t& val);
@@ -399,17 +400,19 @@ protected:
    iss::status read_intstatus(unsigned addr, reg_t& val);
    iss::status write_intthresh(unsigned addr, reg_t val);
    iss::status write_xtvt(unsigned addr, reg_t val);
-    iss::status write_dcsr_dcsr(unsigned addr, reg_t val);
-    iss::status read_dcsr_reg(unsigned addr, reg_t& val);
-    iss::status write_dcsr_reg(unsigned addr, reg_t val);
-    iss::status read_dpc_reg(unsigned addr, reg_t& val);
-    iss::status write_dpc_reg(unsigned addr, reg_t val);
+    iss::status write_dcsr(unsigned addr, reg_t val);
+    iss::status read_debug(unsigned addr, reg_t& val);
+    iss::status write_dscratch(unsigned addr, reg_t val);
+    iss::status read_dpc(unsigned addr, reg_t& val);
+    iss::status write_dpc(unsigned addr, reg_t val);
+    iss::status read_fcsr(unsigned addr, reg_t& val);
+    iss::status write_fcsr(unsigned addr, reg_t val);

-    virtual iss::status read_custom_csr_reg(unsigned addr, reg_t& val) { return iss::status::Err; };
-    virtual iss::status write_custom_csr_reg(unsigned addr, reg_t val) { return iss::status::Err; };
+    virtual iss::status read_custom_csr(unsigned addr, reg_t& val) { return iss::status::Err; };
+    virtual iss::status write_custom_csr(unsigned addr, reg_t val) { return iss::status::Err; };

-    void register_custom_csr_rd(unsigned addr) { csr_rd_cb[addr] = &this_class::read_custom_csr_reg; }
-    void register_custom_csr_wr(unsigned addr) { csr_wr_cb[addr] = &this_class::write_custom_csr_reg; }
+    void register_custom_csr_rd(unsigned addr) { csr_rd_cb[addr] = &this_class::read_custom_csr; }
+    void register_custom_csr_wr(unsigned addr) { csr_wr_cb[addr] = &this_class::write_custom_csr; }

    reg_t mhartid_reg{0x0};

@@ -445,19 +448,22 @@ riscv_hart_m_p<BASE, FEAT, LOGCAT>::riscv_hart_m_p(feature_config cfg)
    csr[marchid] = traits<BASE>::MARCHID_VAL;
    csr[mimpid] = 1;

-    uart_buf.str("");
+    if(traits<BASE>::FLEN > 0) {
+        csr_rd_cb[fcsr] = &this_class::read_fcsr;
+        csr_wr_cb[fcsr] = &this_class::write_fcsr;
+    }
    for(unsigned addr = mhpmcounter3; addr <= mhpmcounter31; ++addr) {
        csr_rd_cb[addr] = &this_class::read_null;
-        csr_wr_cb[addr] = &this_class::write_csr_reg;
+        csr_wr_cb[addr] = &this_class::write_plain;
    }
    if(traits<BASE>::XLEN == 32)
        for(unsigned addr = mhpmcounter3h; addr <= mhpmcounter31h; ++addr) {
            csr_rd_cb[addr] = &this_class::read_null;
-            csr_wr_cb[addr] = &this_class::write_csr_reg;
+            csr_wr_cb[addr] = &this_class::write_plain;
        }
    for(unsigned addr = mhpmevent3; addr <= mhpmevent31; ++addr) {
        csr_rd_cb[addr] = &this_class::read_null;
-        csr_wr_cb[addr] = &this_class::write_csr_reg;
+        csr_wr_cb[addr] = &this_class::write_plain;
    }
    for(unsigned addr = hpmcounter3; addr <= hpmcounter31; ++addr) {
        csr_rd_cb[addr] = &this_class::read_null;
@@ -465,18 +471,17 @@ riscv_hart_m_p<BASE, FEAT, LOGCAT>::riscv_hart_m_p(feature_config cfg)
    if(traits<BASE>::XLEN == 32)
        for(unsigned addr = hpmcounter3h; addr <= hpmcounter31h; ++addr) {
            csr_rd_cb[addr] = &this_class::read_null;
-            // csr_wr_cb[addr] = &this_class::write_csr_reg;
        }
    // common regs
    const std::array<unsigned, 4> roaddrs{{misa, mvendorid, marchid, mimpid}};
    for(auto addr : roaddrs) {
-        csr_rd_cb[addr] = &this_class::read_csr_reg;
+        csr_rd_cb[addr] = &this_class::read_plain;
        csr_wr_cb[addr] = &this_class::write_null;
    }
    const std::array<unsigned, 4> rwaddrs{{mepc, mtvec, mscratch, mtval}};
    for(auto addr : rwaddrs) {
-        csr_rd_cb[addr] = &this_class::read_csr_reg;
-        csr_wr_cb[addr] = &this_class::write_csr_reg;
+        csr_rd_cb[addr] = &this_class::read_plain;
+        csr_wr_cb[addr] = &this_class::write_plain;
    }
    // special handling & overrides
    csr_rd_cb[time] = &this_class::read_time;
@@ -517,7 +522,7 @@ riscv_hart_m_p<BASE, FEAT, LOGCAT>::riscv_hart_m_p(feature_config cfg)
    csr_wr_cb[marchid] = &this_class::write_null;
    csr_wr_cb[mimpid] = &this_class::write_null;
    if(FEAT & FEAT_CLIC) {
-        csr_rd_cb[mtvt] = &this_class::read_csr_reg;
+        csr_rd_cb[mtvt] = &this_class::read_plain;
        csr_wr_cb[mtvt] = &this_class::write_xtvt;
        //        csr_rd_cb[mxnti] = &this_class::read_csr_reg;
        //        csr_wr_cb[mxnti] = &this_class::write_csr_reg;
@@ -527,7 +532,7 @@ riscv_hart_m_p<BASE, FEAT, LOGCAT>::riscv_hart_m_p(feature_config cfg)
        //        csr_wr_cb[mscratchcsw] = &this_class::write_csr_reg;
        //        csr_rd_cb[mscratchcswl] = &this_class::read_csr_reg;
        //        csr_wr_cb[mscratchcswl] = &this_class::write_csr_reg;
-        csr_rd_cb[mintthresh] = &this_class::read_csr_reg;
+        csr_rd_cb[mintthresh] = &this_class::read_plain;
        csr_wr_cb[mintthresh] = &this_class::write_intthresh;
        clic_int_reg.resize(cfg.clic_num_irq, clic_int_reg_t{.raw = 0});
        clic_cfg_reg = 0x20;
@@ -553,14 +558,14 @@ riscv_hart_m_p<BASE, FEAT, LOGCAT>::riscv_hart_m_p(feature_config cfg)
        insert_mem_range(cfg.tcm_base, cfg.tcm_size, read_clic_cb, write_clic_cb);
    }
    if(FEAT & FEAT_DEBUG) {
-        csr_wr_cb[dscratch0] = &this_class::write_dcsr_reg;
-        csr_rd_cb[dscratch0] = &this_class::read_dcsr_reg;
-        csr_wr_cb[dscratch1] = &this_class::write_dcsr_reg;
-        csr_rd_cb[dscratch1] = &this_class::read_dcsr_reg;
-        csr_wr_cb[dpc] = &this_class::write_dpc_reg;
-        csr_rd_cb[dpc] = &this_class::read_dpc_reg;
-        csr_wr_cb[dcsr] = &this_class::write_dcsr_dcsr;
-        csr_rd_cb[dcsr] = &this_class::read_dcsr_reg;
+        csr_wr_cb[dscratch0] = &this_class::write_dscratch;
+        csr_rd_cb[dscratch0] = &this_class::read_debug;
+        csr_wr_cb[dscratch1] = &this_class::write_dscratch;
+        csr_rd_cb[dscratch1] = &this_class::read_debug;
+        csr_wr_cb[dpc] = &this_class::write_dpc;
+        csr_rd_cb[dpc] = &this_class::read_dpc;
+        csr_wr_cb[dcsr] = &this_class::write_dcsr;
+        csr_rd_cb[dcsr] = &this_class::read_debug;
    }
    hart_mem_rd_delegate = [this](phys_addr_t a, unsigned l, uint8_t* const d) -> iss::status { return this->read_mem(a, l, d); };
    hart_mem_wr_delegate = [this](phys_addr_t a, unsigned l, uint8_t const* const d) -> iss::status { return this->write_mem(a, l, d); };
@@ -568,57 +573,22 @@ riscv_hart_m_p<BASE, FEAT, LOGCAT>::riscv_hart_m_p(feature_config cfg)

+/**
+ * Loads the ELF file given by name into the memory of the hart.
+ *
+ * @param name path of the ELF file
+ * @param type not evaluated
+ * @return entry_address paired with the result of read_elf_file
+ */
 template <typename BASE, features_e FEAT, typename LOGCAT>
 std::pair<uint64_t, bool> riscv_hart_m_p<BASE, FEAT, LOGCAT>::load_file(std::string name, int type) {
-    get_sym_table(name);
-    try {
-        tohost = symbol_table.at("tohost");
-        fromhost = symbol_table.at("fromhost");
-    } catch(std::out_of_range& e) {
-    }
-    FILE* fp = fopen(name.c_str(), "r");
-    if(fp) {
-        std::array<char, 5> buf;
-        auto n = fread(buf.data(), 1, 4, fp);
-        fclose(fp);
-        if(n != 4)
-            throw std::runtime_error("input file has insufficient size");
-        buf[4] = 0;
-        if(strcmp(buf.data() + 1, "ELF") == 0) {
-            // Create elfio reader
-            ELFIO::elfio reader;
-            // Load ELF data
-            if(!reader.load(name))
-                throw std::runtime_error("could not process elf file");
-            // check elf properties
-            if(reader.get_class() != ELFCLASS32)
-                if(sizeof(reg_t) == 4)
-                    throw std::runtime_error("wrong elf class in file");
-            if(reader.get_type() != ET_EXEC)
-                throw std::runtime_error("wrong elf type in file");
-            if(reader.get_machine() != EM_RISCV)
-                throw std::runtime_error("wrong elf machine in file");
-            auto entry = reader.get_entry();
-            for(const auto pseg : reader.segments) {
-                const auto fsize = pseg->get_file_size(); // 0x42c/0x0
-                const auto seg_data = pseg->get_data();
-                const auto type = pseg->get_type();
-                if(type == 1 && fsize > 0) {
-                    auto res = this->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_WRITE, traits<BASE>::MEM,
-                                           pseg->get_physical_address(), fsize, reinterpret_cast<const uint8_t* const>(seg_data));
-                    if(res != iss::Ok)
-                        CPPLOG(ERR) << "problem writing " << fsize << "bytes to 0x" << std::hex << pseg->get_physical_address();
-                }
-            }
-            for(const auto sec : reader.sections) {
-                if(sec->get_name() == ".tohost") {
-                    tohost = sec->get_address();
-                    fromhost = tohost + 0x40;
-                }
-            }
-            return std::make_pair(entry, true);
-        }
-        throw std::runtime_error(fmt::format("memory load file {} is not a valid elf file", name));
-    }
-    throw std::runtime_error(fmt::format("memory load file not found, check if {} is a valid file", name));
+    // the ELF class has to match the register width of the core
+    auto const elf_class = sizeof(reg_t) == 4 ? ELFIO::ELFCLASS32 : ELFIO::ELFCLASS64;
+    // every loadable segment is copied into the memory of the hart by means of a debug write
+    auto const store_segment = [this](uint64_t addr, uint64_t size, const uint8_t* const data) -> iss::status {
+        return this->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_WRITE, traits<BASE>::MEM, addr, size, data);
+    };
+    auto const loaded = read_elf_file(name, elf_class, store_segment);
+    // entry_address is read after the call, so it carries whatever read_elf_file stored there
+    return std::make_pair(entry_address, loaded);
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
@@ -649,7 +611,7 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read(const address_type type, co
    try {
        switch(space) {
        case traits<BASE>::MEM: {
-            auto alignment = is_fetch(access) ? (has_compressed() ? 2 : 4) : length;
+            auto alignment = is_fetch(access) ? (has_compressed() ? 2 : 4) : std::min<unsigned>(length, sizeof(reg_t));
            if(unlikely(is_fetch(access) && (addr & (alignment - 1)))) {
                fault_data = addr;
                if(is_debug(access))
@@ -665,7 +627,7 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read(const address_type type, co
                }
                phys_addr_t phys_addr{access, space, addr};
                auto res = iss::Err;
-                if(access != access_type::FETCH && memfn_range.size()) {
+                if(!is_fetch(access) && memfn_range.size()) {
                    auto it =
                        std::find_if(std::begin(memfn_range), std::end(memfn_range), [phys_addr](std::tuple<uint64_t, uint64_t> const& a) {
                            return std::get<0>(a) <= phys_addr.val && (std::get<0>(a) + std::get<1>(a)) > phys_addr.val;
@@ -684,19 +646,16 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read(const address_type type, co
                }
                return res;
            } catch(trap_access& ta) {
-                this->reg.trap_state = (1UL << 31) | ta.id;
-                fault_data = ta.addr;
+                if((access & access_type::DEBUG) == 0) {
+                    this->reg.trap_state = (1UL << 31) | ta.id;
+                    fault_data = ta.addr;
+                }
                return iss::Err;
            }
        } break;
        case traits<BASE>::CSR: {
            if(length != sizeof(reg_t))
                return iss::Err;
-            // We emulate the FCSR in the architectural state
-            if(addr == 3) {
-                *data = this->get_fcsr();
-                return iss::Ok;
-            }
            return read_csr(addr, *reinterpret_cast<reg_t* const>(data));
        } break;
        case traits<BASE>::FENCE: {
@@ -717,8 +676,10 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read(const address_type type, co
        }
        return iss::Ok;
    } catch(trap_access& ta) {
-        this->reg.trap_state = (1UL << 31) | ta.id;
-        fault_data = ta.addr;
+        if((access & access_type::DEBUG) == 0) {
+            this->reg.trap_state = (1UL << 31) | ta.id;
+            fault_data = ta.addr;
+        }
        return iss::Err;
    }
 }
@@ -746,7 +707,7 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write(const address_type type, c
                      << std::hex << addr;
        break;
    default:
-        CPPLOG(TRACE) << prefix << "write of " << length << " bytes @addr " << addr;
+        CPPLOG(TRACE) << prefix << "write of " << length << " bytes @addr 0x" << std::hex << addr;
    }
 #endif
    try {
@@ -760,14 +721,15 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write(const address_type type, c
                return iss::Err;
            }
            try {
-                if(length > 1 && (addr & (length - 1)) && (access & access_type::DEBUG) != access_type::DEBUG) {
+                auto alignment = std::min<unsigned>(length, sizeof(reg_t));
+                if(length > 1 && (addr & (alignment - 1)) && !is_debug(access)) {
                    this->reg.trap_state = (1UL << 31) | 6 << 16;
                    fault_data = addr;
                    return iss::Err;
                }
                phys_addr_t phys_addr{access, space, addr};
                auto res = iss::Err;
-                if(access != access_type::FETCH && memfn_range.size()) {
+                if(!is_fetch(access) && memfn_range.size()) {
                    auto it =
                        std::find_if(std::begin(memfn_range), std::end(memfn_range), [phys_addr](std::tuple<uint64_t, uint64_t> const& a) {
                            return std::get<0>(a) <= phys_addr.val && (std::get<0>(a) + std::get<1>(a)) > phys_addr.val;
@@ -780,7 +742,7 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write(const address_type type, c
                } else {
                    res = write_mem(phys_addr, length, data);
                }
-                if(unlikely(res != iss::Ok && (access & access_type::DEBUG) == 0)) {
+                if(unlikely(res != iss::Ok && !is_debug(access))) {
                    this->reg.trap_state = (1UL << 31) | (7UL << 16); // issue trap 7 (Store/AMO access fault)
                    fault_data = addr;
                }
@@ -790,49 +752,10 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write(const address_type type, c
                fault_data = ta.addr;
                return iss::Err;
            }
-
-            if((addr + length) > mem.size())
-                return iss::Err;
-            switch(addr) {
-            case 0x10013000: // UART0 base, TXFIFO reg
-            case 0x10023000: // UART1 base, TXFIFO reg
-                uart_buf << (char)data[0];
-                if(((char)data[0]) == '\n' || data[0] == 0) {
-                    // CPPLOG(INFO)<<"UART"<<((paddr.val>>16)&0x3)<<" send
-                    // '"<<uart_buf.str()<<"'";
-                    std::cout << uart_buf.str();
-                    uart_buf.str("");
-                }
-                return iss::Ok;
-            case 0x10008000: { // HFROSC base, hfrosccfg reg
-                auto& p = mem(addr / mem.page_size);
-                auto offs = addr & mem.page_addr_mask;
-                std::copy(data, data + length, p.data() + offs);
-                auto& x = *(p.data() + offs + 3);
-                if(x & 0x40)
-                    x |= 0x80; // hfroscrdy = 1 if hfroscen==1
-                return iss::Ok;
-            }
-            case 0x10008008: { // HFROSC base, pllcfg reg
-                auto& p = mem(addr / mem.page_size);
-                auto offs = addr & mem.page_addr_mask;
-                std::copy(data, data + length, p.data() + offs);
-                auto& x = *(p.data() + offs + 3);
-                x |= 0x80; // set pll lock upon writing
-                return iss::Ok;
-            } break;
-            default: {
-            }
-            }
        } break;
        case traits<BASE>::CSR: {
            if(length != sizeof(reg_t))
                return iss::Err;
-            // We emulate the FCSR in the architectural state
-            if(addr == 3) {
-                this->set_fcsr(*data);
-                return iss::Ok;
-            }
            return write_csr(addr, *reinterpret_cast<const reg_t*>(data));
        } break;
        case traits<BASE>::FENCE: {
@@ -855,8 +778,10 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write(const address_type type, c
        }
        return iss::Ok;
    } catch(trap_access& ta) {
-        this->reg.trap_state = (1UL << 31) | ta.id;
-        fault_data = ta.addr;
+        if((access & access_type::DEBUG) == 0) {
+            this->reg.trap_state = (1UL << 31) | ta.id;
+            fault_data = ta.addr;
+        }
        return iss::Err;
    }
 }
@@ -889,12 +814,6 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_csr(unsigned addr, reg_t v
    return (this->*(it->second))(addr, val);
 }

-template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_csr_reg(unsigned addr, reg_t& val) {
-    val = csr[addr];
-    return iss::Ok;
-}
-
 template <typename BASE, features_e FEAT, typename LOGCAT>
 iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_null(unsigned addr, reg_t& val) {
    val = 0;
@@ -902,14 +821,20 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_null(unsigned addr, reg_t&
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_csr_reg(unsigned addr, reg_t val) {
+iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_plain(unsigned addr, reg_t& val) {
+    val = csr[addr];
+    return iss::Ok;
+}
+
+template <typename BASE, features_e FEAT, typename LOGCAT>
+iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_plain(unsigned addr, reg_t val) {
    csr[addr] = val;
    return iss::Ok;
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
 iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_cycle(unsigned addr, reg_t& val) {
-    auto cycle_val = this->reg.icount + cycle_offset;
+    auto cycle_val = this->reg.cycle + cycle_offset;
    if(addr == mcycle) {
        val = static_cast<reg_t>(cycle_val);
    } else if(addr == mcycleh) {
@@ -929,7 +854,7 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_cycle(unsigned addr, reg_t
            mcycle_csr = (static_cast<uint64_t>(val) << 32) + (mcycle_csr & 0xffffffff);
        }
    }
-    cycle_offset = mcycle_csr - this->reg.icount; // TODO: relying on wrap-around
+    cycle_offset = mcycle_csr - this->reg.cycle; // TODO: relying on wrap-around
    return iss::Ok;
 }

@@ -960,7 +885,7 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_instret(unsigned addr, reg

 template <typename BASE, features_e FEAT, typename LOGCAT>
 iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_time(unsigned addr, reg_t& val) {
-    uint64_t time_val = this->reg.icount / (100000000 / 32768 - 1); //-> ~3052;
+    uint64_t time_val = this->reg.cycle / (100000000 / 32768 - 1); //-> ~3052;
    if(addr == time) {
        val = static_cast<reg_t>(time_val);
    } else if(addr == timeh) {
@@ -1052,7 +977,7 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_epc(unsigned addr, reg_t v
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_dcsr_dcsr(unsigned addr, reg_t val) {
+iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_dcsr(unsigned addr, reg_t val) {
    if(!debug_mode_active())
        throw illegal_instruction_fault(this->fault_data);
    //                  +-------------- ebreakm
@@ -1064,7 +989,7 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_dcsr_dcsr(unsigned addr, r
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_dcsr_reg(unsigned addr, reg_t& val) {
+iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_debug(unsigned addr, reg_t& val) {
    if(!debug_mode_active())
        throw illegal_instruction_fault(this->fault_data);
    val = csr[addr];
@@ -1072,7 +997,7 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_dcsr_reg(unsigned addr, reg
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_dcsr_reg(unsigned addr, reg_t val) {
+iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_dscratch(unsigned addr, reg_t val) {
    if(!debug_mode_active())
        throw illegal_instruction_fault(this->fault_data);
    csr[addr] = val;
@@ -1080,7 +1005,7 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_dcsr_reg(unsigned addr, re
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_dpc_reg(unsigned addr, reg_t& val) {
+iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_dpc(unsigned addr, reg_t& val) {
    if(!debug_mode_active())
        throw illegal_instruction_fault(this->fault_data);
    val = this->reg.DPC;
@@ -1088,7 +1013,7 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_dpc_reg(unsigned addr, reg_
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_dpc_reg(unsigned addr, reg_t val) {
+iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_dpc(unsigned addr, reg_t val) {
    if(!debug_mode_active())
        throw illegal_instruction_fault(this->fault_data);
    this->reg.DPC = val;
@@ -1101,6 +1026,18 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_intstatus(unsigned addr, re
    return iss::Ok;
 }

+template <typename BASE, features_e FEAT, typename LOGCAT>
+iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_fcsr(unsigned addr, reg_t& val) {
+    val = this->get_fcsr();
+    return iss::Ok;
+}
+
+template <typename BASE, features_e FEAT, typename LOGCAT>
+iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_fcsr(unsigned addr, reg_t val) {
+    this->set_fcsr(val);
+    return iss::Ok;
+}
+
 template <typename BASE, features_e FEAT, typename LOGCAT>
 iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_intthresh(unsigned addr, reg_t val) {
    csr[addr] = (val & 0xff) | (1 << (cfg.clic_int_ctl_bits)) - 1;
@@ -1127,59 +1064,51 @@ iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::read_mem(phys_addr_t paddr, unsi

 template <typename BASE, features_e FEAT, typename LOGCAT>
 iss::status riscv_hart_m_p<BASE, FEAT, LOGCAT>::write_mem(phys_addr_t paddr, unsigned length, const uint8_t* const data) {
-    switch(paddr.val) {
-    // TODO remove UART, Peripherals should not be part of the ISS
-    case 0xFFFF0000: // UART0 base, TXFIFO reg
-        if(((char)data[0]) == '\n' || data[0] == 0) {
-            CPPLOG(INFO) << "UART" << ((paddr.val >> 12) & 0x3) << " send '" << uart_buf.str() << "'";
-            uart_buf.str("");
-        } else if(((char)data[0]) != '\r')
-            uart_buf << (char)data[0];
-        break;
-    default: {
-        mem_type::page_type& p = mem(paddr.val / mem.page_size);
-        std::copy(data, data + length, p.data() + (paddr.val & mem.page_addr_mask));
-        // tohost handling in case of riscv-test
-        if(paddr.access && iss::access_type::FUNC) {
-            auto tohost_upper =
-                (traits<BASE>::XLEN == 32 && paddr.val == (tohost + 4)) || (traits<BASE>::XLEN == 64 && paddr.val == tohost);
-            auto tohost_lower = (traits<BASE>::XLEN == 32 && paddr.val == tohost) || (traits<BASE>::XLEN == 64 && paddr.val == tohost);
-            if(tohost_lower || tohost_upper) {
-                uint64_t hostvar = *reinterpret_cast<uint64_t*>(p.data() + (tohost & mem.page_addr_mask));
-                // in case of 32 bit system, two writes to tohost are needed, only evaluate on the second (high) write
-                if(tohost_upper && (tohost_lower || tohost_lower_written)) {
-                    switch(hostvar >> 48) {
-                    case 0:
-                        if(hostvar != 0x1) {
-                            CPPLOG(FATAL) << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar
-                                          << "), stopping simulation";
-                        } else {
-                            CPPLOG(INFO) << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar
-                                         << "), stopping simulation";
-                        }
-                        this->reg.trap_state = std::numeric_limits<uint32_t>::max();
-                        this->interrupt_sim = hostvar;
-                        break;
-                    case 0x0101: {
-                        char c = static_cast<char>(hostvar & 0xff);
-                        if(c == '\n' || c == 0) {
-                            CPPLOG(INFO) << "tohost send '" << uart_buf.str() << "'";
-                            uart_buf.str("");
-                        } else
-                            uart_buf << c;
-                    } break;
-                    default:
-                        break;
-                    }
-                    tohost_lower_written = false;
-                } else if(tohost_lower)
-                    tohost_lower_written = true;
-            } else if((traits<BASE>::XLEN == 32 && paddr.val == fromhost + 4) || (traits<BASE>::XLEN == 64 && paddr.val == fromhost)) {
-                uint64_t fhostvar = *reinterpret_cast<uint64_t*>(p.data() + (fromhost & mem.page_addr_mask));
-                *reinterpret_cast<uint64_t*>(p.data() + (tohost & mem.page_addr_mask)) = fhostvar;
+    mem_type::page_type& p = mem(paddr.val / mem.page_size);
+    std::copy(data, data + length, p.data() + (paddr.val & mem.page_addr_mask));
+    // tohost handling in case of riscv-test
+    // according to https://github.com/riscv-software-src/riscv-isa-sim/issues/364#issuecomment-607657754:
+    if(paddr.access && iss::access_type::FUNC) {
+        if(paddr.val == tohost) {
+            reg_t cur_data = 0; // filled from at most `length` bytes so a narrow store never over-reads `data`
+            std::copy(data, data + std::min<unsigned>(length, sizeof(reg_t)), reinterpret_cast<uint8_t*>(&cur_data));
+            // Device (bits 63:56) and command (bits 55:48) are only extracted when XLEN is not 32
+            uint8_t device = traits<BASE>::XLEN == 32 ? 0 : (static_cast<uint64_t>(cur_data) >> 56) & 0xFF;
+            uint8_t command = traits<BASE>::XLEN == 32 ? 0 : (static_cast<uint64_t>(cur_data) >> 48) & 0xFF;
+            uint64_t payload_addr = cur_data & 0xFFFFFFFFFFFFULL; // payload (bits 47:0)
+            if(payload_addr & 1) {
+                CPPLOG(FATAL) << "tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr
+                              << "), stopping simulation";
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+                return iss::Ok;
+            } else if(device == 0 && command == 0) {
+                std::array<uint64_t, 8> loaded_payload{}; // zeroed: a failed read must not leave garbage behind
+                if(read(address_type::PHYSICAL, access_type::DEBUG_READ, traits<BASE>::MEM, payload_addr, 8 * sizeof(uint64_t),
+                        reinterpret_cast<uint8_t*>(loaded_payload.data())) == iss::Err)
+                    CPPLOG(ERR) << "Syscall read went wrong";
+                uint64_t syscall_num = loaded_payload.at(0);
+                if(syscall_num == 64) { // SYS_WRITE
+                    return execute_sys_write(this, loaded_payload, traits<BASE>::MEM);
+                } else {
+                    CPPLOG(ERR) << "tohost syscall with number 0x" << std::hex << syscall_num << std::dec << " (" << syscall_num
+                                << ") not implemented";
+                    this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                    this->interrupt_sim = payload_addr;
+                    return iss::Ok;
+                }
+            } else {
+                CPPLOG(ERR) << "tohost functionality not implemented for device " << static_cast<unsigned>(device)
+                            << " and command " << static_cast<unsigned>(command); // cast: uint8_t would print as a char
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+                return iss::Ok;
             }
         }
-    }
+        if((traits<BASE>::XLEN == 32 && paddr.val == fromhost + 4) || (traits<BASE>::XLEN == 64 && paddr.val == fromhost)) {
+            uint64_t fhostvar = *reinterpret_cast<uint64_t*>(p.data() + (fromhost & mem.page_addr_mask));
+            *reinterpret_cast<uint64_t*>(p.data() + (tohost & mem.page_addr_mask)) = fhostvar;
+        }
     }
     return iss::Ok;
  }
@@ -39,7 +39,14 @@
 #include "iss/instrumentation_if.h"
 #include "iss/log_categories.h"
 #include "iss/vm_if.h"
+#include "iss/vm_types.h"
 #include "riscv_hart_common.h"
+#include "util/logging.h"
+#include <algorithm>
+#include <cstdint>
+#include <elfio/elf_types.hpp>
+#include <limits>
+#include <stdexcept>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
 #endif
@@ -326,7 +333,7 @@ public:

    void disass_output(uint64_t pc, const std::string instr) override {
        CLOG(INFO, disass) << fmt::format("0x{:016x}    {:40} [p:{};s:0x{:x};c:{}]", pc, instr, lvl[this->reg.PRIV], (reg_t)state.mstatus,
-                                          this->reg.icount + cycle_offset);
+                                          this->reg.cycle + cycle_offset);
    };

    iss::instrumentation_if* get_instrumentation_if() override { return &instr_if; }
@@ -359,7 +366,7 @@ protected:

        uint64_t get_pendig_traps() override { return arch.reg.trap_state; }

-        uint64_t get_total_cycles() override { return arch.reg.icount + arch.cycle_offset; }
+        uint64_t get_total_cycles() override { return arch.reg.cycle + arch.cycle_offset; }

        void update_last_instr_cycles(unsigned cycles) override { arch.cycle_offset += cycles - 1; }

@@ -369,7 +376,7 @@ protected:

        unsigned get_reg_size(unsigned num) override { return traits<BASE>::reg_bit_widths[num]; }

-        std::unordered_map<std::string, uint64_t> get_symbol_table(std::string name) override { return arch.get_sym_table(name); }
+        std::unordered_map<std::string, uint64_t> const& get_symbol_table(std::string name) override { return arch.symbol_table; }

        riscv_hart_msu_vp<BASE>& arch;
    };
@@ -391,9 +398,6 @@ protected:
    uint64_t minstret_csr{0};
    reg_t fault_data;
    std::array<vm_info, 2> vm;
-    uint64_t tohost = tohost_dflt;
-    uint64_t fromhost = fromhost_dflt;
-    bool tohost_lower_written = false;
    riscv_instrumentation_if instr_if;

    std::function<void(arch_if*, reg_t, reg_t)> semihosting_cb;
@@ -404,7 +408,6 @@ protected:
    mem_type mem;
    csr_type csr;
    void update_vm_info();
-    std::stringstream uart_buf;
    std::unordered_map<reg_t, uint64_t> ptw;
    std::unordered_map<uint64_t, uint8_t> atomic_reservation;
    std::unordered_map<unsigned, rd_csr_f> csr_rd_cb;
@@ -459,7 +462,6 @@ riscv_hart_msu_vp<BASE>::riscv_hart_msu_vp()
    csr[marchid] = traits<BASE>::MARCHID_VAL;
    csr[mimpid] = 1;

-    uart_buf.str("");
    for(unsigned addr = mhpmcounter3; addr <= mhpmcounter31; ++addr) {
        csr_rd_cb[addr] = &this_class::read_null;
        csr_wr_cb[addr] = &this_class::write_csr_reg;
@@ -555,71 +557,14 @@ riscv_hart_msu_vp<BASE>::riscv_hart_msu_vp()
 }

 template <typename BASE> std::pair<uint64_t, bool> riscv_hart_msu_vp<BASE>::load_file(std::string name, int type) {
-    FILE* fp = fopen(name.c_str(), "r");
-    if(fp) {
-        std::array<char, 5> buf;
-        auto n = fread(buf.data(), 1, 4, fp);
-        fclose(fp);
-        if(n != 4)
-            throw std::runtime_error("input file has insufficient size");
-        buf[4] = 0;
-        if(strcmp(buf.data() + 1, "ELF") == 0) {
-            // Create elfio reader
-            ELFIO::elfio reader;
-            // Load ELF data
-            if(!reader.load(name))
-                throw std::runtime_error("could not process elf file");
-            // check elf properties
-            if(reader.get_class() != ELFCLASS32)
-                if(sizeof(reg_t) == 4)
-                    throw std::runtime_error("wrong elf class in file");
-            if(reader.get_type() != ET_EXEC)
-                throw std::runtime_error("wrong elf type in file");
-            if(reader.get_machine() != EM_RISCV)
-                throw std::runtime_error("wrong elf machine in file");
-            auto entry = reader.get_entry();
-            for(const auto pseg : reader.segments) {
-                const auto fsize = pseg->get_file_size(); // 0x42c/0x0
-                const auto seg_data = pseg->get_data();
-                const auto type = pseg->get_type();
-                if(type == 1 && fsize > 0) {
-                    auto res = this->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_WRITE, traits<BASE>::MEM,
-                                           pseg->get_physical_address(), fsize, reinterpret_cast<const uint8_t* const>(seg_data));
-                    if(res != iss::Ok)
-                        CPPLOG(ERR) << "problem writing " << fsize << "bytes to 0x" << std::hex << pseg->get_physical_address();
-                }
-            }
-            for(const auto sec : reader.sections) {
-                if(sec->get_name() == ".symtab") {
-                    if(SHT_SYMTAB == sec->get_type() || SHT_DYNSYM == sec->get_type()) {
-                        ELFIO::symbol_section_accessor symbols(reader, sec);
-                        auto sym_no = symbols.get_symbols_num();
-                        std::string name;
-                        ELFIO::Elf64_Addr value = 0;
-                        ELFIO::Elf_Xword size = 0;
-                        unsigned char bind = 0;
-                        unsigned char type = 0;
-                        ELFIO::Elf_Half section = 0;
-                        unsigned char other = 0;
-                        for(auto i = 0U; i < sym_no; ++i) {
-                            symbols.get_symbol(i, name, value, size, bind, type, section, other);
-                            if(name == "tohost") {
-                                tohost = value;
-                            } else if(name == "fromhost") {
-                                fromhost = value;
-                            }
-                        }
-                    }
-                } else if(sec->get_name() == ".tohost") {
-                    tohost = sec->get_address();
-                    fromhost = tohost + 0x40;
-                }
-            }
-            return std::make_pair(entry, true);
-        }
-        throw std::runtime_error(fmt::format("memory load file {} is not a valid elf file", name));
+    if(read_elf_file(name, sizeof(reg_t) == 4 ? ELFIO::ELFCLASS32 : ELFIO::ELFCLASS64,
+                     [this](uint64_t addr, uint64_t size, const uint8_t* const data) -> iss::status {
+                         return this->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_WRITE, traits<BASE>::MEM, addr, size,
+                                            data);
+                     })) {
+        return std::make_pair(entry_address, true);
    }
-    throw std::runtime_error(fmt::format("memory load file not found, check if {} is a valid file", name));
+    return std::make_pair(entry_address, false);
 }

 template <typename BASE>
@@ -637,7 +582,7 @@ iss::status riscv_hart_msu_vp<BASE>::read(const address_type type, const access_
    try {
        switch(space) {
        case traits<BASE>::MEM: {
-            auto alignment = is_fetch(access) ? (traits<BASE>::MISA_VAL & 0x100 ? 2 : 4) : length;
+            auto alignment = is_fetch(access) ? (has_compressed() ? 2 : 4) : std::min<unsigned>(length, sizeof(reg_t));
            if(unlikely(is_fetch(access) && (addr & (alignment - 1)))) {
                fault_data = addr;
                if(access && iss::access_type::DEBUG)
@@ -669,8 +614,10 @@ iss::status riscv_hart_msu_vp<BASE>::read(const address_type type, const access_
                }
                return res;
            } catch(trap_access& ta) {
-                this->reg.trap_state = (1 << 31) | ta.id;
-                fault_data = ta.addr;
+                if((access & access_type::DEBUG) == 0) {
+                    this->reg.trap_state = (1UL << 31) | ta.id;
+                    fault_data = ta.addr;
+                }
                return iss::Err;
            }
        } break;
@@ -708,8 +655,10 @@ iss::status riscv_hart_msu_vp<BASE>::read(const address_type type, const access_
        }
        return iss::Ok;
    } catch(trap_access& ta) {
-        this->reg.trap_state = (1UL << 31) | ta.id;
-        fault_data = ta.addr;
+        if((access & access_type::DEBUG) == 0) {
+            this->reg.trap_state = (1UL << 31) | ta.id;
+            fault_data = ta.addr;
+        }
        return iss::Err;
    }
 }
@@ -752,6 +701,7 @@ iss::status riscv_hart_msu_vp<BASE>::write(const address_type type, const access
            }
            phys_addr_t paddr = BASE::v2p(iss::addr_t{access, type, space, addr});
            try {
+                // TODO: There is no check for alignment
                if(unlikely((addr & ~PGMASK) != ((addr + length - 1) & ~PGMASK))) { // we may cross a page boundary
                    vm_info vm = hart_state_type::decode_vm_info(this->reg.PRIV, state.satp);
                    if(vm.levels != 0) { // VM is active
@@ -774,40 +724,6 @@ iss::status riscv_hart_msu_vp<BASE>::write(const address_type type, const access
                fault_data = ta.addr;
                return iss::Err;
            }
-
-            if((paddr.val + length) > mem.size())
-                return iss::Err;
-            switch(paddr.val) {
-            case 0x10013000: // UART0 base, TXFIFO reg
-            case 0x10023000: // UART1 base, TXFIFO reg
-                uart_buf << (char)data[0];
-                if(((char)data[0]) == '\n' || data[0] == 0) {
-                    // CPPLOG(INFO)<<"UART"<<((paddr.val>>16)&0x3)<<" send
-                    // '"<<uart_buf.str()<<"'";
-                    std::cout << uart_buf.str();
-                    uart_buf.str("");
-                }
-                return iss::Ok;
-            case 0x10008000: { // HFROSC base, hfrosccfg reg
-                auto& p = mem(paddr.val / mem.page_size);
-                auto offs = paddr.val & mem.page_addr_mask;
-                std::copy(data, data + length, p.data() + offs);
-                auto& x = *(p.data() + offs + 3);
-                if(x & 0x40)
-                    x |= 0x80; // hfroscrdy = 1 if hfroscen==1
-                return iss::Ok;
-            }
-            case 0x10008008: { // HFROSC base, pllcfg reg
-                auto& p = mem(paddr.val / mem.page_size);
-                auto offs = paddr.val & mem.page_addr_mask;
-                std::copy(data, data + length, p.data() + offs);
-                auto& x = *(p.data() + offs + 3);
-                x |= 0x80; // set pll lock upon writing
-                return iss::Ok;
-            } break;
-            default: {
-            }
-            }
        } break;
        case traits<BASE>::CSR: {
            if(length != sizeof(reg_t))
@@ -839,8 +755,10 @@ iss::status riscv_hart_msu_vp<BASE>::write(const address_type type, const access
        }
        return iss::Ok;
    } catch(trap_access& ta) {
-        this->reg.trap_state = (1UL << 31) | ta.id;
-        fault_data = ta.addr;
+        if((access & access_type::DEBUG) == 0) {
+            this->reg.trap_state = (1UL << 31) | ta.id;
+            fault_data = ta.addr;
+        }
        return iss::Err;
    }
 }
@@ -887,7 +805,7 @@ template <typename BASE> iss::status riscv_hart_msu_vp<BASE>::write_reg(unsigned
 }

 template <typename BASE> iss::status riscv_hart_msu_vp<BASE>::read_cycle(unsigned addr, reg_t& val) {
-    auto cycle_val = this->reg.icount + cycle_offset;
+    auto cycle_val = this->reg.cycle + cycle_offset;
    if(addr == mcycle) {
        val = static_cast<reg_t>(cycle_val);
    } else if(addr == mcycleh) {
@@ -908,7 +826,7 @@ template <typename BASE> iss::status riscv_hart_msu_vp<BASE>::write_cycle(unsign
            mcycle_csr = (static_cast<uint64_t>(val) << 32) + (mcycle_csr & 0xffffffff);
        }
    }
-    cycle_offset = mcycle_csr - this->reg.icount; // TODO: relying on wrap-around
+    cycle_offset = mcycle_csr - this->reg.cycle; // TODO: relying on wrap-around
    return iss::Ok;
 }

@@ -936,7 +854,7 @@ template <typename BASE> iss::status riscv_hart_msu_vp<BASE>::write_instret(unsi
 }

 template <typename BASE> iss::status riscv_hart_msu_vp<BASE>::read_time(unsigned addr, reg_t& val) {
-    uint64_t time_val = this->reg.icount / (100000000 / 32768 - 1); //-> ~3052;
+    uint64_t time_val = this->reg.cycle / (100000000 / 32768 - 1); //-> ~3052;
    if(addr == time) {
        val = static_cast<reg_t>(time_val);
    } else if(addr == timeh) {
@@ -1075,61 +993,51 @@ template <typename BASE> iss::status riscv_hart_msu_vp<BASE>::read_mem(phys_addr
 }

 template <typename BASE> iss::status riscv_hart_msu_vp<BASE>::write_mem(phys_addr_t paddr, unsigned length, const uint8_t* const data) {
-    switch(paddr.val) {
-    case 0xFFFF0000: // UART0 base, TXFIFO reg
-        if(((char)data[0]) == '\n' || data[0] == 0) {
-            CPPLOG(INFO) << "UART" << ((paddr.val >> 12) & 0x3) << " send '" << uart_buf.str() << "'";
-            uart_buf.str("");
-        } else if(((char)data[0]) != '\r')
-            uart_buf << (char)data[0];
-        break;
-    default: {
-        mem_type::page_type& p = mem(paddr.val / mem.page_size);
-        std::copy(data, data + length, p.data() + (paddr.val & mem.page_addr_mask));
-        // tohost handling in case of riscv-test
-        if(paddr.access && iss::access_type::FUNC) {
-            auto tohost_upper =
-                (traits<BASE>::XLEN == 32 && paddr.val == (tohost + 4)) || (traits<BASE>::XLEN == 64 && paddr.val == tohost);
-            auto tohost_lower = (traits<BASE>::XLEN == 32 && paddr.val == tohost) || (traits<BASE>::XLEN == 64 && paddr.val == tohost);
-            if(tohost_lower || tohost_upper) {
-                uint64_t hostvar = *reinterpret_cast<uint64_t*>(p.data() + (tohost & mem.page_addr_mask));
-                // in case of 32 bit system, two writes to tohost are needed, only evaluate on the second (high) write
-                if(tohost_upper && (tohost_lower || tohost_lower_written)) {
-                    switch(hostvar >> 48) {
-                    case 0:
-                        if(hostvar != 0x1) {
-                            CPPLOG(FATAL) << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar
-                                          << "), stopping simulation";
-                        } else {
-                            CPPLOG(INFO) << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar
-                                         << "), stopping simulation";
-                        }
-                        this->reg.trap_state = std::numeric_limits<uint32_t>::max();
-                        this->interrupt_sim = hostvar;
-#ifndef WITH_TCC
-                        throw(iss::simulation_stopped(hostvar));
-#endif
-                        break;
-                    case 0x0101: {
-                        char c = static_cast<char>(hostvar & 0xff);
-                        if(c == '\n' || c == 0) {
-                            CPPLOG(INFO) << "tohost send '" << uart_buf.str() << "'";
-                            uart_buf.str("");
-                        } else
-                            uart_buf << c;
-                    } break;
-                    default:
-                        break;
-                    }
-                    tohost_lower_written = false;
-                } else if(tohost_lower)
-                    tohost_lower_written = true;
-            } else if((traits<BASE>::XLEN == 32 && paddr.val == fromhost + 4) || (traits<BASE>::XLEN == 64 && paddr.val == fromhost)) {
-                uint64_t fhostvar = *reinterpret_cast<uint64_t*>(p.data() + (fromhost & mem.page_addr_mask));
-                *reinterpret_cast<uint64_t*>(p.data() + (tohost & mem.page_addr_mask)) = fhostvar;
+    mem_type::page_type& p = mem(paddr.val / mem.page_size);
+    std::copy(data, data + length, p.data() + (paddr.val & mem.page_addr_mask));
+    // tohost handling in case of riscv-test
+    // according to https://github.com/riscv-software-src/riscv-isa-sim/issues/364#issuecomment-607657754:
+    if(paddr.access && iss::access_type::FUNC) {
+        if(paddr.val == tohost) {
+            reg_t cur_data = 0; // filled from at most `length` bytes so a narrow store never over-reads `data`
+            std::copy(data, data + std::min<unsigned>(length, sizeof(reg_t)), reinterpret_cast<uint8_t*>(&cur_data));
+            // Device (bits 63:56) and command (bits 55:48) are only extracted when XLEN is not 32
+            uint8_t device = traits<BASE>::XLEN == 32 ? 0 : (static_cast<uint64_t>(cur_data) >> 56) & 0xFF;
+            uint8_t command = traits<BASE>::XLEN == 32 ? 0 : (static_cast<uint64_t>(cur_data) >> 48) & 0xFF;
+            uint64_t payload_addr = cur_data & 0xFFFFFFFFFFFFULL; // payload (bits 47:0)
+            if(payload_addr & 1) {
+                CPPLOG(FATAL) << "tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr
+                              << "), stopping simulation";
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+                return iss::Ok;
+            } else if(device == 0 && command == 0) {
+                std::array<uint64_t, 8> loaded_payload{}; // zeroed: a failed read must not leave garbage behind
+                if(read(address_type::PHYSICAL, access_type::DEBUG_READ, traits<BASE>::MEM, payload_addr, 8 * sizeof(uint64_t),
+                        reinterpret_cast<uint8_t*>(loaded_payload.data())) == iss::Err)
+                    CPPLOG(ERR) << "Syscall read went wrong";
+                uint64_t syscall_num = loaded_payload.at(0);
+                if(syscall_num == 64) { // SYS_WRITE
+                    return execute_sys_write(this, loaded_payload, traits<BASE>::MEM);
+                } else {
+                    CPPLOG(ERR) << "tohost syscall with number 0x" << std::hex << syscall_num << std::dec << " (" << syscall_num
+                                << ") not implemented";
+                    this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                    this->interrupt_sim = payload_addr;
+                    return iss::Ok;
+                }
+            } else {
+                CPPLOG(ERR) << "tohost functionality not implemented for device " << static_cast<unsigned>(device)
+                            << " and command " << static_cast<unsigned>(command); // cast: uint8_t would print as a char
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+                return iss::Ok;
             }
         }
-    }
+        if((traits<BASE>::XLEN == 32 && paddr.val == fromhost + 4) || (traits<BASE>::XLEN == 64 && paddr.val == fromhost)) {
+            uint64_t fhostvar = *reinterpret_cast<uint64_t*>(p.data() + (fromhost & mem.page_addr_mask));
+            *reinterpret_cast<uint64_t*>(p.data() + (tohost & mem.page_addr_mask)) = fhostvar;
+        }
     }
     return iss::Ok;
  }
@@ -39,7 +39,14 @@
 #include "iss/instrumentation_if.h"
 #include "iss/log_categories.h"
 #include "iss/vm_if.h"
+#include "iss/vm_types.h"
 #include "riscv_hart_common.h"
+#include "util/logging.h"
+#include <algorithm>
+#include <cstdint>
+#include <elfio/elf_types.hpp>
+#include <limits>
+#include <stdexcept>
 #ifndef FMT_HEADER_ONLY
 #define FMT_HEADER_ONLY
 #endif
@@ -302,8 +309,8 @@ public:
    void set_mhartid(reg_t mhartid) { mhartid_reg = mhartid; };

    void disass_output(uint64_t pc, const std::string instr) override {
-        CLOG(INFO, disass) << fmt::format("0x{:016x}    {:40} [p:{};s:0x{:x};c:{}]", pc, instr, lvl[this->reg.PRIV], (reg_t)state.mstatus,
-                                          this->reg.icount + cycle_offset);
+        NSCLOG(INFO, LOGCAT) << fmt::format("0x{:016x}    {:40} [p:{};s:0x{:x};c:{}]", pc, instr, lvl[this->reg.PRIV], (reg_t)state.mstatus,
+                                            this->reg.cycle + cycle_offset);
    };

    iss::instrumentation_if* get_instrumentation_if() override { return &instr_if; }
@@ -336,7 +343,7 @@ protected:

        uint64_t get_pendig_traps() override { return arch.reg.trap_state; }

-        uint64_t get_total_cycles() override { return arch.reg.icount + arch.cycle_offset; }
+        uint64_t get_total_cycles() override { return arch.reg.cycle + arch.cycle_offset; }

        void update_last_instr_cycles(unsigned cycles) override { arch.cycle_offset += cycles - 1; }

@@ -346,7 +353,7 @@ protected:

        unsigned get_reg_size(unsigned num) override { return traits<BASE>::reg_bit_widths[num]; }

-        std::unordered_map<std::string, uint64_t> get_symbol_table(std::string name) override { return arch.get_sym_table(name); }
+        std::unordered_map<std::string, uint64_t> const& get_symbol_table(std::string name) override { return arch.symbol_table; }

        riscv_hart_mu_p<BASE, FEAT, LOGCAT>& arch;
    };
@@ -368,9 +375,6 @@ protected:
    int64_t instret_offset{0};
    uint64_t minstret_csr{0};
    reg_t fault_data;
-    uint64_t tohost = tohost_dflt;
-    uint64_t fromhost = fromhost_dflt;
-    bool tohost_lower_written = false;
    riscv_instrumentation_if instr_if;

    semihosting_cb_t<reg_t> semihosting_cb;
@@ -380,7 +384,6 @@ protected:
    using csr_page_type = typename csr_type::page_type;
    mem_type mem;
    csr_type csr;
-    std::stringstream uart_buf;
    std::unordered_map<reg_t, uint64_t> ptw;
    std::unordered_map<uint64_t, uint8_t> atomic_reservation;
    std::unordered_map<unsigned, rd_csr_f> csr_rd_cb;
@@ -402,8 +405,8 @@ protected:

    std::vector<uint8_t> tcm;

-    iss::status read_csr_reg(unsigned addr, reg_t& val);
-    iss::status write_csr_reg(unsigned addr, reg_t val);
+    iss::status read_plain(unsigned addr, reg_t& val);
+    iss::status write_plain(unsigned addr, reg_t val);
    iss::status read_null(unsigned addr, reg_t& val);
    iss::status write_null(unsigned addr, reg_t val) { return iss::status::Ok; }
    iss::status read_cycle(unsigned addr, reg_t& val);
@@ -426,15 +429,17 @@ protected:
    iss::status read_intstatus(unsigned addr, reg_t& val);
    iss::status write_intthresh(unsigned addr, reg_t val);
    iss::status write_xtvt(unsigned addr, reg_t val);
-    iss::status write_dcsr_dcsr(unsigned addr, reg_t val);
-    iss::status read_dcsr_reg(unsigned addr, reg_t& val);
-    iss::status write_dcsr_reg(unsigned addr, reg_t val);
-    iss::status read_dpc_reg(unsigned addr, reg_t& val);
-    iss::status write_dpc_reg(unsigned addr, reg_t val);
-    iss::status write_pmpcfg_reg(unsigned addr, reg_t val);
+    iss::status write_dcsr(unsigned addr, reg_t val);
+    iss::status read_debug(unsigned addr, reg_t& val);
+    iss::status write_dscratch(unsigned addr, reg_t val);
+    iss::status read_dpc(unsigned addr, reg_t& val);
+    iss::status write_dpc(unsigned addr, reg_t val);
+    iss::status read_fcsr(unsigned addr, reg_t& val);
+    iss::status write_fcsr(unsigned addr, reg_t val);
+    iss::status write_pmpcfg(unsigned addr, reg_t val);

-    virtual iss::status read_custom_csr_reg(unsigned addr, reg_t& val) { return iss::status::Err; };
-    virtual iss::status write_custom_csr_reg(unsigned addr, reg_t val) { return iss::status::Err; };
+    virtual iss::status read_custom_csr(unsigned addr, reg_t& val) { return iss::status::Err; };
+    virtual iss::status write_custom_csr(unsigned addr, reg_t val) { return iss::status::Err; };

    void register_custom_csr_rd(unsigned addr) { csr_rd_cb[addr] = &this_class::read_custom_csr_reg; }
    void register_custom_csr_wr(unsigned addr) { csr_wr_cb[addr] = &this_class::write_custom_csr_reg; }
@@ -473,19 +478,22 @@ riscv_hart_mu_p<BASE, FEAT, LOGCAT>::riscv_hart_mu_p(feature_config cfg)
    csr[marchid] = traits<BASE>::MARCHID_VAL;
    csr[mimpid] = 1;

-    uart_buf.str("");
+    if(traits<BASE>::FLEN > 0) {
+        csr_rd_cb[fcsr] = &this_class::read_fcsr;
+        csr_wr_cb[fcsr] = &this_class::write_fcsr;
+    }
    for(unsigned addr = mhpmcounter3; addr <= mhpmcounter31; ++addr) {
        csr_rd_cb[addr] = &this_class::read_null;
-        csr_wr_cb[addr] = &this_class::write_csr_reg;
+        csr_wr_cb[addr] = &this_class::write_plain;
    }
    if(traits<BASE>::XLEN == 32)
        for(unsigned addr = mhpmcounter3h; addr <= mhpmcounter31h; ++addr) {
            csr_rd_cb[addr] = &this_class::read_null;
-            csr_wr_cb[addr] = &this_class::write_csr_reg;
+            csr_wr_cb[addr] = &this_class::write_plain;
        }
    for(unsigned addr = mhpmevent3; addr <= mhpmevent31; ++addr) {
        csr_rd_cb[addr] = &this_class::read_null;
-        csr_wr_cb[addr] = &this_class::write_csr_reg;
+        csr_wr_cb[addr] = &this_class::write_plain;
    }
    for(unsigned addr = hpmcounter3; addr <= hpmcounter31; ++addr) {
        csr_rd_cb[addr] = &this_class::read_null;
@@ -493,12 +501,11 @@ riscv_hart_mu_p<BASE, FEAT, LOGCAT>::riscv_hart_mu_p(feature_config cfg)
    if(traits<BASE>::XLEN == 32)
        for(unsigned addr = hpmcounter3h; addr <= hpmcounter31h; ++addr) {
            csr_rd_cb[addr] = &this_class::read_null;
-            // csr_wr_cb[addr] = &this_class::write_csr_reg;
        }
    // common regs
    const std::array<unsigned, 4> roaddrs{{misa, mvendorid, marchid, mimpid}};
    for(auto addr : roaddrs) {
-        csr_rd_cb[addr] = &this_class::read_csr_reg;
+        csr_rd_cb[addr] = &this_class::read_plain;
        csr_wr_cb[addr] = &this_class::write_null;
    }
    const std::array<unsigned, 8> rwaddrs{{
@@ -512,8 +519,8 @@ riscv_hart_mu_p<BASE, FEAT, LOGCAT>::riscv_hart_mu_p(feature_config cfg)
        utval,
    }};
    for(auto addr : rwaddrs) {
-        csr_rd_cb[addr] = &this_class::read_csr_reg;
-        csr_wr_cb[addr] = &this_class::write_csr_reg;
+        csr_rd_cb[addr] = &this_class::read_plain;
+        csr_wr_cb[addr] = &this_class::write_plain;
    }
    // special handling & overrides
    csr_rd_cb[time] = &this_class::read_time;
@@ -558,18 +565,18 @@ riscv_hart_mu_p<BASE, FEAT, LOGCAT>::riscv_hart_mu_p(feature_config cfg)

    if(FEAT & FEAT_PMP) {
        for(size_t i = pmpaddr0; i <= pmpaddr15; ++i) {
-            csr_rd_cb[i] = &this_class::read_csr_reg;
-            csr_wr_cb[i] = &this_class::write_csr_reg;
+            csr_rd_cb[i] = &this_class::read_plain;
+            csr_wr_cb[i] = &this_class::write_plain;
        }
        for(size_t i = pmpcfg0; i < pmpcfg0 + 16 / sizeof(reg_t); ++i) {
-            csr_rd_cb[i] = &this_class::read_csr_reg;
-            csr_wr_cb[i] = &this_class::write_pmpcfg_reg;
+            csr_rd_cb[i] = &this_class::read_plain;
+            csr_wr_cb[i] = &this_class::write_pmpcfg;
        }
    }
    if(FEAT & FEAT_EXT_N) {
-        csr_rd_cb[mideleg] = &this_class::read_csr_reg;
+        csr_rd_cb[mideleg] = &this_class::read_plain;
        csr_wr_cb[mideleg] = &this_class::write_ideleg;
-        csr_rd_cb[medeleg] = &this_class::read_csr_reg;
+        csr_rd_cb[medeleg] = &this_class::read_plain;
        csr_wr_cb[medeleg] = &this_class::write_edeleg;
        csr_rd_cb[uie] = &this_class::read_ie;
        csr_wr_cb[uie] = &this_class::write_ie;
@@ -583,7 +590,7 @@ riscv_hart_mu_p<BASE, FEAT, LOGCAT>::riscv_hart_mu_p(feature_config cfg)
        csr_rd_cb[utvec] = &this_class::read_tvec;
    }
    if(FEAT & FEAT_CLIC) {
-        csr_rd_cb[mtvt] = &this_class::read_csr_reg;
+        csr_rd_cb[mtvt] = &this_class::read_plain;
        csr_wr_cb[mtvt] = &this_class::write_xtvt;
        //        csr_rd_cb[mxnti] = &this_class::read_csr_reg;
        //        csr_wr_cb[mxnti] = &this_class::write_csr_reg;
@@ -593,14 +600,14 @@ riscv_hart_mu_p<BASE, FEAT, LOGCAT>::riscv_hart_mu_p(feature_config cfg)
        //        csr_wr_cb[mscratchcsw] = &this_class::write_csr_reg;
        //        csr_rd_cb[mscratchcswl] = &this_class::read_csr_reg;
        //        csr_wr_cb[mscratchcswl] = &this_class::write_csr_reg;
-        csr_rd_cb[mintthresh] = &this_class::read_csr_reg;
+        csr_rd_cb[mintthresh] = &this_class::read_plain;
        csr_wr_cb[mintthresh] = &this_class::write_intthresh;
        if(FEAT & FEAT_EXT_N) {
-            csr_rd_cb[utvt] = &this_class::read_csr_reg;
+            csr_rd_cb[utvt] = &this_class::read_plain;
            csr_wr_cb[utvt] = &this_class::write_xtvt;
            csr_rd_cb[uintstatus] = &this_class::read_intstatus;
            csr_wr_cb[uintstatus] = &this_class::write_null;
-            csr_rd_cb[uintthresh] = &this_class::read_csr_reg;
+            csr_rd_cb[uintthresh] = &this_class::read_plain;
            csr_wr_cb[uintthresh] = &this_class::write_intthresh;
        }
        clic_int_reg.resize(cfg.clic_num_irq, clic_int_reg_t{.raw = 0});
@@ -629,14 +636,14 @@ riscv_hart_mu_p<BASE, FEAT, LOGCAT>::riscv_hart_mu_p(feature_config cfg)
        insert_mem_range(cfg.tcm_base, cfg.tcm_size, read_clic_cb, write_clic_cb);
    }
    if(FEAT & FEAT_DEBUG) {
-        csr_wr_cb[dscratch0] = &this_class::write_dcsr_reg;
-        csr_rd_cb[dscratch0] = &this_class::read_dcsr_reg;
-        csr_wr_cb[dscratch1] = &this_class::write_dcsr_reg;
-        csr_rd_cb[dscratch1] = &this_class::read_dcsr_reg;
-        csr_wr_cb[dpc] = &this_class::write_dpc_reg;
-        csr_rd_cb[dpc] = &this_class::read_dpc_reg;
-        csr_wr_cb[dcsr] = &this_class::write_dcsr_dcsr;
-        csr_rd_cb[dcsr] = &this_class::read_dcsr_reg;
+        csr_wr_cb[dscratch0] = &this_class::write_dscratch;
+        csr_rd_cb[dscratch0] = &this_class::read_debug;
+        csr_wr_cb[dscratch1] = &this_class::write_dscratch;
+        csr_rd_cb[dscratch1] = &this_class::read_debug;
+        csr_wr_cb[dpc] = &this_class::write_dpc;
+        csr_rd_cb[dpc] = &this_class::read_dpc;
+        csr_wr_cb[dcsr] = &this_class::write_dcsr;
+        csr_rd_cb[dcsr] = &this_class::read_debug;
    }
    hart_mem_rd_delegate = [this](phys_addr_t a, unsigned l, uint8_t* const d) -> iss::status { return this->read_mem(a, l, d); };
    hart_mem_wr_delegate = [this](phys_addr_t a, unsigned l, uint8_t const* const d) -> iss::status { return this->write_mem(a, l, d); };
@@ -644,71 +651,14 @@ riscv_hart_mu_p<BASE, FEAT, LOGCAT>::riscv_hart_mu_p(feature_config cfg)

 template <typename BASE, features_e FEAT, typename LOGCAT>
 std::pair<uint64_t, bool> riscv_hart_mu_p<BASE, FEAT, LOGCAT>::load_file(std::string name, int type) {
-    FILE* fp = fopen(name.c_str(), "r");
-    if(fp) {
-        std::array<char, 5> buf;
-        auto n = fread(buf.data(), 1, 4, fp);
-        fclose(fp);
-        if(n != 4)
-            throw std::runtime_error("input file has insufficient size");
-        buf[4] = 0;
-        if(strcmp(buf.data() + 1, "ELF") == 0) {
-            // Create elfio reader
-            ELFIO::elfio reader;
-            // Load ELF data
-            if(!reader.load(name))
-                throw std::runtime_error("could not process elf file");
-            // check elf properties
-            if(reader.get_class() != ELFCLASS32)
-                if(sizeof(reg_t) == 4)
-                    throw std::runtime_error("wrong elf class in file");
-            if(reader.get_type() != ET_EXEC)
-                throw std::runtime_error("wrong elf type in file");
-            if(reader.get_machine() != EM_RISCV)
-                throw std::runtime_error("wrong elf machine in file");
-            auto entry = reader.get_entry();
-            for(const auto pseg : reader.segments) {
-                const auto fsize = pseg->get_file_size(); // 0x42c/0x0
-                const auto seg_data = pseg->get_data();
-                const auto type = pseg->get_type();
-                if(type == 1 && fsize > 0) {
-                    auto res = this->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_WRITE, traits<BASE>::MEM,
-                                           pseg->get_physical_address(), fsize, reinterpret_cast<const uint8_t* const>(seg_data));
-                    if(res != iss::Ok)
-                        CPPLOG(ERR) << "problem writing " << fsize << "bytes to 0x" << std::hex << pseg->get_physical_address();
-                }
-            }
-            for(const auto sec : reader.sections) {
-                if(sec->get_name() == ".symtab") {
-                    if(SHT_SYMTAB == sec->get_type() || SHT_DYNSYM == sec->get_type()) {
-                        ELFIO::symbol_section_accessor symbols(reader, sec);
-                        auto sym_no = symbols.get_symbols_num();
-                        std::string name;
-                        ELFIO::Elf64_Addr value = 0;
-                        ELFIO::Elf_Xword size = 0;
-                        unsigned char bind = 0;
-                        unsigned char type = 0;
-                        ELFIO::Elf_Half section = 0;
-                        unsigned char other = 0;
-                        for(auto i = 0U; i < sym_no; ++i) {
-                            symbols.get_symbol(i, name, value, size, bind, type, section, other);
-                            if(name == "tohost") {
-                                tohost = value;
-                            } else if(name == "fromhost") {
-                                fromhost = value;
-                            }
-                        }
-                    }
-                } else if(sec->get_name() == ".tohost") {
-                    tohost = sec->get_address();
-                    fromhost = tohost + 0x40;
-                }
-            }
-            return std::make_pair(entry, true);
-        }
-        throw std::runtime_error(fmt::format("memory load file {} is not a valid elf file", name));
+    if(read_elf_file(name, sizeof(reg_t) == 4 ? ELFIO::ELFCLASS32 : ELFIO::ELFCLASS64,
+                     [this](uint64_t addr, uint64_t size, const uint8_t* const data) -> iss::status {
+                         return this->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_WRITE, traits<BASE>::MEM, addr, size,
+                                            data);
+                     })) {
+        return std::make_pair(entry_address, true);
    }
-    throw std::runtime_error(fmt::format("memory load file not found, check if {} is a valid file", name));
+    return std::make_pair(entry_address, false);
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
@@ -725,7 +675,7 @@ inline void riscv_hart_mu_p<BASE, FEAT, LOGCAT>::insert_mem_range(uint64_t base,
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-inline iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_pmpcfg_reg(unsigned addr, reg_t val) {
+inline iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_pmpcfg(unsigned addr, reg_t val) {
    csr[addr] = val & 0x9f9f9f9f;
    return iss::Ok;
 }
@@ -835,7 +785,7 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read(const address_type type, c
                    return iss::Err;
                }
            }
-            auto alignment = is_fetch(access) ? (has_compressed() ? 2 : 4) : length;
+            auto alignment = is_fetch(access) ? (has_compressed() ? 2 : 4) : std::min<unsigned>(length, sizeof(reg_t));
            if(unlikely(is_fetch(access) && (addr & (alignment - 1)))) {
                fault_data = addr;
                if(is_debug(access))
@@ -870,8 +820,10 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read(const address_type type, c
                }
                return res;
            } catch(trap_access& ta) {
-                this->reg.trap_state = (1UL << 31) | ta.id;
-                fault_data = ta.addr;
+                if((access & access_type::DEBUG) == 0) {
+                    this->reg.trap_state = (1UL << 31) | ta.id;
+                    fault_data = ta.addr;
+                }
                return iss::Err;
            }
        } break;
@@ -898,8 +850,10 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read(const address_type type, c
        }
        return iss::Ok;
    } catch(trap_access& ta) {
-        this->reg.trap_state = (1UL << 31) | ta.id;
-        fault_data = ta.addr;
+        if((access & access_type::DEBUG) == 0) {
+            this->reg.trap_state = (1UL << 31) | ta.id;
+            fault_data = ta.addr;
+        }
        return iss::Err;
    }
 }
@@ -950,7 +904,8 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write(const address_type type,
                return iss::Err;
            }
            try {
-                if(length > 1 && (addr & (length - 1)) && (access & access_type::DEBUG) != access_type::DEBUG) {
+                auto alignment = std::min<unsigned>(length, sizeof(reg_t));
+                if(length > 1 && (addr & (alignment - 1)) && !is_debug(access)) {
                    this->reg.trap_state = (1UL << 31) | 6 << 16;
                    fault_data = addr;
                    return iss::Err;
@@ -980,40 +935,6 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write(const address_type type,
                fault_data = ta.addr;
                return iss::Err;
            }
-
-            if((addr + length) > mem.size())
-                return iss::Err;
-            switch(addr) {
-            case 0x10013000: // UART0 base, TXFIFO reg
-            case 0x10023000: // UART1 base, TXFIFO reg
-                uart_buf << (char)data[0];
-                if(((char)data[0]) == '\n' || data[0] == 0) {
-                    // CPPLOG(INFO)<<"UART"<<((addr>>16)&0x3)<<" send
-                    // '"<<uart_buf.str()<<"'";
-                    std::cout << uart_buf.str();
-                    uart_buf.str("");
-                }
-                return iss::Ok;
-            case 0x10008000: { // HFROSC base, hfrosccfg reg
-                auto& p = mem(addr / mem.page_size);
-                auto offs = addr & mem.page_addr_mask;
-                std::copy(data, data + length, p.data() + offs);
-                auto& x = *(p.data() + offs + 3);
-                if(x & 0x40)
-                    x |= 0x80; // hfroscrdy = 1 if hfroscen==1
-                return iss::Ok;
-            }
-            case 0x10008008: { // HFROSC base, pllcfg reg
-                auto& p = mem(addr / mem.page_size);
-                auto offs = addr & mem.page_addr_mask;
-                std::copy(data, data + length, p.data() + offs);
-                auto& x = *(p.data() + offs + 3);
-                x |= 0x80; // set pll lock upon writing
-                return iss::Ok;
-            } break;
-            default: {
-            }
-            }
        } break;
        case traits<BASE>::CSR: {
            if(length != sizeof(reg_t))
@@ -1040,8 +961,10 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write(const address_type type,
        }
        return iss::Ok;
    } catch(trap_access& ta) {
-        this->reg.trap_state = (1UL << 31) | ta.id;
-        fault_data = ta.addr;
+        if((access & access_type::DEBUG) == 0) {
+            this->reg.trap_state = (1UL << 31) | ta.id;
+            fault_data = ta.addr;
+        }
        return iss::Err;
    }
 }
@@ -1074,12 +997,6 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_csr(unsigned addr, reg_t
    return (this->*(it->second))(addr, val);
 }

-template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_csr_reg(unsigned addr, reg_t& val) {
-    val = csr[addr];
-    return iss::Ok;
-}
-
 template <typename BASE, features_e FEAT, typename LOGCAT>
 iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_null(unsigned addr, reg_t& val) {
    val = 0;
@@ -1087,14 +1004,20 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_null(unsigned addr, reg_t&
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_csr_reg(unsigned addr, reg_t val) {
+iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_plain(unsigned addr, reg_t& val) {
+    val = csr[addr];
+    return iss::Ok;
+}
+
+template <typename BASE, features_e FEAT, typename LOGCAT>
+iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_plain(unsigned addr, reg_t val) {
    csr[addr] = val;
    return iss::Ok;
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
 iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_cycle(unsigned addr, reg_t& val) {
-    auto cycle_val = this->reg.icount + cycle_offset;
+    auto cycle_val = this->reg.cycle + cycle_offset;
    if(addr == mcycle) {
        val = static_cast<reg_t>(cycle_val);
    } else if(addr == mcycleh) {
@@ -1114,7 +1037,7 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_cycle(unsigned addr, reg_
            mcycle_csr = (static_cast<uint64_t>(val) << 32) + (mcycle_csr & 0xffffffff);
        }
    }
-    cycle_offset = mcycle_csr - this->reg.icount; // TODO: relying on wrap-around
+    cycle_offset = mcycle_csr - this->reg.cycle; // TODO: relying on wrap-around
    return iss::Ok;
 }

@@ -1145,7 +1068,7 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_instret(unsigned addr, re

 template <typename BASE, features_e FEAT, typename LOGCAT>
 iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_time(unsigned addr, reg_t& val) {
-    uint64_t time_val = this->reg.icount / (100000000 / 32768 - 1); //-> ~3052;
+    uint64_t time_val = this->reg.cycle / (100000000 / 32768 - 1); //-> ~3052;
    if(addr == time) {
        val = static_cast<reg_t>(time_val);
    } else if(addr == timeh) {
@@ -1161,6 +1084,7 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_tvec(unsigned addr, reg_t&
    val = FEAT & features_e::FEAT_CLIC ? csr[addr] : csr[addr] & ~2;
    return iss::Ok;
 }
+
 template <typename BASE, features_e FEAT, typename LOGCAT>
 iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_status(unsigned addr, reg_t& val) {
    val = state.mstatus & hart_state_type::get_mask((addr >> 8) & 0x3);
@@ -1272,7 +1196,7 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_epc(unsigned addr, reg_t
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_dcsr_dcsr(unsigned addr, reg_t val) {
+iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_dcsr(unsigned addr, reg_t val) {
    if(!debug_mode_active())
        throw illegal_instruction_fault(this->fault_data);
    //                  +-------------- ebreakm
@@ -1284,7 +1208,7 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_dcsr_dcsr(unsigned addr,
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_dcsr_reg(unsigned addr, reg_t& val) {
+iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_debug(unsigned addr, reg_t& val) {
    if(!debug_mode_active())
        throw illegal_instruction_fault(this->fault_data);
    val = csr[addr];
@@ -1292,7 +1216,7 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_dcsr_reg(unsigned addr, re
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_dcsr_reg(unsigned addr, reg_t val) {
+iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_dscratch(unsigned addr, reg_t val) {
    if(!debug_mode_active())
        throw illegal_instruction_fault(this->fault_data);
    csr[addr] = val;
@@ -1300,7 +1224,7 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_dcsr_reg(unsigned addr, r
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_dpc_reg(unsigned addr, reg_t& val) {
+iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_dpc(unsigned addr, reg_t& val) {
    if(!debug_mode_active())
        throw illegal_instruction_fault(this->fault_data);
    val = this->reg.DPC;
@@ -1308,7 +1232,7 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_dpc_reg(unsigned addr, reg
 }

 template <typename BASE, features_e FEAT, typename LOGCAT>
-iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_dpc_reg(unsigned addr, reg_t val) {
+iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_dpc(unsigned addr, reg_t val) {
    if(!debug_mode_active())
        throw illegal_instruction_fault(this->fault_data);
    this->reg.DPC = val;
@@ -1324,6 +1248,18 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_intstatus(unsigned addr, r
    return iss::Ok;
 }

+template <typename BASE, features_e FEAT, typename LOGCAT>
+iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_fcsr(unsigned addr, reg_t& val) {
+    val = this->get_fcsr();
+    return iss::Ok;
+}
+
+template <typename BASE, features_e FEAT, typename LOGCAT>
+iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_fcsr(unsigned addr, reg_t val) {
+    this->set_fcsr(val);
+    return iss::Ok;
+}
+
 template <typename BASE, features_e FEAT, typename LOGCAT>
 iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_intthresh(unsigned addr, reg_t val) {
    csr[addr] = (val & 0xff) | (1 << (cfg.clic_int_ctl_bits)) - 1;
@@ -1347,65 +1283,53 @@ iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::read_mem(phys_addr_t paddr, uns
    }
    return iss::Ok;
 }
-
 template <typename BASE, features_e FEAT, typename LOGCAT>
 iss::status riscv_hart_mu_p<BASE, FEAT, LOGCAT>::write_mem(phys_addr_t paddr, unsigned length, const uint8_t* const data) {
-    switch(paddr.val) {
-    // TODO remove UART, Peripherals should not be part of the ISS
-    case 0xFFFF0000: // UART0 base, TXFIFO reg
-        if(((char)data[0]) == '\n' || data[0] == 0) {
-            CPPLOG(INFO) << "UART" << ((paddr.val >> 12) & 0x3) << " send '" << uart_buf.str() << "'";
-            uart_buf.str("");
-        } else if(((char)data[0]) != '\r')
-            uart_buf << (char)data[0];
-        break;
-    default: {
-        mem_type::page_type& p = mem(paddr.val / mem.page_size);
-        std::copy(data, data + length, p.data() + (paddr.val & mem.page_addr_mask));
-        // tohost handling in case of riscv-test
-        if(paddr.access && iss::access_type::FUNC) {
-            auto tohost_upper =
-                (traits<BASE>::XLEN == 32 && paddr.val == (tohost + 4)) || (traits<BASE>::XLEN == 64 && paddr.val == tohost);
-            auto tohost_lower = (traits<BASE>::XLEN == 32 && paddr.val == tohost) || (traits<BASE>::XLEN == 64 && paddr.val == tohost);
-            if(tohost_lower || tohost_upper) {
-                uint64_t hostvar = *reinterpret_cast<uint64_t*>(p.data() + (tohost & mem.page_addr_mask));
-                // in case of 32 bit system, two writes to tohost are needed, only evaluate on the second (high) write
-                if(tohost_upper && (tohost_lower || tohost_lower_written)) {
-                    switch(hostvar >> 48) {
-                    case 0:
-                        if(hostvar != 0x1) {
-                            CPPLOG(FATAL) << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar
-                                          << "), stopping simulation";
-                        } else {
-                            CPPLOG(INFO) << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar
-                                         << "), stopping simulation";
-                        }
-                        this->reg.trap_state = std::numeric_limits<uint32_t>::max();
-                        this->interrupt_sim = hostvar;
-#ifndef WITH_TCC
-                        throw(iss::simulation_stopped(hostvar));
-#endif
-                        break;
-                    case 0x0101: {
-                        char c = static_cast<char>(hostvar & 0xff);
-                        if(c == '\n' || c == 0) {
-                            CPPLOG(INFO) << "tohost send '" << uart_buf.str() << "'";
-                            uart_buf.str("");
-                        } else
-                            uart_buf << c;
-                    } break;
-                    default:
-                        break;
-                    }
-                    tohost_lower_written = false;
-                } else if(tohost_lower)
-                    tohost_lower_written = true;
-            } else if((traits<BASE>::XLEN == 32 && paddr.val == fromhost + 4) || (traits<BASE>::XLEN == 64 && paddr.val == fromhost)) {
-                uint64_t fhostvar = *reinterpret_cast<uint64_t*>(p.data() + (fromhost & mem.page_addr_mask));
-                *reinterpret_cast<uint64_t*>(p.data() + (tohost & mem.page_addr_mask)) = fhostvar;
+    mem_type::page_type& p = mem(paddr.val / mem.page_size);
+    std::copy(data, data + length, p.data() + (paddr.val & mem.page_addr_mask));
+    // tohost handling in case of riscv-test
+    // according to https://github.com/riscv-software-src/riscv-isa-sim/issues/364#issuecomment-607657754:
+    if(paddr.access && iss::access_type::FUNC) {
+        if(paddr.val == tohost) {
+            reg_t cur_data = *reinterpret_cast<const reg_t*>(data);
+            // Extract Device (bits 63:56); widen first so the shift stays defined when reg_t is 32 bit
+            uint8_t device = traits<BASE>::XLEN == 32 ? 0 : (static_cast<uint64_t>(cur_data) >> 56) & 0xFF;
+            // Extract Command (bits 55:48); widen first so the shift stays defined when reg_t is 32 bit
+            uint8_t command = traits<BASE>::XLEN == 32 ? 0 : (static_cast<uint64_t>(cur_data) >> 48) & 0xFF;
+            // Extract payload (bits 47:0)
+            uint64_t payload_addr = cur_data & 0xFFFFFFFFFFFFULL;
+            if(payload_addr & 1) {
+                CPPLOG(FATAL) << "tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr
+                              << "), stopping simulation";
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+                return iss::Ok;
+            } else if(device == 0 && command == 0) {
+                std::array<uint64_t, 8> loaded_payload{}; // zeroed: a failed read below must not leave garbage behind
+                if(read(address_type::PHYSICAL, access_type::DEBUG_READ, traits<BASE>::MEM, payload_addr, 8 * sizeof(uint64_t),
+                        reinterpret_cast<uint8_t*>(loaded_payload.data())) == iss::Err)
+                    CPPLOG(ERR) << "Syscall read went wrong";
+                uint64_t syscall_num = loaded_payload.at(0);
+                if(syscall_num == 64) { // SYS_WRITE
+                    return execute_sys_write(this, loaded_payload, traits<BASE>::MEM);
+                } else {
+                    CPPLOG(ERR) << "tohost syscall with number 0x" << std::hex << syscall_num << std::dec << " (" << syscall_num
+                                << ") not implemented";
+                    this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                    this->interrupt_sim = payload_addr;
+                    return iss::Ok;
+                }
+            } else {
+                CPPLOG(ERR) << "tohost functionality not implemented for device " << static_cast<unsigned>(device) << " and command " << static_cast<unsigned>(command);
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+                return iss::Ok;
+            }
+        }
-    }
+        if((traits<BASE>::XLEN == 32 && paddr.val == fromhost + 4) || (traits<BASE>::XLEN == 64 && paddr.val == fromhost)) {
+            uint64_t fhostvar = *reinterpret_cast<uint64_t*>(p.data() + (fromhost & mem.page_addr_mask));
+            *reinterpret_cast<uint64_t*>(p.data() + (tohost & mem.page_addr_mask)) = fhostvar;
+        }
    }
    return iss::Ok;
 }
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017 - 2020 MINRES Technologies GmbH
+ * Copyright (C) 2024 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -30,8 +30,8 @@
 *
 *******************************************************************************/

-#ifndef _ISS_DEBUGGER_RISCV_TARGET_ADAPTER_H_
-#define _ISS_DEBUGGER_RISCV_TARGET_ADAPTER_H_
+#ifndef _ISS_ARCH_DEBUGGER_RISCV_TARGET_ADAPTER_H_
+#define _ISS_ARCH_DEBUGGER_RISCV_TARGET_ADAPTER_H_

 #include "iss/arch_if.h"
 #include <iss/arch/traits.h>
@@ -48,6 +48,10 @@

 namespace iss {
 namespace debugger {
+
+char const* const get_csr_name(unsigned);
+constexpr auto csr_offset = 100U;
+
 using namespace iss::arch;
 using namespace iss::debugger;

@@ -129,11 +133,17 @@ public:

 protected:
    static inline constexpr addr_t map_addr(const addr_t& i) { return i; }
-
+    std::string csr_xml;
    iss::arch_if* core;
    rp_thread_ref thread_idx;
 };

+template <typename ARCH> typename std::enable_if<iss::arch::traits<ARCH>::FLEN != 0, unsigned>::type get_f0_offset() {
+    return iss::arch::traits<ARCH>::F0;
+}
+
+template <typename ARCH> typename std::enable_if<iss::arch::traits<ARCH>::FLEN == 0, unsigned>::type get_f0_offset() { return 0; }
+
 template <typename ARCH> status riscv_target_adapter<ARCH>::set_gen_thread(rp_thread_ref& thread) {
    thread_idx = thread;
    return Ok;
@@ -175,34 +185,37 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::current_thread_query

 template <typename ARCH> status riscv_target_adapter<ARCH>::read_registers(std::vector<uint8_t>& data, std::vector<uint8_t>& avail) {
    CPPLOG(TRACE) << "reading target registers";
-    // return idx<0?:;
    data.clear();
    avail.clear();
    const uint8_t* reg_base = core->get_regs_base_ptr();
    auto start_reg = arch::traits<ARCH>::X0;
-    for(size_t reg_no = start_reg; reg_no < start_reg + 33 /*arch::traits<ARCH>::NUM_REGS*/; ++reg_no) {
-        auto reg_width = arch::traits<ARCH>::reg_bit_widths[reg_no] / 8;
-        unsigned offset = traits<ARCH>::reg_byte_offsets[reg_no];
-        for(size_t j = 0; j < reg_width; ++j) {
-            data.push_back(*(reg_base + offset + j));
-            avail.push_back(0xff);
+    for(size_t i = 0; i < 33; ++i) { // 33 slots of XLEN/8 bytes each: slots 0..31 are x0..x31, slot 32 is the pc
+        if(i < arch::traits<ARCH>::RFS || i == 32) { // compare against slot 32 (as write_registers does), not against the enum value of PC
+            auto reg_no = i < 32 ? start_reg + i : arch::traits<ARCH>::PC;
+            unsigned offset = traits<ARCH>::reg_byte_offsets[reg_no];
+            for(size_t j = 0; j < arch::traits<ARCH>::XLEN / 8; ++j) {
+                data.push_back(*(reg_base + offset + j));
+                avail.push_back(0xff);
+            }
+        } else { // slot has no backing x register (i >= RFS): emit zero bytes flagged as unavailable
+            for(size_t j = 0; j < arch::traits<ARCH>::XLEN / 8; ++j) {
+                data.push_back(0);
+                avail.push_back(0);
+            }
+        }
+    }
+    if(iss::arch::traits<ARCH>::FLEN > 0) { // append f0..f31 only when the core has FP registers
+        auto fstart_reg = get_f0_offset<ARCH>();
+        for(size_t i = 0; i < 32; ++i) {
+            auto reg_no = fstart_reg + i;
+            auto reg_width = arch::traits<ARCH>::reg_bit_widths[reg_no] / 8;
+            unsigned offset = traits<ARCH>::reg_byte_offsets[reg_no];
+            for(size_t j = 0; j < reg_width; ++j) {
+                data.push_back(*(reg_base + offset + j));
+                avail.push_back(0xff);
+            }
        }
    }
-    // work around fill with F type registers
-    //    if (arch::traits<ARCH>::NUM_REGS < 65) {
-    //        auto reg_width = sizeof(typename arch::traits<ARCH>::reg_t);
-    //        for (size_t reg_no = 0; reg_no < 33; ++reg_no) {
-    //            for (size_t j = 0; j < reg_width; ++j) {
-    //                data.push_back(0x0);
-    //                avail.push_back(0x00);
-    //            }
-    //            // if(arch::traits<ARCH>::XLEN < 64)
-    //            //     for(unsigned j=0; j<4; ++j){
-    //            //         data.push_back(0x0);
-    //            //         avail.push_back(0x00);
-    //            //     }
-    //        }
-    //    }
    return Ok;
 }

@@ -210,25 +223,25 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::write_registers(cons
    auto start_reg = arch::traits<ARCH>::X0;
    auto* reg_base = core->get_regs_base_ptr();
    auto iter = data.data();
-    bool e_ext = arch::traits<ARCH>::PC < 32;
-    for(size_t reg_no = 0; reg_no < start_reg + 33 /*arch::traits<ARCH>::NUM_REGS*/; ++reg_no) {
-        if(e_ext && reg_no > 15) {
-            if(reg_no == 32) {
-                auto reg_width = arch::traits<ARCH>::reg_bit_widths[arch::traits<ARCH>::PC] / 8;
-                auto offset = traits<ARCH>::reg_byte_offsets[arch::traits<ARCH>::PC];
-                std::copy(iter, iter + reg_width, reg_base);
-            } else {
-                const uint64_t zero_val = 0;
-                auto reg_width = arch::traits<ARCH>::reg_bit_widths[15] / 8;
-                auto iter = (uint8_t*)&zero_val;
-                std::copy(iter, iter + reg_width, reg_base);
-            }
-        } else {
-            auto reg_width = arch::traits<ARCH>::reg_bit_widths[reg_no] / 8;
-            auto offset = traits<ARCH>::reg_byte_offsets[reg_no];
-            std::copy(iter, iter + reg_width, reg_base);
-            iter += 4;
-            reg_base += offset;
+    auto iter_end = data.data() + data.size();
+    for(size_t i = 0; i < 33 && iter < iter_end; ++i) {
+        auto reg_width = arch::traits<ARCH>::XLEN / 8;
+        if(i < arch::traits<ARCH>::RFS) {
+            auto offset = traits<ARCH>::reg_byte_offsets[start_reg + i];
+            std::copy(iter, iter + reg_width, reg_base + offset);
+        } else if(i == 32) {
+            auto offset = traits<ARCH>::reg_byte_offsets[arch::traits<ARCH>::PC];
+            std::copy(iter, iter + reg_width, reg_base + offset);
+        }
+        iter += reg_width;
+    }
+    if(iss::arch::traits<ARCH>::FLEN > 0) {
+        auto fstart_reg = get_f0_offset<ARCH>();
+        auto reg_width = arch::traits<ARCH>::FLEN / 8;
+        for(size_t i = 0; i < 32 && iter < iter_end; ++i) {
+            unsigned offset = traits<ARCH>::reg_byte_offsets[fstart_reg + i];
+            std::copy(iter, iter + reg_width, reg_base + offset);
+            iter += reg_width;
        }
    }
    return Ok;
@@ -236,7 +249,7 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::write_registers(cons

 template <typename ARCH>
 status riscv_target_adapter<ARCH>::read_single_register(unsigned int reg_no, std::vector<uint8_t>& data, std::vector<uint8_t>& avail) {
-    if(reg_no < 65) {
+    if(reg_no < csr_offset) {
        // auto reg_size = arch::traits<ARCH>::reg_bit_width(static_cast<typename
        // arch::traits<ARCH>::reg_e>(reg_no))/8;
        auto* reg_base = core->get_regs_base_ptr();
@@ -247,23 +260,24 @@ status riscv_target_adapter<ARCH>::read_single_register(unsigned int reg_no, std
        std::copy(reg_base + offset, reg_base + offset + reg_width, data.begin());
        std::fill(avail.begin(), avail.end(), 0xff);
    } else {
-        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_READ, traits<ARCH>::CSR, reg_no - 65);
+        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_READ, traits<ARCH>::CSR, reg_no - csr_offset);
        data.resize(sizeof(typename traits<ARCH>::reg_t));
        avail.resize(sizeof(typename traits<ARCH>::reg_t));
        std::fill(avail.begin(), avail.end(), 0xff);
        core->read(a, data.size(), data.data());
+        std::fill(avail.begin(), avail.end(), 0xff);
    }
    return data.size() > 0 ? Ok : Err;
 }

 template <typename ARCH> status riscv_target_adapter<ARCH>::write_single_register(unsigned int reg_no, const std::vector<uint8_t>& data) {
-    if(reg_no < 65) {
+    if(reg_no < csr_offset) {
        auto* reg_base = core->get_regs_base_ptr();
        auto reg_width = arch::traits<ARCH>::reg_bit_widths[static_cast<typename arch::traits<ARCH>::reg_e>(reg_no)] / 8;
        auto offset = traits<ARCH>::reg_byte_offsets[reg_no];
        std::copy(data.begin(), data.begin() + reg_width, reg_base + offset);
    } else {
-        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_WRITE, traits<ARCH>::CSR, reg_no - 65);
+        typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_WRITE, traits<ARCH>::CSR, reg_no - csr_offset);
        core->write(a, data.size(), data.data());
    }
    return Ok;
@@ -276,7 +290,7 @@ template <typename ARCH> status riscv_target_adapter<ARCH>::read_mem(uint64_t ad
 }

 template <typename ARCH> status riscv_target_adapter<ARCH>::write_mem(uint64_t addr, const std::vector<uint8_t>& data) {
-    auto a = map_addr({iss::access_type::DEBUG_READ, iss::address_type::VIRTUAL, 0, addr});
+    auto a = map_addr({iss::access_type::DEBUG_WRITE, iss::address_type::VIRTUAL, 0, addr}); // debug write access to virtual address addr; the write itself is run through srv->execute_syncronized
    auto f = [&]() -> status { return core->write(a, data.size(), data.data()); };
    return srv->execute_syncronized(f);
 }
@@ -369,93 +383,57 @@ status riscv_target_adapter<ARCH>::resume_from_addr(bool step, int sig, uint64_t
 }

 template <typename ARCH> status riscv_target_adapter<ARCH>::target_xml_query(std::string& out_buf) {
-    const std::string res{"<?xml version=\"1.0\"?><!DOCTYPE target SYSTEM \"gdb-target.dtd\">"
-                          "<target><architecture>riscv:rv32</architecture>"
-                          //"  <feature name=\"org.gnu.gdb.riscv.rv32i\">\n"
-                          //"    <reg name=\"x0\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x1\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x2\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x3\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x4\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x5\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x6\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x7\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x8\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x9\"  bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x10\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x11\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x12\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x13\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x14\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x15\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x16\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x17\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x18\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x19\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x20\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x21\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x22\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x23\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x24\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x25\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x26\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x27\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x28\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x29\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x30\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"    <reg name=\"x31\" bitsize=\"32\" group=\"general\"/>\n"
-                          //"  </feature>\n"
-                          "</target>"};
-    out_buf = res;
+    if(!csr_xml.size()) { // the description is generated on the first query and cached in csr_xml
+        std::ostringstream oss;
+        oss << "<?xml version=\"1.0\"?><!DOCTYPE target SYSTEM \"gdb-target.dtd\"><target version=\"1.0\">\n"; // DOCTYPE must name the root element <target>
+        if(iss::arch::traits<ARCH>::XLEN == 32)
+            oss << "<architecture>riscv:rv32</architecture>\n";
+        else if(iss::arch::traits<ARCH>::XLEN == 64)
+            oss << "  <architecture>riscv:rv64</architecture>\n"; // opening tag was missing its closing '>'
+        oss << "  <feature name=\"org.gnu.gdb.riscv.cpu\">\n";
+        auto reg_base_num = iss::arch::traits<ARCH>::X0;
+        for(auto i = 0U; i < iss::arch::traits<ARCH>::RFS; ++i) { // one <reg> per implemented x register, regnum == register index
+            oss << "    <reg name=\"x" << i << "\" bitsize=\"" << iss::arch::traits<ARCH>::reg_bit_widths[reg_base_num + i]
+                << "\" type=\"int\" regnum=\"" << i << "\"/>\n";
+        }
+        oss << "    <reg name=\"pc\" bitsize=\"" << iss::arch::traits<ARCH>::reg_bit_widths[iss::arch::traits<ARCH>::PC]
+            << "\" type=\"code_ptr\" regnum=\"" << 32U << "\"/>\n"; // pc is always regnum 32, independent of RFS
+        oss << "  </feature>\n";
+        if(iss::arch::traits<ARCH>::FLEN > 0) {
+            oss << "  <feature name=\"org.gnu.gdb.riscv.fpu\">\n";
+            auto reg_base_num = get_f0_offset<ARCH>();
+            auto type = iss::arch::traits<ARCH>::FLEN == 32 ? "ieee_single" : "ieee_double"; // use gdb predefined types only; "riscv_double" is not defined in this description
+            for(auto i = 0U; i < 32; ++i) {
+                oss << "    <reg name=\"f" << i << "\" bitsize=\"" << iss::arch::traits<ARCH>::reg_bit_widths[reg_base_num + i]
+                    << "\" type=\"" << type << "\" regnum=\"" << i + 33 << "\"/>\n"; // f0..f31 use regnum 33..64
+            }
+            oss << "    <reg name=\"fcsr\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN << "\" regnum=\"103\" type=\"int\"/>\n"; // attribute needs the name="value" form to be well-formed XML
+            oss << "    <reg name=\"fflags\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN << "\" regnum=\"101\" type=\"int\"/>\n";
+            oss << "    <reg name=\"frm\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN << "\" regnum=\"102\" type=\"int\"/>\n";
+            oss << "  </feature>\n";
+        }
+        oss << "  <feature name=\"org.gnu.gdb.riscv.csr\">\n";
+        std::vector<uint8_t> data;
+        std::vector<uint8_t> avail;
+        data.resize(sizeof(typename traits<ARCH>::reg_t));
+        avail.resize(sizeof(typename traits<ARCH>::reg_t));
+        for(auto i = 0U; i < 4096; ++i) { // probe all 4096 CSR indices; only those whose debug read returns iss::Ok are listed
+            typed_addr_t<iss::address_type::PHYSICAL> a(iss::access_type::DEBUG_READ, traits<ARCH>::CSR, i);
+            std::fill(avail.begin(), avail.end(), 0xff);
+            auto res = core->read(a, data.size(), data.data());
+            if(res == iss::Ok) {
+                oss << "    <reg name=\"" << get_csr_name(i) << "\" bitsize=\"" << iss::arch::traits<ARCH>::XLEN
+                    << "\"  type=\"int\" regnum=\"" << (i + csr_offset) << "\"/>\n"; // same numbering as read_single_register/write_single_register
+            }
+        }
+        oss << "  </feature>\n";
+        oss << "</target>\n";
+        csr_xml = oss.str();
+    }
+    out_buf = csr_xml;
    return Ok;
 }
-
-/*
- *
-<?xml version="1.0"?>
-<!DOCTYPE target SYSTEM "gdb-target.dtd">
-<target>
-  <architecture>riscv:rv32</architecture>
-
-  <feature name="org.gnu.gdb.riscv.rv32i">
-    <reg name="x0"  bitsize="32" group="general"/>
-    <reg name="x1"  bitsize="32" group="general"/>
-    <reg name="x2"  bitsize="32" group="general"/>
-    <reg name="x3"  bitsize="32" group="general"/>
-    <reg name="x4"  bitsize="32" group="general"/>
-    <reg name="x5"  bitsize="32" group="general"/>
-    <reg name="x6"  bitsize="32" group="general"/>
-    <reg name="x7"  bitsize="32" group="general"/>
-    <reg name="x8"  bitsize="32" group="general"/>
-    <reg name="x9"  bitsize="32" group="general"/>
-    <reg name="x10" bitsize="32" group="general"/>
-    <reg name="x11" bitsize="32" group="general"/>
-    <reg name="x12" bitsize="32" group="general"/>
-    <reg name="x13" bitsize="32" group="general"/>
-    <reg name="x14" bitsize="32" group="general"/>
-    <reg name="x15" bitsize="32" group="general"/>
-    <reg name="x16" bitsize="32" group="general"/>
-    <reg name="x17" bitsize="32" group="general"/>
-    <reg name="x18" bitsize="32" group="general"/>
-    <reg name="x19" bitsize="32" group="general"/>
-    <reg name="x20" bitsize="32" group="general"/>
-    <reg name="x21" bitsize="32" group="general"/>
-    <reg name="x22" bitsize="32" group="general"/>
-    <reg name="x23" bitsize="32" group="general"/>
-    <reg name="x24" bitsize="32" group="general"/>
-    <reg name="x25" bitsize="32" group="general"/>
-    <reg name="x26" bitsize="32" group="general"/>
-    <reg name="x27" bitsize="32" group="general"/>
-    <reg name="x28" bitsize="32" group="general"/>
-    <reg name="x29" bitsize="32" group="general"/>
-    <reg name="x30" bitsize="32" group="general"/>
-    <reg name="x31" bitsize="32" group="general"/>
-  </feature>
-
-</target>
-
- */
 } // namespace debugger
 } // namespace iss

-#endif /* _ISS_DEBUGGER_RISCV_TARGET_ADAPTER_H_ */
+#endif /* _ISS_ARCH_DEBUGGER_RISCV_TARGET_ADAPTER_H_ */
@@ -69,7 +69,8 @@ int main(int argc, char* argv[]) {
        ("logfile,l", po::value<std::string>(), "Sets default log file.")
        ("disass,d", po::value<std::string>()->implicit_value(""), "Enables disassembly")
        ("gdb-port,g", po::value<unsigned>()->default_value(0), "enable gdb server and specify port to use")
-        ("instructions,i", po::value<uint64_t>()->default_value(std::numeric_limits<uint64_t>::max()), "max. number of instructions to simulate")
+        ("ilimit,i", po::value<uint64_t>()->default_value(std::numeric_limits<uint64_t>::max()), "max. number of instructions to simulate")
+        ("flimit", po::value<uint64_t>()->default_value(std::numeric_limits<uint64_t>::max()), "max. number of fetches to simulate")
        ("reset,r", po::value<std::string>(), "reset address")
        ("dump-ir", "dump the intermediate representation")
        ("elf,f", po::value<std::vector<std::string>>(), "ELF file(s) to load")
@@ -140,7 +141,10 @@ int main(int argc, char* argv[]) {
            std::tie(cpu, vm) = f.create(isa_opt, clim["gdb-port"].as<unsigned>(), &semihosting_cb);
        }
        if(!cpu) {
-            CPPLOG(ERR) << "Could not create cpu for isa " << isa_opt << " and backend " << clim["backend"].as<std::string>() << std::endl;
+            auto list = f.get_names();
+            std::sort(std::begin(list), std::end(list));
+            CPPLOG(ERR) << "Could not create cpu for isa " << isa_opt << " and backend " << clim["backend"].as<std::string>() << "\n"
+                        << "Available implementations (core|platform|backend):\n  - " << util::join(list, "\n  - ") << std::endl;
            return 127;
        }
        if(!vm) {
@@ -202,21 +206,36 @@ int main(int argc, char* argv[]) {
        if(clim.count("elf"))
            for(std::string input : clim["elf"].as<std::vector<std::string>>()) {
                auto start_addr = vm->get_arch()->load_file(input);
-                if(start_addr.second) // FIXME: this always evaluates to true as load file always returns <sth, true>
+                if(start_addr.second)
                    start_address = start_addr.first;
+                else {
+                    LOG(ERR) << "Error occured while loading file " << input << std::endl;
+                    return 1;
+                }
            }
        for(std::string input : args) {
            auto start_addr = vm->get_arch()->load_file(input); // treat remaining arguments as elf files
-            if(start_addr.second) // FIXME: this always evaluates to true as load file always returns <sth, true>
+            if(start_addr.second)
                start_address = start_addr.first;
+            else {
+                LOG(ERR) << "Error occured while loading file " << input << std::endl;
+                return 1;
+            }
        }
        if(clim.count("reset")) {
            auto str = clim["reset"].as<std::string>();
            start_address = str.find("0x") == 0 ? std::stoull(str.substr(2), nullptr, 16) : std::stoull(str, nullptr, 10);
        }
        vm->reset(start_address);
-        auto cycles = clim["instructions"].as<uint64_t>();
-        res = vm->start(cycles, dump);
+        auto limit = clim["ilimit"].as<uint64_t>(); // instruction limit is the default stop criterion
+        auto cond = iss::finish_cond_e::JUMP_TO_SELF;
+        if(!clim["flimit"].defaulted()) { // flimit has a default_value, so count("flimit") is always 1; only an explicitly given --flimit selects the fetch limit
+            cond = cond | iss::finish_cond_e::FCOUNT_LIMIT;
+            limit = clim["flimit"].as<uint64_t>();
+        } else {
+            cond = cond | iss::finish_cond_e::ICOUNT_LIMIT;
+        }
+        res = vm->start(limit, dump, cond);

        auto instr_if = vm->get_arch()->get_instrumentation_if();
        // this assumes a single input file
@@ -42,7 +42,6 @@
 #include <iss/plugin/loader.h>
 #endif
 #include "sc_core_adapter_if.h"
-#include <iss/arch/tgc_mapper.h>
 #include <scc/report.h>
 #include <util/ities.h>
 #include <iostream>
@@ -125,7 +124,7 @@ using vm_ptr = std::unique_ptr<iss::vm_if>;

 class core_wrapper {
 public:
-    core_wrapper(core_complex* owner)
+    core_wrapper(core_complex_if* owner)
    : owner(owner) {}

    void reset(uint64_t addr) { vm->reset(addr); }
@@ -181,7 +180,7 @@ public:
                                             "SystemC sub-commands: break <time>, print_time"});
    }

-    core_complex* const owner;
+    core_complex_if* const owner;
    vm_ptr vm{nullptr};
    sc_cpu_ptr cpu{nullptr};
    iss::debugger::target_adapter_if* tgt_adapter{nullptr};
@@ -197,9 +196,9 @@ struct core_trace {
    scv_tr_handle tr_handle;
 };

-SC_HAS_PROCESS(core_complex); // NOLINT
 #ifndef CWR_SYSTEMC
-core_complex::core_complex(sc_module_name const& name)
+template <unsigned int BUSWIDTH>
+core_complex<BUSWIDTH>::core_complex(sc_module_name const& name)
 : sc_module(name)
 , fetch_lut(tlm_dmi_ext())
 , read_lut(tlm_dmi_ext())
@@ -208,7 +207,7 @@ core_complex::core_complex(sc_module_name const& name)
 }
 #endif

-void core_complex::init() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::init() {
    trc = new core_trace();
    ibus.register_invalidate_direct_mem_ptr([=](uint64_t start, uint64_t end) -> void {
        auto lut_entry = fetch_lut.getEntry(start);
@@ -227,6 +226,7 @@ void core_complex::init() {
        }
    });

+    SC_HAS_PROCESS(core_complex<BUSWIDTH>); // NOLINT
    SC_THREAD(run);
    SC_METHOD(rst_cb);
    sensitive << rst_i;
@@ -252,16 +252,16 @@ void core_complex::init() {
 #endif
 }

-core_complex::~core_complex() {
+template <unsigned int BUSWIDTH> core_complex<BUSWIDTH>::~core_complex() { // deletes the core wrapper (cpu), the trace record (trc) and every entry of plugin_list
    delete cpu;
    delete trc;
    for(auto* p : plugin_list)
        delete p;
 }

-void core_complex::trace(sc_trace_file* trf) const {}
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::trace(sc_trace_file* trf) const {} // empty override: nothing is added to the trace file

-void core_complex::before_end_of_elaboration() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::before_end_of_elaboration() {
    SCCDEBUG(SCMOD) << "instantiating iss::arch::tgf with " << GET_PROP_VALUE(backend) << " backend";
    // cpu = scc::make_unique<core_wrapper>(this);
    cpu = new core_wrapper(this);
@@ -302,7 +302,7 @@ void core_complex::before_end_of_elaboration() {
    }
 }

-void core_complex::start_of_simulation() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::start_of_simulation() {
    // quantum_keeper.reset();
    if(GET_PROP_VALUE(elf_file).size() > 0) {
        istringstream is(GET_PROP_VALUE(elf_file));
@@ -325,7 +325,7 @@ void core_complex::start_of_simulation() {
    }
 }

-bool core_complex::disass_output(uint64_t pc, const std::string instr_str) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::disass_output(uint64_t pc, const std::string instr_str) {
    if(trc->m_db == nullptr)
        return false;
    if(trc->tr_handle.is_active())
@@ -339,7 +339,7 @@ bool core_complex::disass_output(uint64_t pc, const std::string instr_str) {
    return true;
 }

-void core_complex::forward() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::forward() {
 #ifndef CWR_SYSTEMC
    set_clock_period(clk_i.read());
 #else
@@ -348,24 +348,24 @@ void core_complex::forward() {
 #endif
 }

-void core_complex::set_clock_period(sc_core::sc_time period) {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::set_clock_period(sc_core::sc_time period) { // stores period in curr_clk; a zero period additionally calls cpu->set_interrupt_execution(true)
    curr_clk = period;
    if(period == SC_ZERO_TIME)
        cpu->set_interrupt_execution(true);
 }

-void core_complex::rst_cb() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::rst_cb() { // when rst_i reads true, calls cpu->set_interrupt_execution(true)
    if(rst_i.read())
        cpu->set_interrupt_execution(true);
 }

-void core_complex::sw_irq_cb() { cpu->local_irq(3, sw_irq_i.read()); }
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::sw_irq_cb() { cpu->local_irq(3, sw_irq_i.read()); } // forwards sw_irq_i as interrupt 3 (presumably the RISC-V machine software interrupt - confirm)

-void core_complex::timer_irq_cb() { cpu->local_irq(7, timer_irq_i.read()); }
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::timer_irq_cb() { cpu->local_irq(7, timer_irq_i.read()); } // forwards timer_irq_i as interrupt 7 (presumably the RISC-V machine timer interrupt - confirm)

-void core_complex::ext_irq_cb() { cpu->local_irq(11, ext_irq_i.read()); }
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::ext_irq_cb() { cpu->local_irq(11, ext_irq_i.read()); } // forwards ext_irq_i as interrupt 11 (presumably the RISC-V machine external interrupt - confirm)

-void core_complex::local_irq_cb() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::local_irq_cb() {
    for(auto i = 0U; i < local_irq_i.size(); ++i) {
        if(local_irq_i[i].event()) {
            cpu->local_irq(16 + i, local_irq_i[i].read());
@@ -373,7 +373,7 @@ void core_complex::local_irq_cb() {
    }
 }

-void core_complex::run() {
+template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::run() {
    wait(SC_ZERO_TIME); // separate from elaboration phase
    do {
        wait(SC_ZERO_TIME);
@@ -391,7 +391,7 @@ void core_complex::run() {
    sc_stop();
 }

-bool core_complex::read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) {
    auto& dmi_lut = is_fetch ? fetch_lut : read_lut;
    auto lut_entry = dmi_lut.getEntry(addr);
    if(lut_entry.get_granted_access() != tlm::tlm_dmi::DMI_ACCESS_NONE && addr + length <= lut_entry.get_end_address() + 1) {
@@ -449,7 +449,7 @@ bool core_complex::read_mem(uint64_t addr, unsigned length, uint8_t* const data,
    }
 }

-bool core_complex::write_mem(uint64_t addr, unsigned length, const uint8_t* const data) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::write_mem(uint64_t addr, unsigned length, const uint8_t* const data) {
    auto lut_entry = write_lut.getEntry(addr);
    if(lut_entry.get_granted_access() != tlm::tlm_dmi::DMI_ACCESS_NONE && addr + length <= lut_entry.get_end_address() + 1) {
        auto offset = addr - lut_entry.get_start_address();
@@ -497,7 +497,7 @@ bool core_complex::write_mem(uint64_t addr, unsigned length, const uint8_t* cons
    }
 }

-bool core_complex::read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) {
    tlm::tlm_generic_payload gp;
    gp.set_command(tlm::TLM_READ_COMMAND);
    gp.set_address(addr);
@@ -507,7 +507,7 @@ bool core_complex::read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const d
    return dbus->transport_dbg(gp) == length;
 }

-bool core_complex::write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) {
+template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) {
    write_buf.resize(length);
    std::copy(data, data + length, write_buf.begin()); // need to copy as TLM does not guarantee data integrity
    tlm::tlm_generic_payload gp;
@@ -518,5 +518,10 @@ bool core_complex::write_mem_dbg(uint64_t addr, unsigned length, const uint8_t*
    gp.set_streaming_width(length);
    return dbus->transport_dbg(gp) == length;
 }
+
+template class core_complex<scc::LT>; // explicit instantiation for the default BUSWIDTH; the member templates are defined in this file
+template class core_complex<32>; // explicit instantiation for a bus width of 32
+template class core_complex<64>; // explicit instantiation for a bus width of 64
+
 } /* namespace tgfs */
 } /* namespace sysc */
@@ -33,6 +33,7 @@
 #ifndef _SYSC_CORE_COMPLEX_H_
 #define _SYSC_CORE_COMPLEX_H_

+#include <scc/signal_opt_ports.h>
 #include <scc/tick2time.h>
 #include <scc/traceable.h>
 #include <scc/utilities.h>
@@ -40,10 +41,8 @@
 #include <tlm/scc/scv/tlm_rec_initiator_socket.h>
 #ifdef CWR_SYSTEMC
 #include <scmlinc/scml_property.h>
-#define SOCKET_WIDTH 32
 #else
 #include <cci_configuration>
-#define SOCKET_WIDTH scc::LT
 #endif
 #include <memory>
 #include <tlm>
@@ -68,12 +67,35 @@ public:
 namespace tgfs {
 class core_wrapper;
 struct core_trace;
+struct core_complex_if { // bus-width independent interface of core_complex; core_wrapper holds its owner through this type

-class core_complex : public sc_core::sc_module, public scc::traceable {
+    virtual ~core_complex_if() = default;
+
+    virtual bool read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) = 0; // read length bytes at addr into data; is_fetch marks instruction fetches
+
+    virtual bool write_mem(uint64_t addr, unsigned length, const uint8_t* const data) = 0; // write length bytes from data to addr
+
+    virtual bool read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) = 0; // debug variant of read_mem
+
+    virtual bool write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) = 0; // debug variant of write_mem
+
+    virtual bool disass_output(uint64_t pc, const std::string instr) = 0; // hands over the disassembly text instr of the instruction at pc
+
+    virtual unsigned get_last_bus_cycles() = 0; // bus cycle count accumulated since the previous call
+
+    //! Allow quantum keeper handling
+    virtual void sync(uint64_t) = 0;
+
+    virtual char const* hier_name() = 0; // name of the implementing module
+
+    scc::sc_in_opt<uint64_t> mtime_i{"mtime_i"}; // mtime input port (sc_in_opt: presumably may be left unbound - confirm)
+};
+
+template <unsigned int BUSWIDTH = scc::LT> class core_complex : public sc_core::sc_module, public scc::traceable, public core_complex_if {
 public:
-    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<SOCKET_WIDTH>> ibus{"ibus"};
+    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<BUSWIDTH>> ibus{"ibus"};

-    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<SOCKET_WIDTH>> dbus{"dbus"};
+    tlm::scc::initiator_mixin<tlm::tlm_initiator_socket<BUSWIDTH>> dbus{"dbus"};

    sc_core::sc_in<bool> rst_i{"rst_i"};

@@ -88,8 +110,6 @@ public:
 #ifndef CWR_SYSTEMC
    sc_core::sc_in<sc_core::sc_time> clk_i{"clk_i"};

-    sc_core::sc_port<tlm::tlm_peek_if<uint64_t>, 1, sc_core::SC_ZERO_OR_MORE_BOUND> mtime_o{"mtime_o"};
-
    cci::cci_param<std::string> elf_file{"elf_file", ""};

    cci::cci_param<bool> enable_disass{"enable_disass", false};
@@ -115,8 +135,6 @@ public:
 #else
    sc_core::sc_in<bool> clk_i{"clk_i"};

-    sc_core::sc_in<uint64_t> mtime_i{"mtime_i"};
-
    scml_property<std::string> elf_file{"elf_file", ""};

    scml_property<bool> enable_disass{"enable_disass", false};
@@ -159,13 +177,13 @@ public:

    ~core_complex();

-    inline unsigned get_last_bus_cycles() {
+    unsigned get_last_bus_cycles() override { // returns max(ibus_inc, dbus_inc), but at least 1, and resets both counters to 0
        auto mem_incr = std::max(ibus_inc, dbus_inc);
        ibus_inc = dbus_inc = 0;
        return mem_incr > 1 ? mem_incr : 1;
    }

-    inline void sync(uint64_t cycle) {
+    void sync(uint64_t cycle) override {
        auto core_inc = curr_clk * (cycle - last_sync_cycle);
        quantum_keeper.inc(core_inc);
        if(quantum_keeper.need_sync()) {
@@ -175,20 +193,22 @@ public:
        last_sync_cycle = cycle;
    }

-    bool read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch);
+    bool read_mem(uint64_t addr, unsigned length, uint8_t* const data, bool is_fetch) override;

-    bool write_mem(uint64_t addr, unsigned length, const uint8_t* const data);
+    bool write_mem(uint64_t addr, unsigned length, const uint8_t* const data) override;

-    bool read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data);
+    bool read_mem_dbg(uint64_t addr, unsigned length, uint8_t* const data) override;

-    bool write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data);
+    bool write_mem_dbg(uint64_t addr, unsigned length, const uint8_t* const data) override;

    void trace(sc_core::sc_trace_file* trf) const override;

-    bool disass_output(uint64_t pc, const std::string instr);
+    bool disass_output(uint64_t pc, const std::string instr) override;

    void set_clock_period(sc_core::sc_time period);

+    char const* hier_name() override { return name(); } // exposes this module's name() through core_complex_if
+
 protected:
    void before_end_of_elaboration() override;
    void start_of_simulation() override;
@@ -46,12 +46,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|interp",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|interp", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = reinterpret_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
@@ -62,12 +62,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|llvm",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = static_cast<sysc::tgfs::core_complex_if*>(data); // NOTE(review): data must originate from a core_complex_if*
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|llvm", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = static_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
@@ -79,12 +79,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|tcc",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = static_cast<sysc::tgfs::core_complex_if*>(data); // NOTE(review): data must originate from a core_complex_if*
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|tcc", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = static_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
@@ -96,12 +96,12 @@ using namespace sysc;
 volatile std::array<bool, 2> tgc_init = {
    iss_factory::instance().register_creator("tgc5c|m_p|asmjit",
                                             [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-                                                 auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+                                                 auto cc = static_cast<sysc::tgfs::core_complex_if*>(data); // NOTE(review): data must originate from a core_complex_if*
                                                 auto* cpu = new sc_core_adapter<arch::riscv_hart_m_p<arch::tgc5c>>(cc);
                                                 return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
                                             }),
    iss_factory::instance().register_creator("tgc5c|mu_p|asmjit", [](unsigned gdb_port, void* data) -> iss_factory::base_t {
-        auto cc = reinterpret_cast<sysc::tgfs::core_complex*>(data);
+        auto cc = static_cast<sysc::tgfs::core_complex_if*>(data);
        auto* cpu = new sc_core_adapter<arch::riscv_hart_mu_p<arch::tgc5c>>(cc);
        return {sysc::sc_cpu_ptr{cpu}, vm_ptr{create(static_cast<arch::tgc5c*>(cpu), gdb_port)}};
    })};
@@ -21,7 +21,7 @@ public:
    using reg_t = typename iss::arch::traits<typename PLAT::core>::reg_t;
    using phys_addr_t = typename iss::arch::traits<typename PLAT::core>::phys_addr_t;
    using heart_state_t = typename PLAT::hart_state_type;
-    sc_core_adapter(sysc::tgfs::core_complex* owner)
+    explicit sc_core_adapter(sysc::tgfs::core_complex_if* owner) // explicit: a bare owner pointer must not convert implicitly into an adapter
    : owner(owner) {}

    iss::arch_if* get_arch_if() override { return this; }
@@ -54,9 +54,9 @@ public:
            std::stringstream s;
            s << "[p:" << lvl[this->reg.PRIV] << ";s:0x" << std::hex << std::setfill('0') << std::setw(sizeof(reg_t) * 2)
              << (reg_t)this->state.mstatus << std::dec << ";c:" << this->reg.icount + this->cycle_offset << "]";
-            SCCDEBUG(owner->name()) << "disass: "
-                                    << "0x" << std::setw(16) << std::right << std::setfill('0') << std::hex << pc << "\t\t" << std::setw(40)
-                                    << std::setfill(' ') << std::left << instr << s.str();
+            SCCDEBUG(owner->hier_name()) << "disass: "
+                                         << "0x" << std::setw(16) << std::right << std::setfill('0') << std::hex << pc << "\t\t"
+                                         << std::setw(40) << std::setfill(' ') << std::left << instr << s.str();
        }
    };

@@ -79,10 +79,10 @@ public:
                    switch(hostvar >> 48) {
                    case 0:
                        if(hostvar != 0x1) {
-                            SCCINFO(owner->name())
+                            SCCINFO(owner->hier_name())
                                << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar << "), stopping simulation";
                        } else {
-                            SCCINFO(owner->name())
+                            SCCINFO(owner->hier_name())
                                << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar << "), stopping simulation";
                        }
                        this->reg.trap_state = std::numeric_limits<uint32_t>::max();
@@ -112,21 +112,8 @@ public:
    }

    iss::status read_csr(unsigned addr, reg_t& val) override {
-#ifndef CWR_SYSTEMC
-        if((addr == iss::arch::time || addr == iss::arch::timeh) && owner->mtime_o.get_interface(0)) {
-            uint64_t time_val;
-            bool ret = owner->mtime_o->nb_peek(time_val);
-            if(addr == iss::arch::time) {
-                val = static_cast<reg_t>(time_val);
-            } else if(addr == iss::arch::timeh) {
-                if(sizeof(reg_t) != 4)
-                    return iss::Err;
-                val = static_cast<reg_t>(time_val >> 32);
-            }
-            return ret ? iss::Ok : iss::Err;
-#else
        if((addr == iss::arch::time || addr == iss::arch::timeh)) {
-            uint64_t time_val = owner->mtime_i.read();
+            uint64_t time_val = owner->mtime_i.get_interface() ? owner->mtime_i.read() : 0;
            if(addr == iss::arch::time) {
                val = static_cast<reg_t>(time_val);
            } else if(addr == iss::arch::timeh) {
@@ -135,14 +122,13 @@ public:
                val = static_cast<reg_t>(time_val >> 32);
            }
            return iss::Ok;
-#endif
        } else {
            return PLAT::read_csr(addr, val);
        }
    }

    void wait_until(uint64_t flags) override {
-        SCCDEBUG(owner->name()) << "Sleeping until interrupt";
+        SCCDEBUG(owner->hier_name()) << "Sleeping until interrupt";
        while(this->reg.pending_trap == 0 && (this->csr[iss::arch::mip] & this->csr[iss::arch::mie]) == 0) {
            sc_core::wait(wfi_evt);
        }
@@ -173,11 +159,11 @@ public:
            this->csr[iss::arch::mip] &= ~mask;
        this->check_interrupt();
        if(value)
-            SCCTRACE(owner->name()) << "Triggering interrupt " << id << " Pending trap: " << this->reg.pending_trap;
+            SCCTRACE(owner->hier_name()) << "Triggering interrupt " << id << " Pending trap: " << this->reg.pending_trap;
    }

 private:
-    sysc::tgfs::core_complex* const owner;
+    sysc::tgfs::core_complex_if* const owner{nullptr};
    sc_core::sc_event wfi_evt;
    uint64_t hostvar{std::numeric_limits<uint64_t>::max()};
    unsigned to_host_wr_cnt = 0;
@@ -88,14 +88,13 @@ protected:
    using super::write_reg_to_mem;
    using super::gen_read_mem;
    using super::gen_write_mem;
-    using super::gen_wait;
    using super::gen_leave;
    using super::gen_sync;
   
    using this_class = vm_impl<ARCH>;
    using compile_func = continuation_e (this_class::*)(virt_addr_t&, code_word_t, jit_holder&);

-    continuation_e gen_single_inst_behavior(virt_addr_t&, unsigned int &, jit_holder&) override;
+    continuation_e gen_single_inst_behavior(virt_addr_t&, jit_holder&) override;
    enum globals_e {TVAL = 0, GLOBALS_SIZE};
    void gen_block_prologue(jit_holder& jh) override;
    void gen_block_epilogue(jit_holder& jh) override;
@@ -104,7 +103,7 @@ protected:
    void gen_instr_prologue(jit_holder& jh);
    void gen_instr_epilogue(jit_holder& jh);
    inline void gen_raise(jit_holder& jh, uint16_t trap_id, uint16_t cause);
-    template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>> void gen_set_tval(jit_holder& jh, T new_tval) ;
+    template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type> void gen_set_tval(jit_holder& jh, T new_tval) ;
    void gen_set_tval(jit_holder& jh, x86_reg_t _new_tval) ;

    template<unsigned W, typename U, typename S = typename std::make_signed<U>::type>
@@ -112,7 +111,8 @@ protected:
        auto mask = (1ULL<<W) - 1;
        auto sign_mask = 1ULL<<(W-1);
        return (from & mask) | ((from & sign_mask) ? ~mask : 0);
-    } 
+    }
+
 private:
    /****************************************************************************
     * start opcode definitions
@@ -500,6 +500,7 @@ private:
                (gen_operation(cc, band, (gen_operation(cc, add, load_reg_from_mem(jh, traits::X0 + rs1), (int16_t)sext<12>(imm))
                ), addr_mask)
                ), 32, true);
+            {
            auto label_merge = cc.newLabel();
            cmp(cc, gen_operation(cc, urem, new_pc, static_cast<uint32_t>(traits::INSTR_ALIGNMENT))
            ,0);
@@ -521,6 +522,7 @@ private:
                    mov(cc, get_ptr_for(jh, traits::LAST_BRANCH), static_cast<int>(UNKNOWN_JUMP));
                }
            cc.bind(label_merge);
+            }
        }
        auto returnValue = BRANCH;
        
@@ -566,6 +568,7 @@ private:
            gen_raise(jh, 0, static_cast<int32_t>(traits::RV_CAUSE_ILLEGAL_INSTRUCTION));
        }
        else{
+            {
            auto label_merge = cc.newLabel();
            cmp(cc, gen_operation(cc, eq, load_reg_from_mem(jh, traits::X0 + rs1), load_reg_from_mem(jh, traits::X0 + rs2))
            ,0);
@@ -583,6 +586,7 @@ private:
                }
            }
            cc.bind(label_merge);
+            }
        }
        auto returnValue = BRANCH;
        
@@ -628,6 +632,7 @@ private:
            gen_raise(jh, 0, static_cast<int32_t>(traits::RV_CAUSE_ILLEGAL_INSTRUCTION));
        }
        else{
+            {
            auto label_merge = cc.newLabel();
            cmp(cc, gen_operation(cc, ne, load_reg_from_mem(jh, traits::X0 + rs1), load_reg_from_mem(jh, traits::X0 + rs2))
            ,0);
@@ -645,6 +650,7 @@ private:
                }
            }
            cc.bind(label_merge);
+            }
        }
        auto returnValue = BRANCH;
        
@@ -690,6 +696,7 @@ private:
            gen_raise(jh, 0, static_cast<int32_t>(traits::RV_CAUSE_ILLEGAL_INSTRUCTION));
        }
        else{
+            {
            auto label_merge = cc.newLabel();
            cmp(cc, gen_operation(cc, lt, gen_ext(cc, 
                load_reg_from_mem(jh, traits::X0 + rs1), 32, false), gen_ext(cc, 
@@ -709,6 +716,7 @@ private:
                }
            }
            cc.bind(label_merge);
+            }
        }
        auto returnValue = BRANCH;
        
@@ -754,6 +762,7 @@ private:
            gen_raise(jh, 0, static_cast<int32_t>(traits::RV_CAUSE_ILLEGAL_INSTRUCTION));
        }
        else{
+            {
            auto label_merge = cc.newLabel();
            cmp(cc, gen_operation(cc, gte, gen_ext(cc, 
                load_reg_from_mem(jh, traits::X0 + rs1), 32, false), gen_ext(cc, 
@@ -773,6 +782,7 @@ private:
                }
            }
            cc.bind(label_merge);
+            }
        }
        auto returnValue = BRANCH;
        
@@ -818,6 +828,7 @@ private:
            gen_raise(jh, 0, static_cast<int32_t>(traits::RV_CAUSE_ILLEGAL_INSTRUCTION));
        }
        else{
+            {
            auto label_merge = cc.newLabel();
            cmp(cc, gen_operation(cc, ltu, load_reg_from_mem(jh, traits::X0 + rs1), load_reg_from_mem(jh, traits::X0 + rs2))
            ,0);
@@ -835,6 +846,7 @@ private:
                }
            }
            cc.bind(label_merge);
+            }
        }
        auto returnValue = BRANCH;
        
@@ -880,6 +892,7 @@ private:
            gen_raise(jh, 0, static_cast<int32_t>(traits::RV_CAUSE_ILLEGAL_INSTRUCTION));
        }
        else{
+            {
            auto label_merge = cc.newLabel();
            cmp(cc, gen_operation(cc, gteu, load_reg_from_mem(jh, traits::X0 + rs1), load_reg_from_mem(jh, traits::X0 + rs2))
            ,0);
@@ -897,6 +910,7 @@ private:
                }
            }
            cc.bind(label_merge);
+            }
        }
        auto returnValue = BRANCH;
        
@@ -1407,23 +1421,21 @@ private:
        }
        else{
            if(rd!=0){
-                {
-                auto label_then = cc.newLabel();
-                auto label_merge = cc.newLabel();
-                auto tmp_reg = get_reg_for(cc, 1);
+                auto label_then11 = cc.newLabel();
+                auto label_merge11 = cc.newLabel();
+                auto tmp_reg11 = get_reg(cc, 8, false);
                cmp(cc, gen_ext(cc, 
                    load_reg_from_mem(jh, traits::X0 + rs1), 32, true), (int16_t)sext<12>(imm));
-                cc.jl(label_then);
-                mov(cc, tmp_reg,0);
-                cc.jmp(label_merge);
-                cc.bind(label_then);
-                mov(cc, tmp_reg,1);
-                cc.bind(label_merge);
+                cc.jl(label_then11);
+                mov(cc, tmp_reg11,0);
+                cc.jmp(label_merge11);
+                cc.bind(label_then11);
+                mov(cc, tmp_reg11, 1);
+                cc.bind(label_merge11);
                mov(cc, get_ptr_for(jh, traits::X0+ rd),
-                      gen_ext(cc, tmp_reg
+                      gen_ext(cc, tmp_reg11
                      , 32, false)
                );
-                }
            }
        }
        auto returnValue = CONT;
@@ -1470,22 +1482,20 @@ private:
        }
        else{
            if(rd!=0){
-                {
-                auto label_then = cc.newLabel();
-                auto label_merge = cc.newLabel();
-                auto tmp_reg = get_reg_for(cc, 1);
+                auto label_then12 = cc.newLabel();
+                auto label_merge12 = cc.newLabel();
+                auto tmp_reg12 = get_reg(cc, 8, false);
                cmp(cc, load_reg_from_mem(jh, traits::X0 + rs1), (uint32_t)((int16_t)sext<12>(imm)));
-                cc.jb(label_then);
-                mov(cc, tmp_reg,0);
-                cc.jmp(label_merge);
-                cc.bind(label_then);
-                mov(cc, tmp_reg,1);
-                cc.bind(label_merge);
+                cc.jb(label_then12);
+                mov(cc, tmp_reg12,0);
+                cc.jmp(label_merge12);
+                cc.bind(label_then12);
+                mov(cc, tmp_reg12, 1);
+                cc.bind(label_merge12);
                mov(cc, get_ptr_for(jh, traits::X0+ rd),
-                      gen_ext(cc, tmp_reg
+                      gen_ext(cc, tmp_reg12
                      , 32, false)
                );
-                }
            }
        }
        auto returnValue = CONT;
@@ -1978,24 +1988,22 @@ private:
        }
        else{
            if(rd!=0){
-                {
-                auto label_then = cc.newLabel();
-                auto label_merge = cc.newLabel();
-                auto tmp_reg = get_reg_for(cc, 1);
+                auto label_then13 = cc.newLabel();
+                auto label_merge13 = cc.newLabel();
+                auto tmp_reg13 = get_reg(cc, 8, false);
                cmp(cc, gen_ext(cc, 
                    load_reg_from_mem(jh, traits::X0 + rs1), 32, true), gen_ext(cc, 
                    load_reg_from_mem(jh, traits::X0 + rs2), 32, true));
-                cc.jl(label_then);
-                mov(cc, tmp_reg,0);
-                cc.jmp(label_merge);
-                cc.bind(label_then);
-                mov(cc, tmp_reg,1);
-                cc.bind(label_merge);
+                cc.jl(label_then13);
+                mov(cc, tmp_reg13,0);
+                cc.jmp(label_merge13);
+                cc.bind(label_then13);
+                mov(cc, tmp_reg13, 1);
+                cc.bind(label_merge13);
                mov(cc, get_ptr_for(jh, traits::X0+ rd),
-                      gen_ext(cc, tmp_reg
+                      gen_ext(cc, tmp_reg13
                      , 32, false)
                );
-                }
            }
        }
        auto returnValue = CONT;
@@ -2042,22 +2050,20 @@ private:
        }
        else{
            if(rd!=0){
-                {
-                auto label_then = cc.newLabel();
-                auto label_merge = cc.newLabel();
-                auto tmp_reg = get_reg_for(cc, 1);
+                auto label_then14 = cc.newLabel();
+                auto label_merge14 = cc.newLabel();
+                auto tmp_reg14 = get_reg(cc, 8, false);
                cmp(cc, load_reg_from_mem(jh, traits::X0 + rs1), load_reg_from_mem(jh, traits::X0 + rs2));
-                cc.jb(label_then);
-                mov(cc, tmp_reg,0);
-                cc.jmp(label_merge);
-                cc.bind(label_then);
-                mov(cc, tmp_reg,1);
-                cc.bind(label_merge);
+                cc.jb(label_then14);
+                mov(cc, tmp_reg14,0);
+                cc.jmp(label_merge14);
+                cc.bind(label_then14);
+                mov(cc, tmp_reg14, 1);
+                cc.bind(label_merge14);
                mov(cc, get_ptr_for(jh, traits::X0+ rd),
-                      gen_ext(cc, tmp_reg
+                      gen_ext(cc, tmp_reg14
                      , 32, false)
                );
-                }
            }
        }
        auto returnValue = CONT;
@@ -2364,7 +2370,7 @@ private:
        if(this->disass_enabled){
            /* generate disass */
            
-            //This disass is not yet implemented
+            //No disass specified, using instruction name
            std::string mnemonic = "ecall";
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -2401,7 +2407,7 @@ private:
        if(this->disass_enabled){
            /* generate disass */
            
-            //This disass is not yet implemented
+            //No disass specified, using instruction name
            std::string mnemonic = "ebreak";
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -2438,7 +2444,7 @@ private:
        if(this->disass_enabled){
            /* generate disass */
            
-            //This disass is not yet implemented
+            //No disass specified, using instruction name
            std::string mnemonic = "mret";
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -2475,7 +2481,7 @@ private:
        if(this->disass_enabled){
            /* generate disass */
            
-            //This disass is not yet implemented
+            //No disass specified, using instruction name
            std::string mnemonic = "wfi";
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -2497,7 +2503,10 @@ private:
        gen_instr_prologue(jh);
        cc.comment("//behavior:");
        /*generate behavior*/
-        gen_wait(jh, 1);
+        InvokeNode* call_wait_15;
+        jh.cc.comment("//call_wait");
+        jh.cc.invoke(&call_wait_15, &wait, FuncSignature::build<void, int32_t>());
+        setArg(call_wait_15, 0, 1);
        auto returnValue = CONT;
        
        gen_sync(jh, POST_SYNC, 41);
@@ -3116,6 +3125,7 @@ private:
            auto divisor = gen_ext(cc, 
                load_reg_from_mem(jh, traits::X0 + rs2), 32, true);
            if(rd!=0){
+                {
                auto label_merge = cc.newLabel();
                cmp(cc, gen_operation(cc, ne, divisor, 0)
                ,0);
@@ -3123,6 +3133,7 @@ private:
                cc.je(label_else);
                {
                    auto MMIN = ((uint32_t)1)<<(static_cast<uint32_t>(traits::XLEN)-1);
+                    {
                    auto label_merge = cc.newLabel();
                    cmp(cc, gen_operation(cc, land, gen_operation(cc, eq, load_reg_from_mem(jh, traits::X0 + rs1), MMIN)
                    , gen_operation(cc, eq, divisor, - 1)
@@ -3143,6 +3154,7 @@ private:
                                      ), 32, true));
                        }
                    cc.bind(label_merge);
+                    }
                }
                cc.jmp(label_merge);
                cc.bind(label_else);
@@ -3151,6 +3163,7 @@ private:
                              (uint32_t)- 1);
                    }
                cc.bind(label_merge);
+                }
            }
        }
        auto returnValue = CONT;
@@ -3196,6 +3209,7 @@ private:
            gen_raise(jh, 0, static_cast<int32_t>(traits::RV_CAUSE_ILLEGAL_INSTRUCTION));
        }
        else{
+            {
            auto label_merge = cc.newLabel();
            cmp(cc, gen_operation(cc, ne, load_reg_from_mem(jh, traits::X0 + rs2), 0)
            ,0);
@@ -3217,6 +3231,7 @@ private:
                    }
                }
            cc.bind(label_merge);
+            }
        }
        auto returnValue = CONT;
        
@@ -3261,6 +3276,7 @@ private:
            gen_raise(jh, 0, static_cast<int32_t>(traits::RV_CAUSE_ILLEGAL_INSTRUCTION));
        }
        else{
+            {
            auto label_merge = cc.newLabel();
            cmp(cc, gen_operation(cc, ne, load_reg_from_mem(jh, traits::X0 + rs2), 0)
            ,0);
@@ -3268,6 +3284,7 @@ private:
            cc.je(label_else);
            {
                auto MMIN = (uint32_t)1<<(static_cast<uint32_t>(traits::XLEN)-1);
+                {
                auto label_merge = cc.newLabel();
                cmp(cc, gen_operation(cc, land, gen_operation(cc, eq, load_reg_from_mem(jh, traits::X0 + rs1), MMIN)
                , gen_operation(cc, eq, gen_ext(cc, 
@@ -3296,6 +3313,7 @@ private:
                        }
                    }
                cc.bind(label_merge);
+                }
            }
            cc.jmp(label_merge);
            cc.bind(label_else);
@@ -3306,6 +3324,7 @@ private:
                    }
                }
            cc.bind(label_merge);
+            }
        }
        auto returnValue = CONT;
        
@@ -3350,6 +3369,7 @@ private:
            gen_raise(jh, 0, static_cast<int32_t>(traits::RV_CAUSE_ILLEGAL_INSTRUCTION));
        }
        else{
+            {
            auto label_merge = cc.newLabel();
            cmp(cc, gen_operation(cc, ne, load_reg_from_mem(jh, traits::X0 + rs2), 0)
            ,0);
@@ -3371,6 +3391,7 @@ private:
                    }
                }
            cc.bind(label_merge);
+            }
        }
        auto returnValue = CONT;
        
@@ -3388,7 +3409,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rd}, {imm:#05x}", fmt::arg("mnemonic", "c__addi4spn"),
+                "{mnemonic:10} {rd}, {imm:#05x}", fmt::arg("mnemonic", "c.addi4spn"),
                fmt::arg("rd", name(8+rd)), fmt::arg("imm", imm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -3436,7 +3457,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rd}, {uimm:#05x}({rs1})", fmt::arg("mnemonic", "c__lw"),
+                "{mnemonic:10} {rd}, {uimm:#05x}({rs1})", fmt::arg("mnemonic", "c.lw"),
                fmt::arg("rd", name(8+rd)), fmt::arg("uimm", uimm), fmt::arg("rs1", name(8+rs1)));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -3482,7 +3503,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rs2}, {uimm:#05x}({rs1})", fmt::arg("mnemonic", "c__sw"),
+                "{mnemonic:10} {rs2}, {uimm:#05x}({rs1})", fmt::arg("mnemonic", "c.sw"),
                fmt::arg("rs2", name(8+rs2)), fmt::arg("uimm", uimm), fmt::arg("rs1", name(8+rs1)));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -3525,7 +3546,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rs1}, {imm:#05x}", fmt::arg("mnemonic", "c__addi"),
+                "{mnemonic:10} {rs1}, {imm:#05x}", fmt::arg("mnemonic", "c.addi"),
                fmt::arg("rs1", name(rs1)), fmt::arg("imm", imm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -3572,8 +3593,8 @@ private:
        if(this->disass_enabled){
            /* generate disass */
            
-            //This disass is not yet implemented
-            std::string mnemonic = "c__nop";
+            //No disass specified, using instruction name
+            std::string mnemonic = "c.nop";
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
            jh.disass_collection.push_back(mnemonic_ptr);
@@ -3609,7 +3630,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {imm:#05x}", fmt::arg("mnemonic", "c__jal"),
+                "{mnemonic:10} {imm:#05x}", fmt::arg("mnemonic", "c.jal"),
                fmt::arg("imm", imm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -3653,7 +3674,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rd}, {imm:#05x}", fmt::arg("mnemonic", "c__li"),
+                "{mnemonic:10} {rd}, {imm:#05x}", fmt::arg("mnemonic", "c.li"),
                fmt::arg("rd", name(rd)), fmt::arg("imm", imm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -3700,7 +3721,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rd}, {imm:#05x}", fmt::arg("mnemonic", "c__lui"),
+                "{mnemonic:10} {rd}, {imm:#05x}", fmt::arg("mnemonic", "c.lui"),
                fmt::arg("rd", name(rd)), fmt::arg("imm", imm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -3744,7 +3765,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {nzimm:#05x}", fmt::arg("mnemonic", "c__addi16sp"),
+                "{mnemonic:10} {nzimm:#05x}", fmt::arg("mnemonic", "c.addi16sp"),
                fmt::arg("nzimm", nzimm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -3789,8 +3810,8 @@ private:
        if(this->disass_enabled){
            /* generate disass */
            
-            //This disass is not yet implemented
-            std::string mnemonic = "__reserved_clui";
+            //No disass specified, using instruction name
+            std::string mnemonic = ".reserved_clui";
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
            jh.disass_collection.push_back(mnemonic_ptr);
@@ -3828,7 +3849,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rs1}, {shamt}", fmt::arg("mnemonic", "c__srli"),
+                "{mnemonic:10} {rs1}, {shamt}", fmt::arg("mnemonic", "c.srli"),
                fmt::arg("rs1", name(8+rs1)), fmt::arg("shamt", shamt));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -3869,7 +3890,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rs1}, {shamt}", fmt::arg("mnemonic", "c__srai"),
+                "{mnemonic:10} {rs1}, {shamt}", fmt::arg("mnemonic", "c.srai"),
                fmt::arg("rs1", name(8+rs1)), fmt::arg("shamt", shamt));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -3923,7 +3944,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rs1}, {imm:#05x}", fmt::arg("mnemonic", "c__andi"),
+                "{mnemonic:10} {rs1}, {imm:#05x}", fmt::arg("mnemonic", "c.andi"),
                fmt::arg("rs1", name(8+rs1)), fmt::arg("imm", imm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -3965,7 +3986,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c__sub"),
+                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c.sub"),
                fmt::arg("rd", name(8+rd)), fmt::arg("rs2", name(8+rs2)));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4007,7 +4028,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c__xor"),
+                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c.xor"),
                fmt::arg("rd", name(8+rd)), fmt::arg("rs2", name(8+rs2)));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4048,7 +4069,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c__or"),
+                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c.or"),
                fmt::arg("rd", name(8+rd)), fmt::arg("rs2", name(8+rs2)));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4089,7 +4110,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c__and"),
+                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c.and"),
                fmt::arg("rd", name(8+rd)), fmt::arg("rs2", name(8+rs2)));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4129,7 +4150,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {imm:#05x}", fmt::arg("mnemonic", "c__j"),
+                "{mnemonic:10} {imm:#05x}", fmt::arg("mnemonic", "c.j"),
                fmt::arg("imm", imm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4171,7 +4192,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rs1}, {imm:#05x}", fmt::arg("mnemonic", "c__beqz"),
+                "{mnemonic:10} {rs1}, {imm:#05x}", fmt::arg("mnemonic", "c.beqz"),
                fmt::arg("rs1", name(8+rs1)), fmt::arg("imm", imm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4194,6 +4215,7 @@ private:
        cc.comment("//behavior:");
        /*generate behavior*/
        mov(jh.cc, get_ptr_for(jh, traits::LAST_BRANCH), static_cast<int>(NO_JUMP));
+        {
        auto label_merge = cc.newLabel();
        cmp(cc, gen_operation(cc, eq, load_reg_from_mem(jh, traits::X0 + rs1+8), 0)
        ,0);
@@ -4204,6 +4226,7 @@ private:
            mov(cc, get_ptr_for(jh, traits::LAST_BRANCH), static_cast<int>(KNOWN_JUMP));
        }
        cc.bind(label_merge);
+        }
        auto returnValue = BRANCH;
        
        gen_sync(jh, POST_SYNC, 75);
@@ -4220,7 +4243,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rs1}, {imm:#05x}", fmt::arg("mnemonic", "c__bnez"),
+                "{mnemonic:10} {rs1}, {imm:#05x}", fmt::arg("mnemonic", "c.bnez"),
                fmt::arg("rs1", name(8+rs1)), fmt::arg("imm", imm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4243,6 +4266,7 @@ private:
        cc.comment("//behavior:");
        /*generate behavior*/
        mov(jh.cc, get_ptr_for(jh, traits::LAST_BRANCH), static_cast<int>(NO_JUMP));
+        {
        auto label_merge = cc.newLabel();
        cmp(cc, gen_operation(cc, ne, load_reg_from_mem(jh, traits::X0 + rs1+8), 0)
        ,0);
@@ -4253,6 +4277,7 @@ private:
            mov(cc, get_ptr_for(jh, traits::LAST_BRANCH), static_cast<int>(KNOWN_JUMP));
        }
        cc.bind(label_merge);
+        }
        auto returnValue = BRANCH;
        
        gen_sync(jh, POST_SYNC, 76);
@@ -4269,7 +4294,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rs1}, {nzuimm}", fmt::arg("mnemonic", "c__slli"),
+                "{mnemonic:10} {rs1}, {nzuimm}", fmt::arg("mnemonic", "c.slli"),
                fmt::arg("rs1", name(rs1)), fmt::arg("nzuimm", nzuimm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4317,7 +4342,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rd}, sp, {uimm:#05x}", fmt::arg("mnemonic", "c__lwsp"),
+                "{mnemonic:10} {rd}, sp, {uimm:#05x}", fmt::arg("mnemonic", "c.lwsp"),
                fmt::arg("rd", name(rd)), fmt::arg("uimm", uimm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4367,7 +4392,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c__mv"),
+                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c.mv"),
                fmt::arg("rd", name(rd)), fmt::arg("rs2", name(rs2)));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4413,7 +4438,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rs1}", fmt::arg("mnemonic", "c__jr"),
+                "{mnemonic:10} {rs1}", fmt::arg("mnemonic", "c.jr"),
                fmt::arg("rs1", name(rs1)));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4459,8 +4484,8 @@ private:
        if(this->disass_enabled){
            /* generate disass */
            
-            //This disass is not yet implemented
-            std::string mnemonic = "__reserved_cmv";
+            //No disass specified, using instruction name
+            std::string mnemonic = ".reserved_cmv";
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
            jh.disass_collection.push_back(mnemonic_ptr);
@@ -4498,7 +4523,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c__add"),
+                "{mnemonic:10} {rd}, {rs2}", fmt::arg("mnemonic", "c.add"),
                fmt::arg("rd", name(rd)), fmt::arg("rs2", name(rs2)));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4546,7 +4571,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rs1}", fmt::arg("mnemonic", "c__jalr"),
+                "{mnemonic:10} {rs1}", fmt::arg("mnemonic", "c.jalr"),
                fmt::arg("rs1", name(rs1)));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4595,8 +4620,8 @@ private:
        if(this->disass_enabled){
            /* generate disass */
            
-            //This disass is not yet implemented
-            std::string mnemonic = "c__ebreak";
+            //No disass specified, using instruction name
+            std::string mnemonic = "c.ebreak";
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
            jh.disass_collection.push_back(mnemonic_ptr);
@@ -4634,7 +4659,7 @@ private:
            /* generate disass */
            
            auto mnemonic = fmt::format(
-                "{mnemonic:10} {rs2}, {uimm:#05x}(sp)", fmt::arg("mnemonic", "c__swsp"),
+                "{mnemonic:10} {rs2}, {uimm:#05x}(sp)", fmt::arg("mnemonic", "c.swsp"),
                fmt::arg("rs2", name(rs2)), fmt::arg("uimm", uimm));
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4679,7 +4704,7 @@ private:
        if(this->disass_enabled){
            /* generate disass */
            
-            //This disass is not yet implemented
+            //No disass specified, using instruction name
            std::string mnemonic = "dii";
            InvokeNode* call_print_disass;
            char* mnemonic_ptr = strdup(mnemonic.c_str());
@@ -4735,7 +4760,7 @@ private:
        gen_raise(jh, 0, 2);
        gen_sync(jh, POST_SYNC, instr_descr.size());
        gen_instr_epilogue(jh);
-        return BRANCH;
+        return ILLEGAL_INSTR;
    }
 };

@@ -4755,7 +4780,7 @@ vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
    }()) {}

 template <typename ARCH>
-continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, jit_holder& jh) {
+continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, jit_holder& jh) {
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
    phys_addr_t paddr(pc);
@@ -4764,10 +4789,9 @@ continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned
        paddr = this->core.virt2phys(pc);
    auto res = this->core.read(paddr, 4, data);
    if (res != iss::Ok)
-        throw trap_access(TRAP_ID, pc.val);
+        return ILLEGAL_FETCH;
    if (instr == 0x0000006f || (instr&0xffff)==0xa001)
-        throw simulation_stopped(0); // 'J 0' or 'C.J 0'
-    ++inst_cnt;
+        return JUMP_TO_SELF;
    uint32_t inst_index = instr_decoder.decode_instr(instr);
    compile_func f = nullptr;
    if(inst_index < instr_descr.size())
@@ -4797,6 +4821,7 @@ void vm_impl<ARCH>::gen_instr_epilogue(jit_holder& jh) {
    cmp(cc, current_trap_state, 0);
    cc.jne(jh.trap_entry);
    cc.inc(get_ptr_for(jh, traits::ICOUNT));
+    cc.inc(get_ptr_for(jh, traits::CYCLE));
 }
 template <typename ARCH>
 void vm_impl<ARCH>::gen_block_prologue(jit_holder& jh){
@@ -4842,6 +4867,7 @@ inline void vm_impl<ARCH>::gen_raise(jit_holder& jh, uint16_t trap_id, uint16_t
    auto tmp1 = get_reg_for(cc, traits::TRAP_STATE);
    mov(cc, tmp1, 0x80ULL << 24 | (cause << 16) | trap_id);
    mov(cc, get_ptr_for(jh, traits::TRAP_STATE), tmp1);
+    cc.jmp(jh.trap_entry);
 }
 template <typename ARCH>
 template <typename T, typename>
@@ -4850,8 +4876,8 @@ void vm_impl<ARCH>::gen_set_tval(jit_holder& jh, T new_tval) {
    }
 template <typename ARCH>
 void vm_impl<ARCH>::gen_set_tval(jit_holder& jh, x86_reg_t _new_tval) {
-    if(std::holds_alternative<x86::Gp>(_new_tval)) {
-        x86::Gp new_tval = std::get<x86::Gp>(_new_tval);
+    if(nonstd::holds_alternative<x86::Gp>(_new_tval)) {
+        x86::Gp new_tval = nonstd::get<x86::Gp>(_new_tval);
        if(new_tval.size() < 8)
            new_tval = gen_ext_Gp(jh.cc, new_tval, 64, false);
        mov(jh.cc, jh.globals[TVAL], new_tval);
@@ -128,7 +128,6 @@ uint32_t fcmp_s(uint32_t v1, uint32_t v2, uint32_t op) {
 }

 uint32_t fcvt_s(uint32_t v1, uint32_t op, uint8_t mode) {
-
    float32_t v1f{v1};
    softfloat_exceptionFlags = 0;
    float32_t r;
@@ -204,8 +203,8 @@ uint32_t fclass_s(uint32_t v1) {
    uA.f = a;
    uiA = uA.ui;

-    uint_fast16_t infOrNaN = expF32UI(uiA) == 0xFF;
-    uint_fast16_t subnormalOrZero = expF32UI(uiA) == 0;
+    bool infOrNaN = expF32UI(uiA) == 0xFF;
+    bool subnormalOrZero = expF32UI(uiA) == 0;
    bool sign = signF32UI(uiA);
    bool fracZero = fracF32UI(uiA) == 0;
    bool isNaN = isNaNF32UI(uiA);
@@ -218,9 +217,13 @@ uint32_t fclass_s(uint32_t v1) {
 }

 uint32_t fconv_d2f(uint64_t v1, uint8_t mode) {
+    bool isNan = isNaNF64UI(v1);
+    bool isSNaN = softfloat_isSigNaNF64UI(v1);
    softfloat_roundingMode = rmm_map.at(mode);
-    bool nan = (v1 & defaultNaNF64UI) == defaultNaNF64UI;
-    if(nan) {
+    softfloat_exceptionFlags = 0;
+    if(isNan) {
+        if(isSNaN)
+            softfloat_raiseFlags(softfloat_flag_invalid);
        return defaultNaNF32UI;
    } else {
        float32_t res = f64_to_f32(float64_t{v1});
@@ -229,11 +232,11 @@ uint32_t fconv_d2f(uint64_t v1, uint8_t mode) {
 }

 uint64_t fconv_f2d(uint32_t v1, uint8_t mode) {
-    bool nan = (v1 & defaultNaNF32UI) == defaultNaNF32UI;
-    if(nan) {
+    // Widen a binary32 bit pattern to binary64. Only NaN inputs are replaced by the default
+    // double NaN; zeros, subnormals and infinities must be converted by f32_to_f64.
+    if(isNaNF32UI(v1)) {
         return defaultNaNF64UI;
     } else {
-        softfloat_roundingMode = rmm_map.at(mode);
         float64_t res = f32_to_f64(float32_t{v1});
         return res.v;
     }
@@ -313,22 +316,23 @@ uint64_t fcmp_d(uint64_t v1, uint64_t v2, uint32_t op) {
 }

 uint64_t fcvt_d(uint64_t v1, uint32_t op, uint8_t mode) {
+
    float64_t v1f{v1};
    softfloat_exceptionFlags = 0;
    float64_t r;
    switch(op) {
-    case 0: { // l->d, fp to int32
+    case 0: { // l from d
        int64_t res = f64_to_i64(v1f, rmm_map.at(mode), true);
        return (uint64_t)res;
    }
-    case 1: { // lu->s
+    case 1: { // lu from d
        uint64_t res = f64_to_ui64(v1f, rmm_map.at(mode), true);
        return res;
    }
-    case 2: // s->l
+    case 2: // d from l
        r = i64_to_f64(v1);
        return r.v;
-    case 3: // s->lu
+    case 3: // d from lu
        r = ui64_to_f64(v1);
        return r.v;
    }
@@ -336,12 +340,24 @@ uint64_t fcvt_d(uint64_t v1, uint32_t op, uint8_t mode) {
 }

 uint64_t fmadd_d(uint64_t v1, uint64_t v2, uint64_t v3, uint32_t op, uint8_t mode) {
-    // op should be {softfloat_mulAdd_subProd(2), softfloat_mulAdd_subC(1)}
+    uint64_t F64_SIGN = 1ULL << 63; // bit-63 mask, XORed into the raw operands below
+    switch(op) {
+    case 0: // FMADD_D
+        break;
+    case 1: // FMSUB_D
+        v3 ^= F64_SIGN;
+        break;
+    case 2: // FNMADD_D
+        v1 ^= F64_SIGN;
+        v3 ^= F64_SIGN;
+        break;
+    case 3: // FNMSUB_D
+        v1 ^= F64_SIGN;
+        break;
+    }
    softfloat_roundingMode = rmm_map.at(mode);
    softfloat_exceptionFlags = 0;
-    float64_t res = softfloat_mulAddF64(v1, v2, v3, op & 0x1);
-    if(op > 1)
-        res.v ^= 1ULL << 63;
+    float64_t res = softfloat_mulAddF64(v1, v2, v3, 0); // op argument fixed to 0: the per-variant flips were already applied to v1/v3
    return res.v;
 }

@@ -377,8 +393,8 @@ uint64_t fclass_d(uint64_t v1) {
    uA.f = a;
    uiA = uA.ui;

-    uint_fast16_t infOrNaN = expF64UI(uiA) == 0x7FF;
-    uint_fast16_t subnormalOrZero = expF64UI(uiA) == 0;
+    bool infOrNaN = expF64UI(uiA) == 0x7FF;
+    bool subnormalOrZero = expF64UI(uiA) == 0;
    bool sign = signF64UI(uiA);
    bool fracZero = fracF64UI(uiA) == 0;
    bool isNaN = isNaNF64UI(uiA);
@@ -96,7 +96,8 @@ protected:
    using compile_ret_t = virt_addr_t;
    using compile_func = compile_ret_t (this_class::*)(virt_addr_t &pc, code_word_t instr);

-    inline const char *name(size_t index){return index<traits::reg_aliases.size()?traits::reg_aliases[index]:"illegal";}
+    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
+

    virt_addr_t execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit) override;

@@ -274,9 +275,6 @@ template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
    volatile CODE_WORD x = insn;
    insn = 2 * x;
 }
-
-template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
-
 // according to
 // https://stackoverflow.com/questions/8871204/count-number-of-1s-in-binary-representation
 #ifdef __GCC__
@@ -332,17 +330,21 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
    while(!this->core.should_stop() &&
            !(is_icount_limit_enabled(cond) && icount >= count_limit) &&
            !(is_fcount_limit_enabled(cond) && fetch_count >= count_limit)){
-        fetch_count++;
+        if(this->debugging_enabled())
+            this->tgt_adapter->check_continue(*PC);
+        pc.val=*PC;
        if(fetch_ins(pc, data)!=iss::Ok){
-            this->do_sync(POST_SYNC, std::numeric_limits<unsigned>::max());
-            pc.val = super::core.enter_trap(std::numeric_limits<uint64_t>::max(), pc.val, 0);
+            if(this->sync_exec && PRE_SYNC) this->do_sync(PRE_SYNC, std::numeric_limits<unsigned>::max());
+            process_spawn_blocks();
+            if(this->sync_exec && POST_SYNC) this->do_sync(POST_SYNC, std::numeric_limits<unsigned>::max()); // post phase must be signalled as POST_SYNC, matching its guard
+            pc.val = super::core.enter_trap(arch::traits<ARCH>::RV_CAUSE_FETCH_ACCESS<<16, pc.val, 0);
        } else {
            if (is_jump_to_self_enabled(cond) &&
                    (instr == 0x0000006f || (instr&0xffff)==0xa001)) throw simulation_stopped(0); // 'J 0' or 'C.J 0'
            uint32_t inst_index = instr_decoder.decode_instr(instr);
            opcode_e inst_id = arch::traits<ARCH>::opcode_e::MAX_OPCODE;;
            if(inst_index <instr_descr.size())
-                inst_id = instr_descr.at(instr_decoder.decode_instr(instr)).op;
+                inst_id = instr_descr[inst_index].op;

            // pre execution stuff
            this->core.reg.last_branch = 0;
@@ -704,9 +706,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        uint32_t load_address = (uint32_t)((uint64_t)(*(X+rs1) ) + (uint64_t)((int16_t)sext<12>(imm) ));
-                                        int8_t res_27 = super::template read_mem<int8_t>(traits::MEM, load_address);
+                                        int8_t res_1 = super::template read_mem<int8_t>(traits::MEM, load_address);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        int8_t res = (int8_t)res_27;
+                                        int8_t res = (int8_t)res_1;
                                        if(rd != 0) {
                                            *(X+rd) = (uint32_t)res;
                                        }
@@ -735,9 +737,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        uint32_t load_address = (uint32_t)((uint64_t)(*(X+rs1) ) + (uint64_t)((int16_t)sext<12>(imm) ));
-                                        int16_t res_28 = super::template read_mem<int16_t>(traits::MEM, load_address);
+                                        int16_t res_2 = super::template read_mem<int16_t>(traits::MEM, load_address);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        int16_t res = (int16_t)res_28;
+                                        int16_t res = (int16_t)res_2;
                                        if(rd != 0) {
                                            *(X+rd) = (uint32_t)res;
                                        }
@@ -766,9 +768,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        uint32_t load_address = (uint32_t)((uint64_t)(*(X+rs1) ) + (uint64_t)((int16_t)sext<12>(imm) ));
-                                        int32_t res_29 = super::template read_mem<int32_t>(traits::MEM, load_address);
+                                        int32_t res_3 = super::template read_mem<int32_t>(traits::MEM, load_address);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        int32_t res = (int32_t)res_29;
+                                        int32_t res = (int32_t)res_3;
                                        if(rd != 0) {
                                            *(X+rd) = (uint32_t)res;
                                        }
@@ -797,9 +799,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        uint32_t load_address = (uint32_t)((uint64_t)(*(X+rs1) ) + (uint64_t)((int16_t)sext<12>(imm) ));
-                                        uint8_t res_30 = super::template read_mem<uint8_t>(traits::MEM, load_address);
+                                        uint8_t res_4 = super::template read_mem<uint8_t>(traits::MEM, load_address);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint8_t res = res_30;
+                                        uint8_t res = res_4;
                                        if(rd != 0) {
                                            *(X+rd) = (uint32_t)res;
                                        }
@@ -828,9 +830,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    }
                                    else {
                                        uint32_t load_address = (uint32_t)((uint64_t)(*(X+rs1) ) + (uint64_t)((int16_t)sext<12>(imm) ));
-                                        uint16_t res_31 = super::template read_mem<uint16_t>(traits::MEM, load_address);
+                                        uint16_t res_5 = super::template read_mem<uint16_t>(traits::MEM, load_address);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint16_t res = res_31;
+                                        uint16_t res = res_5;
                                        if(rd != 0) {
                                            *(X+rd) = (uint32_t)res;
                                        }
@@ -1457,7 +1459,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::ECALL: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "ecall");
+                        //No disass specified, using instruction name
+                        std::string mnemonic = "ecall";
+                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 4;
@@ -1470,7 +1474,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::EBREAK: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "ebreak");
+                        //No disass specified, using instruction name
+                        std::string mnemonic = "ebreak";
+                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 4;
@@ -1483,7 +1489,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::MRET: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "mret");
+                        //No disass specified, using instruction name
+                        std::string mnemonic = "mret";
+                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 4;
@@ -1496,7 +1504,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::WFI: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "wfi");
+                        //No disass specified, using instruction name
+                        std::string mnemonic = "wfi";
+                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 4;
@@ -1528,9 +1538,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                    else {
                                        uint32_t xrs1 = *(X+rs1);
                                        if(rd != 0) {
-                                            uint32_t res_32 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                            uint32_t res_6 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                            if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                            uint32_t xrd = res_32;
+                                            uint32_t xrd = res_6;
                                            super::template write_mem<uint32_t>(traits::CSR, csr, xrs1);
                                            if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
                                            *(X+rd) = xrd;
@@ -1563,9 +1573,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
-                                        uint32_t res_33 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                        uint32_t res_7 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint32_t xrd = res_33;
+                                        uint32_t xrd = res_7;
                                        uint32_t xrs1 = *(X+rs1);
                                        if(rs1 != 0) {
                                            super::template write_mem<uint32_t>(traits::CSR, csr, xrd | xrs1);
@@ -1598,9 +1608,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
-                                        uint32_t res_34 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                        uint32_t res_8 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint32_t xrd = res_34;
+                                        uint32_t xrd = res_8;
                                        uint32_t xrs1 = *(X+rs1);
                                        if(rs1 != 0) {
                                            super::template write_mem<uint32_t>(traits::CSR, csr, xrd & ~ xrs1);
@@ -1633,9 +1643,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
-                                        uint32_t res_35 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                        uint32_t res_9 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint32_t xrd = res_35;
+                                        uint32_t xrd = res_9;
                                        super::template write_mem<uint32_t>(traits::CSR, csr, (uint32_t)zimm);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
                                        if(rd != 0) {
@@ -1665,9 +1675,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
-                                        uint32_t res_36 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                        uint32_t res_10 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint32_t xrd = res_36;
+                                        uint32_t xrd = res_10;
                                        if(zimm != 0) {
                                            super::template write_mem<uint32_t>(traits::CSR, csr, xrd | (uint32_t)zimm);
                                            if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
@@ -1699,9 +1709,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                                        raise(0, traits::RV_CAUSE_ILLEGAL_INSTRUCTION);
                                    }
                                    else {
-                                        uint32_t res_37 = super::template read_mem<uint32_t>(traits::CSR, csr);
+                                        uint32_t res_11 = super::template read_mem<uint32_t>(traits::CSR, csr);
                                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                                        uint32_t xrd = res_37;
+                                        uint32_t xrd = res_11;
                                        if(zimm != 0) {
                                            super::template write_mem<uint32_t>(traits::CSR, csr, xrd & ~ ((uint32_t)zimm));
                                            if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
@@ -1720,7 +1730,7 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
                        auto mnemonic = fmt::format(
-                            "{mnemonic:10} {rs1}, {rd}, {imm}", fmt::arg("mnemonic", "fence.i"),
+                            "{mnemonic:10} {rs1}, {rd}, {imm}", fmt::arg("mnemonic", "fence_i"),
                            fmt::arg("rs1", name(rs1)), fmt::arg("rd", name(rd)), fmt::arg("imm", imm));
                        this->core.disass_output(pc.val, mnemonic);
                    }
@@ -2036,9 +2046,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    // execute instruction
                    {
                        uint32_t offs = (uint32_t)((uint64_t)(*(X+rs1 + 8) ) + (uint64_t)(uimm ));
-                        int32_t res_38 = super::template read_mem<int32_t>(traits::MEM, offs);
+                        int32_t res_12 = super::template read_mem<int32_t>(traits::MEM, offs);
                        if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                        *(X+rd + 8) = (uint32_t)(int32_t)res_38;
+                        *(X+rd + 8) = (uint32_t)(int32_t)res_12;
                    }
                    break;
                }// @suppress("No break at end of case")
@@ -2094,7 +2104,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    uint8_t nzimm = ((bit_sub<2,5>(instr)) | (bit_sub<12,1>(instr) << 5));
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "c.nop");
+                        //No disass specified, using instruction name
+                        std::string mnemonic = "c.nop";
+                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 2;
@@ -2200,7 +2212,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    uint8_t rd = ((bit_sub<7,5>(instr)));
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, ".reserved_clui");
+                        //No disass specified, using instruction name
+                        std::string mnemonic = ".reserved_clui";
+                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 2;
@@ -2458,9 +2472,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                        }
                        else {
                            uint32_t offs = (uint32_t)((uint64_t)(*(X+2) ) + (uint64_t)(uimm ));
-                            int32_t res_39 = super::template read_mem<int32_t>(traits::MEM, offs);
+                            int32_t res_13 = super::template read_mem<int32_t>(traits::MEM, offs);
                            if(this->core.reg.trap_state>=0x80000000UL) throw memory_access_exception();
-                            *(X+rd) = (uint32_t)(int32_t)res_39;
+                            *(X+rd) = (uint32_t)(int32_t)res_13;
                        }
                    }
                    break;
@@ -2519,7 +2533,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::__reserved_cmv: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, ".reserved_cmv");
+                        //No disass specified, using instruction name
+                        std::string mnemonic = ".reserved_cmv";
+                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 2;
@@ -2585,7 +2601,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::C__EBREAK: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "c.ebreak");
+                        //No disass specified, using instruction name
+                        std::string mnemonic = "c.ebreak";
+                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 2;
@@ -2624,7 +2642,9 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                case arch::traits<ARCH>::opcode_e::DII: {
                    if(this->disass_enabled){
                        /* generate console output when executing the command */
-                        this->core.disass_output(pc.val, "dii");
+                        //No disass specified, using instruction name
+                        std::string mnemonic = "dii";
+                        this->core.disass_output(pc.val, mnemonic);
                    }
                    // used registers// calculate next pc value
                    *NEXT_PC = *PC + 2;
@@ -2654,11 +2674,11 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                icount++;
                instret++;
            }
-            cycle++;
-            pc.val=*NEXT_PC;
-            this->core.reg.PC = this->core.reg.NEXT_PC;
+            *PC = *NEXT_PC;
            this->core.reg.trap_state =  this->core.reg.pending_trap;
        }
+        fetch_count++;
+        cycle++;
    }
    return pc;
 }
Author	SHA1	Message	Date
hongyu	aaebeaf023	changes the io_buf	2025-03-11 12:00:31 +01:00
eyck	f4718c6de3	Merge remote-tracking branch 'origin/feature/htif' into develop	2025-02-13 09:34:31 +01:00
eyck	53de21eef9	adds generator changed output	2025-02-12 20:45:04 +01:00
alex	d443c89c87	removes llvm from dbt-rise-tgc build system as it is handled in dbt-rise-core	2024-12-28 13:10:49 +01:00
alex	9a2df32d57	updates templates	2024-12-28 13:07:07 +01:00
alex	be0f783af8	adds cycle increment to tcc	2024-12-28 13:06:46 +01:00
alex	1089800682	updates vm_impls and core.h to work with new vm_base	2024-12-28 08:24:09 +01:00
eyck	a6a6f51f0b	adds clang-format fixes	2024-12-06 15:50:50 +01:00
alex	21e1f791ad	corrects sysc integration template and corresponding file	2024-12-06 09:49:02 +01:00
alex	be6f5791fa	adds update to cyclecount after each instr for asmjit	2024-11-26 20:26:18 +01:00
alex	d907dc7f54	corrects tohost functionality and minor cleanup	2024-11-22 17:35:12 +01:00
alex	75e81ce236	copies new tohost implemenation from hart_m_p	2024-11-14 16:51:26 +01:00
alex	82a70efdb8	small reorder to make tohost output more readable	2024-11-14 16:51:26 +01:00
alex	978c3db06e	minor improvements to readability	2024-11-14 16:51:26 +01:00
alex	0e88664ff7	adds better tohost writing implementation, allowing the standard riscv-isa-test benchmarks to run	2024-11-14 16:51:26 +01:00
alex	ac818f304d	increases verbosity incase elf loading goes wrong	2024-10-21 16:42:58 +02:00
alex	ad60449073	updates generated cores	2024-09-27 20:04:58 +02:00
alex	b45b3589fa	updates templates to immediately trap when gen_trap is called	2024-09-27 20:03:51 +02:00
alex	1fb7e8fcea	improves logging output	2024-09-24 08:39:34 +02:00
alex	5f9d0beafb	corrects softfloat to comply with RVD ACT	2024-09-23 22:22:57 +02:00
alex	4c0d1c75aa	adds addr formatting to logging	2024-09-23 12:21:43 +02:00
alex	2f3abf2f76	adds namespaces for ELFIO	2024-09-23 11:55:18 +02:00
eyck	62768bf81e	applies clang format	2024-09-23 10:05:33 +02:00
eyck	f6be8ec006	adds elfio test utility	2024-09-23 09:29:08 +02:00
eyck	a8f56b6e27	removes code dupication by unifying elf file read	2024-09-23 09:28:27 +02:00
alex	76ea0db25d	adds newest generated vm_impl	2024-08-17 23:19:51 +02:00
eyck	ec1b820c18	fixes target xml generation	2024-08-17 19:36:53 +02:00
eyck	64329cf0f6	fixes use of icount vs. cycle	2024-08-17 19:36:40 +02:00
eyck	9de0aed84d	expands some error message	2024-08-17 16:55:49 +02:00
eyck	bb4e2766d1	applies clang-format	2024-08-17 16:12:57 +02:00
eyck	0996d15bd4	removes debug code	2024-08-17 12:48:48 +02:00
eyck	6305efa7c2	implements proper target XML generation incl. CSRs	2024-08-17 12:40:40 +02:00
eyck	de79adc50d	updates debugger hook to stop before fetching instructions this relates to https://github.com/Minres/DBT-RISE-RISCV/issues/8 : Debugger loses control when trap vector fetch fails and https://github.com/Minres/DBT-RISE-RISCV/issues/7 : Two debugger single-steps are required at reset vector	2024-08-17 12:39:54 +02:00
eyck	0473aa5344	fixes SystemC wrapper wrt. templated core_complex	2024-08-17 12:34:17 +02:00
alex	a45fcd28db	updates fn calling generation	2024-08-17 08:22:04 +02:00
alex	0f15032210	removes gen_wait as wait can be called like any other extern function	2024-08-14 15:25:06 +02:00
alex	efc11d87a5	updates template with fcsr check, adds extra braces on If Statements	2024-08-14 14:32:58 +02:00
alex	4a19e27926	adds changes due to generator being more inline with others	2024-08-14 13:52:08 +02:00
alex	c15cdb0955	expands return values of jit creating functions to inhibit endless trapping	2024-08-14 11:49:59 +02:00
alex	6609d12582	adds flimit that gets properly evaluated in interp	2024-08-13 15:22:34 +02:00
alex	b5341700aa	updates template and adds braces when using conditions	2024-08-13 08:55:14 +02:00
alex	0b5062d21c	adds fp_functions here to remove dependencies in dbt-rise-core	2024-08-09 11:56:32 +02:00
alex	fbca690b3b	replaces gen_wait, updates template to include fp_functions when necessary	2024-08-08 12:57:08 +02:00
alex	235a7e6e24	updates template	2024-08-08 11:08:28 +02:00
alex	62d21e1156	updates disass	2024-08-07 09:21:07 +02:00
alex	9c51d6eade	improves interp, only calls decode once per instr	2024-08-07 09:20:11 +02:00
alex	2878dca6b5	updates templates	2024-08-06 08:32:05 +02:00
eyck	c28e8fd00c	removes left-overs	2024-08-04 18:57:20 +02:00
eyck	b3cc9d2346	makes core_complex a template	2024-08-04 18:47:32 +02:00
eyck	933f08494c	removes C++17 dependency from asmjit backend	2024-08-04 17:41:49 +02:00
eyck	21f8eab432	adds regenerated tgc5c	2024-08-02 19:18:28 +02:00
eyck	6ddb8da07f	fixes missing rename	2024-08-02 11:58:51 +02:00
eyck	edf456c59f	fixes missing braces	2024-08-02 10:33:15 +02:00
eyck	42efced1eb	fixes FCSR behavior if no floating point is implemented	2024-08-02 08:59:22 +02:00
eyck	c376e34b2b	applies clang format	2024-08-01 11:02:10 +02:00