corrects mistake from rebasing, adds newly generated templates

configures logger in main
small changes regarding vector template
2025-03-31 12:50:05 +02:00 · 2025-03-31 10:19:16 +02:00 · 2025-03-31 10:19:16 +02:00 · 2025-03-31 10:19:16 +02:00 · 2025-03-31 10:19:16 +02:00 · 2025-03-31 10:19:16 +02:00
39 changed files with 7262 additions and 3721 deletions
@@ -18,8 +18,10 @@ add_subdirectory(softfloat)
 set(LIB_SOURCES
    src/iss/plugin/instruction_count.cpp
    src/iss/arch/tgc5c.cpp
+    src/iss/mem/memory_if.cpp
    src/vm/interp/vm_tgc5c.cpp
    src/vm/fp_functions.cpp
+    src/vm/vector_functions.cpp
    src/iss/debugger/csr_names.cpp
    src/iss/semihosting/semihosting.cpp
 )
@@ -109,16 +111,6 @@ if(TARGET yaml-cpp::yaml-cpp)
    target_link_libraries(${PROJECT_NAME} PUBLIC yaml-cpp::yaml-cpp)
 endif()

-if(WITH_LLVM)
-    find_package(LLVM)
-    target_compile_definitions(${PROJECT_NAME} PUBLIC ${LLVM_DEFINITIONS})
-    target_include_directories(${PROJECT_NAME} PUBLIC ${LLVM_INCLUDE_DIRS})
-
-    if(BUILD_SHARED_LIBS)
-        target_link_libraries(${PROJECT_NAME} PUBLIC ${LLVM_LIBRARIES})
-    endif()
-endif()
-
 set_target_properties(${PROJECT_NAME} PROPERTIES
    VERSION ${PROJECT_VERSION}
    FRAMEWORK FALSE
@@ -20,7 +20,7 @@ RVI:
    mask: 0b00000000000000000000000001111111
    size:   32
    branch:   true
-    delay:   1
+    delay:   [1,1]
  JALR:
    index: 3
    encoding: 0b00000000000000000000000001100111
@@ -30,11 +30,21 @@
 *
 *******************************************************************************/
 <%
-def nativeTypeSize(int size){
-    if(size<=8) return 8; else if(size<=16) return 16; else if(size<=32) return 32; else return 64;
+def nativeSize(int size){ // returns the smallest of 8, 16, 32, ..., 4096 that is >= size
+    if(size<=8) return 8;
+    if(size<=16) return 16;
+    if(size<=32) return 32;
+    if(size<=64) return 64;
+    if(size<=128) return 128;
+    if(size<=256) return 256;
+    if(size<=512) return 512;
+    if(size<=1024) return 1024;
+    if(size<=2048) return 2048;
+    if(size<=4096) return 4096;
+    throw new IllegalArgumentException("Unsupported size in nativeSize in CORENAME.h.gtl"); // sizes above 4096 are rejected
 }
 def getRegisterSizes(){
-    def regs = registers.collect{nativeTypeSize(it.size)}
+    def regs = registers.collect{nativeSize(it.size)}
    regs+=[32,32, 64, 64, 64, 32, 32] // append TRAP_STATE, PENDING_TRAP, ICOUNT, CYCLE, INSTRET, INSTRUCTION, LAST_BRANCH
    return regs
 }
@@ -47,13 +57,7 @@ def getRegisterOffsets(){
    }
    return offsets
 }
-def byteSize(int size){
-    if(size<=8) return 8;
-    if(size<=16) return 16;
-    if(size<=32) return 32;
-    if(size<=64) return 64;
-    return 128;
-}
+
 def getCString(def val){
    return val.toString()+'ULL'
 }
@@ -84,6 +88,8 @@ template <> struct traits<${coreDef.name.toLowerCase()}> {
    enum constants {${constants.collect{c -> c.name+"="+getCString(c.value)}.join(', ')}};

    constexpr static unsigned FP_REGS_SIZE = ${constants.find {it.name=='FLEN'}?.value?:0};
+    constexpr static unsigned V_REGS_SIZE = ${constants.find {it.name=='VLEN'}?.value?:0};
+

    enum reg_e {
        ${registers.collect{it.name}.join(', ')}, NUM_REGS, TRAP_STATE=NUM_REGS, PENDING_TRAP, ICOUNT, CYCLE, INSTRET, INSTRUCTION, LAST_BRANCH
@@ -131,8 +137,6 @@ struct ${coreDef.name.toLowerCase()}: public arch_if {

    uint8_t* get_regs_base_ptr() override;

-    inline uint64_t get_icount() { return reg.icount; }
-
    inline bool should_stop() { return interrupt_sim; }

    inline uint64_t stop_code() { return interrupt_sim; }
@@ -141,20 +145,20 @@ struct ${coreDef.name.toLowerCase()}: public arch_if {

    virtual iss::sync_type needed_sync() const { return iss::NO_SYNC; }

-    inline uint32_t get_last_branch() { return reg.last_branch; }
-

 #pragma pack(push, 1)
    struct ${coreDef.name}_regs {<%
-        registers.each { reg -> if(reg.size>0) {%> 
-        uint${byteSize(reg.size)}_t ${reg.name} = 0;<%
+        registers.each { reg -> if(reg.size>64) {%>
+        uint8_t ${reg.name}[${reg.size/8}] = {0};<%
+        }else if(reg.size>0) {%>
+        uint${nativeSize(reg.size)}_t ${reg.name} = 0;<%
        }}%>
        uint32_t trap_state = 0, pending_trap = 0;
-        uint64_t icount = 0;
-        uint64_t cycle = 0;
-        uint64_t instret = 0;
-        uint32_t instruction = 0;
-        uint32_t last_branch = 0;
+        uint64_t icount = 0;      // counts number of instructions undisturbed
+        uint64_t cycle = 0;       // counts number of cycles, in functional mode equals icount
+        uint64_t instret = 0;     // counts number of instructions, can be reset via CSR write
+        uint32_t instruction = 0; // holds op code of currently executed instruction
+        uint32_t last_branch = 0; // indicates if last branch was taken
    } reg;
 #pragma pack(pop)
    std::array<address_type, 4> addr_mode;
@@ -168,6 +172,31 @@ if(fcsr != null) {%>
 <%} else { %>
    uint32_t get_fcsr(){return 0;}
    void set_fcsr(uint32_t val){}
+<%}
+def vstart = registers.find {it.name=='vstart'} // look up the vector CSR register descriptions by name; find yields null when a register is not declared
+def vl = registers.find {it.name=='vl'}
+def vtype = registers.find {it.name=='vtype'}
+def vxsat = registers.find {it.name=='vxsat'}
+def vxrm = registers.find {it.name=='vxrm'}
+// accessor types go through nativeSize so they match the struct field types; only vtype is tested, the other four registers are assumed to be declared alongside it
+if(vtype != null) {%>
+    uint${nativeSize(vstart.size)}_t get_vstart(){return reg.vstart;}
+    void set_vstart(uint${nativeSize(vstart.size)}_t val){reg.vstart = val;}
+    uint${nativeSize(vl.size)}_t get_vl(){return reg.vl;}
+    uint${nativeSize(vtype.size)}_t get_vtype(){return reg.vtype;}
+    uint${nativeSize(vxsat.size)}_t get_vxsat(){return reg.vxsat;}
+    void set_vxsat(uint${nativeSize(vxsat.size)}_t val){reg.vxsat = val;}
+    uint${nativeSize(vxrm.size)}_t get_vxrm(){return reg.vxrm;}
+    void set_vxrm(uint${nativeSize(vxrm.size)}_t val){reg.vxrm = val;}
+<%} else { %>
+    uint32_t get_vstart(){return 0;}
+    void set_vstart(uint32_t val){}
+    uint32_t get_vl(){return 0;}
+    uint32_t get_vtype(){return 0;}
+    uint32_t get_vxsat(){return 0;}
+    void set_vxsat(uint32_t val){}
+    uint32_t get_vxrm(){return 0;}
+    void set_vxrm(uint32_t val){}
 <%}%>
 };

@@ -96,7 +96,7 @@ protected:
    using this_class = vm_impl<ARCH>;
    using compile_func = continuation_e (this_class::*)(virt_addr_t&, code_word_t, jit_holder&);

-    continuation_e gen_single_inst_behavior(virt_addr_t&, unsigned int &, jit_holder&) override;
+    continuation_e gen_single_inst_behavior(virt_addr_t&, jit_holder&) override;
    enum globals_e {TVAL = 0, GLOBALS_SIZE};
    void gen_block_prologue(jit_holder& jh) override;
    void gen_block_epilogue(jit_holder& jh) override;
@@ -221,7 +221,7 @@ vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
    }()) {}

 template <typename ARCH>
-continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, jit_holder& jh) {
+continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, jit_holder& jh) {
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
    phys_addr_t paddr(pc);
@@ -233,7 +233,6 @@ continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned
        return ILLEGAL_FETCH;
    if (instr == 0x0000006f || (instr&0xffff)==0xa001)
        return JUMP_TO_SELF;
-    ++inst_cnt;
    uint32_t inst_index = instr_decoder.decode_instr(instr);
    compile_func f = nullptr;
    if(inst_index < instr_descr.size())
@@ -30,6 +30,9 @@
 *
 *******************************************************************************/
 <%
+def floating_point = registers.find {it.name=='FCSR'}
+def vector = registers.find {it.name=='vtype'}
+def aes = functions.find { it.contains('aes') }
 def nativeTypeSize(int size){
    if(size<=8) return 8; else if(size<=16) return 16; else if(size<=32) return 32; else return 64;
 }
@@ -41,7 +44,16 @@ def nativeTypeSize(int size){
 #include <iss/debugger/server.h>
 #include <iss/iss.h>
 #include <iss/interp/vm_base.h>
+<%
+if(floating_point != null) {%>
 #include <vm/fp_functions.h>
+<%}
+if(vector != null) {%>
+#include <vm/vector_functions.h>
+<%}
+if(aes != null) {%>
+#include <vm/aes_sbox.h>
+<%}%>
 #include <util/logging.h>
 #include <boost/coroutine2/all.hpp>
 #include <functional>
@@ -101,10 +113,48 @@ protected:
    using compile_func = compile_ret_t (this_class::*)(virt_addr_t &pc, code_word_t instr);

    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
-<%
-def fcsr = registers.find {it.name=='FCSR'}
-if(fcsr != null) {%>
+<% 
+if(floating_point != null) {%>
    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}     
+<%}
+if(vector != null) {%>
+    inline const char* vname(size_t index) { return index < 32 ? name(index + traits::V0) : "illegal"; } // alias of register traits::V0 + index for index < 32, otherwise "illegal"
+    inline const char* sew_name(size_t bits) { // printable name for codes 0b000..0b011 (e8, e16, e32, e64); any other value gives "illegal"
+        switch(bits) {
+        case 0b000:
+            return "e8";
+        case 0b001:
+            return "e16";
+        case 0b010:
+            return "e32";
+        case 0b011:
+            return "e64";
+        default:
+            return "illegal";
+        }
+    }
+    inline const char* lmul_name(size_t bits) { // printable name for a 3-bit code: 0b101..0b111 give mf8, mf4, mf2; 0b000..0b011 give m1, m2, m4, m8; any other value gives "illegal"
+        switch(bits) {
+        case 0b101:
+            return "mf8";
+        case 0b110:
+            return "mf4";
+        case 0b111:
+            return "mf2";
+        case 0b000:
+            return "m1";
+        case 0b001:
+            return "m2";
+        case 0b010:
+            return "m4";
+        case 0b011:
+            return "m8";
+        default:
+            return "illegal";
+        }
+    }
+    inline const char* ma_name(bool ma) { return ma ? "ma" : "mu"; } // "ma" when the flag is set, "mu" otherwise
+    inline const char* ta_name(bool ta) { return ta ? "ta" : "tu"; } // "ta" when the flag is set, "tu" otherwise
 <%}%>

    virt_addr_t execute_inst(finish_cond_e cond, virt_addr_t start, uint64_t icount_limit) override;
@@ -127,7 +177,792 @@ if(fcsr != null) {%>
    inline void set_tval(uint64_t new_tval){
        tval = new_tval;
    }
+<%if(vector != null) {
+    def xlen = constants.find { it.name == 'XLEN' }?.value ?: 0
+    def vlen = constants.find { it.name == 'VLEN' }?.value ?: 0 %>
+    inline void lower(){ // resets the core's trap_state register field to 0
+        this->core.reg.trap_state = 0;
+    }
+    uint64_t vlseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size){
+        switch(width_val){ // width_val picks the element type (0b000, 0b101, 0b110, 0b111 -> 8, 16, 32, 64 bit); the transfer callback is softvec_read
+            case 0b000:
+                return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+            case 0b101:
+                return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+            case 0b110:
+                return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+            case 0b111:
+                return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+            default:
+                throw std::runtime_error("Unsupported width bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    uint64_t vsseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size){
+        switch(width_val){ // width_val picks the element type (0b000, 0b101, 0b110, 0b111 -> 8, 16, 32, 64 bit); the transfer callback is softvec_write
+            case 0b000:
+                return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+            case 0b101:
+                return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+            case 0b110:
+                return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+            case 0b111:
+                return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+            default:
+                throw std::runtime_error("Unsupported width bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    uint64_t vlsseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size, int64_t stride){
+        switch(width_val){ // width_val picks the element type; stride and a trailing true flag are forwarded to vector_load_store together with softvec_read
+            case 0b000:
+                return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+            case 0b101:
+                return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+            case 0b110:
+                return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+            case 0b111:
+                return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+            default:
+                throw std::runtime_error("Unsupported width bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    uint64_t vssseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size, int64_t stride){
+        switch(width_val){ // width_val picks the element type; stride and a trailing true flag are forwarded to vector_load_store together with softvec_write
+            case 0b000:
+                return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+            case 0b101:
+                return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+            case 0b110:
+                return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+            case 0b111:
+                return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+            default:
+                throw std::runtime_error("Unsupported width bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }

+    using indexed_load_store_t = std::function<uint64_t(void*, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)>, uint8_t*, uint64_t, uint64_t, softvector::vtype_t, bool, uint8_t, uint64_t, uint8_t, uint8_t)>;
+    template <typename T1, typename T2> indexed_load_store_t getFunction() { // wraps vector_load_store_index instantiated for T1 and T2 in the common indexed_load_store_t signature
+        return [](void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint64_t vl, // callback type matches the alias above; nothing is captured because the body only uses its arguments
+                  uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1, uint8_t vs2, uint8_t segment_size) {
+            return softvector::vector_load_store_index<${xlen}, ${vlen}, T1, T2>(core, load_store_fn, V, vl, vstart, vtype, vm, vd, rs1, vs2, segment_size);
+        };
+    }
+
+    const std::array<std::array<indexed_load_store_t, 4>, 4> functionTable = {{ // [row][column]: row fixes T1, column fixes T2, each in the order uint8_t, uint16_t, uint32_t, uint64_t
+        {getFunction<uint8_t, uint8_t>(), getFunction<uint8_t, uint16_t>(), getFunction<uint8_t, uint32_t>(), getFunction<uint8_t, uint64_t>()},
+        {getFunction<uint16_t, uint8_t>(), getFunction<uint16_t, uint16_t>(), getFunction<uint16_t, uint32_t>(), getFunction<uint16_t, uint64_t>()},
+        {getFunction<uint32_t, uint8_t>(), getFunction<uint32_t, uint16_t>(), getFunction<uint32_t, uint32_t>(), getFunction<uint32_t, uint64_t>()},
+        {getFunction<uint64_t, uint8_t>(), getFunction<uint64_t, uint16_t>(), getFunction<uint64_t, uint32_t>(), getFunction<uint64_t, uint64_t>()}
+    }};
+    const size_t map_index_size[9] = { 0, 0, 1, 0, 2, 0, 0, 0, 3 }; // translate number of bytes to index in functionTable
+    uint64_t vlxseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t vs2, uint8_t segment_size, uint8_t index_byte_size, uint8_t data_byte_size, bool ordered){ // looks up the functionTable entry and calls it with softvec_read; ordered is not used here
+        return functionTable[map_index_size[index_byte_size]][data_byte_size](this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, vs2, segment_size); // NOTE(review): index_byte_size is translated through map_index_size (9 entries, unchecked above 8) while data_byte_size indexes the 4-wide table directly - confirm callers pass 0..3 there
+    }
+    uint64_t vsxseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vs3, uint64_t rs1_val, uint8_t vs2, uint8_t segment_size, uint8_t index_byte_size, uint8_t data_byte_size, bool ordered){ // looks up the functionTable entry and calls it with softvec_write; ordered is not used here
+        return functionTable[map_index_size[index_byte_size]][data_byte_size](this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vs3, rs1_val, vs2, segment_size); // NOTE(review): index_byte_size is translated through map_index_size (9 entries, unchecked above 8) while data_byte_size indexes the 4-wide table directly - confirm callers pass 0..3 there
+    }
+    void vector_vector_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){ // sew_val 0b000..0b011 selects the uint8_t..uint64_t instantiation of softvector::vector_vector_op
+            case 0b000:
+                return softvector::vector_vector_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::vector_vector_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::vector_vector_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b011:
+                return softvector::vector_vector_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void vector_imm_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
+        switch(sew_val){ // sew_val 0b000..0b011 selects the uint8_t..uint64_t instantiation of softvector::vector_imm_op
+            case 0b000:
+                return softvector::vector_imm_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::vector_imm_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::vector_imm_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011:
+                return softvector::vector_imm_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void vector_vector_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){ // each case instantiates vector_vector_op with a first type twice as wide as the second; 0b011 falls through to the error
+            case 0b000:
+                return softvector::vector_vector_op<${vlen}, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::vector_vector_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::vector_vector_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b011: // would widen to 128 bits
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void vector_imm_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
+        switch(sew_val){ // each case instantiates vector_imm_op with a first type twice as wide as the second; 0b011 falls through to the error
+            case 0b000:
+                return softvector::vector_imm_op<${vlen}, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::vector_imm_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::vector_imm_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011: // would widen to 128 bits
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void vector_vector_ww(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){ // each case instantiates vector_vector_op with the first two types twice as wide as the third; 0b011 falls through to the error
+            case 0b000:
+                return softvector::vector_vector_op<${vlen}, uint16_t, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::vector_vector_op<${vlen}, uint32_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::vector_vector_op<${vlen}, uint64_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b011: // would widen to 128 bits
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void vector_imm_ww(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
+        switch(sew_val){ // each case instantiates vector_imm_op with the first two types twice as wide as the third; 0b011 falls through to the error
+            case 0b000:
+                return softvector::vector_imm_op<${vlen}, uint16_t, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::vector_imm_op<${vlen}, uint32_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::vector_imm_op<${vlen}, uint64_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011: // would widen to 128 bits
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void vector_extend(uint8_t* V, uint8_t unary_op, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t target_sew_pow, uint8_t frac_pow){
+        switch(target_sew_pow){ // target_sew_pow 4, 5, 6 fixes the first type (16, 32, 64 bit); frac_pow fixes the second type as that width divided by 2, 4 or 8
+            case 4: // uint16_t target
+                if(frac_pow != 1) throw std::runtime_error("Unsupported frac_pow"); // all errors here are thrown by value so they can be caught as std::exception&
+                return softvector::vector_unary_op<${vlen}, uint16_t, uint8_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2);
+            case 5: // uint32_t target
+                switch(frac_pow){
+                    case 1:
+                        return softvector::vector_unary_op<${vlen}, uint32_t, uint16_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2);
+                    case 2:
+                        return softvector::vector_unary_op<${vlen}, uint32_t, uint8_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2);
+                    default:
+                        throw std::runtime_error("Unsupported frac_pow");
+                }
+            case 6: // uint64_t target
+                switch(frac_pow){
+                    case 1:
+                        return softvector::vector_unary_op<${vlen}, uint64_t, uint32_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2);
+                    case 2:
+                        return softvector::vector_unary_op<${vlen}, uint64_t, uint16_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2);
+                    case 3:
+                        return softvector::vector_unary_op<${vlen}, uint64_t, uint8_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2);
+                    default:
+                        throw std::runtime_error("Unsupported frac_pow");
+                }
+            default:
+                throw std::runtime_error("Unsupported target_sew_pow");
+        }
+    }
+    void vector_vector_carry(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val, int8_t carry){
+        switch(sew_val){ // sew_val 0b000..0b011 selects the uint8_t..uint64_t instantiation of softvector::vector_vector_carry
+            case 0b000:
+                return softvector::vector_vector_carry<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, vs1, carry);
+            case 0b001:
+                return softvector::vector_vector_carry<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, vs1, carry);
+            case 0b010:
+                return softvector::vector_vector_carry<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, vs1, carry);
+            case 0b011:
+                return softvector::vector_vector_carry<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, vs1, carry);
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }    }
+    void vector_imm_carry(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val, int8_t carry){
+        switch(sew_val){ // sew_val 0b000..0b011 selects the uint8_t..uint64_t instantiation of softvector::vector_imm_carry
+            case 0b000:
+                return softvector::vector_imm_carry<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, imm, carry);
+            case 0b001:
+                return softvector::vector_imm_carry<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, imm, carry);
+            case 0b010:
+                return softvector::vector_imm_carry<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, imm, carry);
+            case 0b011:
+                return softvector::vector_imm_carry<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, imm, carry);
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void carry_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1, uint8_t sew_val){
+        switch(sew_val){ // sew_val 0b000..0b011 selects the uint8_t..uint64_t instantiation of softvector::carry_vector_vector_op
+            case 0b000:
+                return softvector::carry_vector_vector_op<${vlen}, uint8_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::carry_vector_vector_op<${vlen}, uint16_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::carry_vector_vector_op<${vlen}, uint32_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b011:
+                return softvector::carry_vector_vector_op<${vlen}, uint64_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1);
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void carry_vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val){
+        switch(sew_val){ // sew_val 0b000..0b011 selects the uint8_t..uint64_t instantiation of softvector::carry_vector_imm_op
+            case 0b000:
+                return softvector::carry_vector_imm_op<${vlen}, uint8_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::carry_vector_imm_op<${vlen}, uint16_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::carry_vector_imm_op<${vlen}, uint32_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011:
+                return softvector::carry_vector_imm_op<${vlen}, uint64_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm);
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void mask_vector_vector_op(uint8_t* V, unsigned funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1, uint8_t sew_val){
+        switch(sew_val){ // sew_val 0b000..0b011 selects the uint8_t..uint64_t instantiation of softvector::mask_vector_vector_op
+            case 0b000:
+                return softvector::mask_vector_vector_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::mask_vector_vector_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::mask_vector_vector_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b011:
+                return softvector::mask_vector_vector_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void mask_vector_imm_op(uint8_t* V, unsigned funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val){
+        switch(sew_val){ // sew_val 0b000..0b011 selects the uint8_t..uint64_t instantiation of softvector::mask_vector_imm_op
+            case 0b000:
+                return softvector::mask_vector_imm_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::mask_vector_imm_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::mask_vector_imm_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011:
+                return softvector::mask_vector_imm_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void vector_vector_vw(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){ // each case instantiates vector_vector_op with a middle type twice as wide as the first and third; 0b011 falls through to the error
+            case 0b000:
+                return softvector::vector_vector_op<${vlen}, uint8_t, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::vector_vector_op<${vlen}, uint16_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::vector_vector_op<${vlen}, uint32_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b011: // would require 128 bits vs2 value
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void vector_imm_vw(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
+        switch(sew_val){ // each case instantiates vector_imm_op with a middle type twice as wide as the first and third; 0b011 falls through to the error
+            case 0b000:
+                return softvector::vector_imm_op<${vlen}, uint8_t,  uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::vector_imm_op<${vlen}, uint16_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::vector_imm_op<${vlen}, uint32_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011: // would require 128 bits vs2 value
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    void vector_vector_merge(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){ // sew_val 0b000..0b011 selects the uint8_t..uint64_t instantiation of softvector::vector_vector_merge
+            case 0b000:
+                return softvector::vector_vector_merge<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::vector_vector_merge<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::vector_vector_merge<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b011:
+                return softvector::vector_vector_merge<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            default:
+                throw std::runtime_error("Unsupported sew bit value"); // thrown by value so it can be caught as std::exception&
+        }
+    }
+    // Dispatches on sew_val to softvector::vector_imm_merge (instantiated for the
+    // configured vlen) and forwards all remaining arguments unchanged.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::vector_imm_merge<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::vector_imm_merge<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::vector_imm_merge<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011:
+                return softvector::vector_imm_merge<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::sat_vector_vector_op (instantiated for the
+    // configured vlen), forwards all remaining arguments unchanged and returns the
+    // bool produced by the selected instantiation.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    bool sat_vector_vector_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint64_t vxrm, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::sat_vector_vector_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::sat_vector_vector_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::sat_vector_vector_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, vs1);
+            case 0b011:
+                return softvector::sat_vector_vector_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, vs1);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::sat_vector_imm_op (instantiated for the
+    // configured vlen), forwards all remaining arguments unchanged and returns the
+    // bool produced by the selected instantiation.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    bool sat_vector_imm_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint64_t vxrm, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::sat_vector_imm_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::sat_vector_imm_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::sat_vector_imm_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, imm);
+            case 0b011:
+                return softvector::sat_vector_imm_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, imm);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the two-type form of softvector::sat_vector_vector_op
+    // (instantiated for the configured vlen), forwards all remaining arguments
+    // unchanged and returns the bool produced by the selected instantiation.
+    //   0b000 -> <uint8_t,  uint16_t>
+    //   0b001 -> <uint16_t, uint32_t>
+    //   0b010 -> <uint32_t, uint64_t>
+    // Throws std::runtime_error for 0b011 and any other sew_val.
+    bool sat_vector_vector_vw(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint64_t vxrm, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::sat_vector_vector_op<${vlen}, uint8_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::sat_vector_vector_op<${vlen}, uint16_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::sat_vector_vector_op<${vlen}, uint32_t, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, vs1);
+            case 0b011: // would require 128 bits vs2 value
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the two-type form of softvector::sat_vector_imm_op
+    // (instantiated for the configured vlen), forwards all remaining arguments
+    // unchanged and returns the bool produced by the selected instantiation.
+    //   0b000 -> <uint8_t,  uint16_t>
+    //   0b001 -> <uint16_t, uint32_t>
+    //   0b010 -> <uint32_t, uint64_t>
+    // Throws std::runtime_error for 0b011 and any other sew_val.
+    bool sat_vector_imm_vw(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint64_t vxrm, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::sat_vector_imm_op<${vlen}, uint8_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::sat_vector_imm_op<${vlen}, uint16_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::sat_vector_imm_op<${vlen}, uint32_t, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, imm);
+            case 0b011: // would require 128 bits vs2 value
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::vector_red_op (instantiated for the
+    // configured vlen) and forwards all remaining arguments unchanged.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void vector_red_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::vector_red_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::vector_red_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::vector_red_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b011:
+                return softvector::vector_red_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the two-type form of softvector::vector_red_op
+    // (instantiated for the configured vlen) and forwards all remaining arguments unchanged.
+    //   0b000 -> <uint16_t, uint8_t>
+    //   0b001 -> <uint32_t, uint16_t>
+    //   0b010 -> <uint64_t, uint32_t>
+    // Throws std::runtime_error for 0b011 and any other sew_val.
+    void vector_red_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::vector_red_op<${vlen}, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::vector_red_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::vector_red_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b011: // would require 128 bits vs2 value
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Thin forwarder: hands every argument, unchanged and in order, to
+    // softvector::mask_mask_op instantiated for the configured vlen.
+    void mask_mask_op(uint8_t* V,
+                      unsigned funct6, unsigned funct3,
+                      uint64_t vl, uint64_t vstart,
+                      unsigned vd, unsigned vs2, unsigned vs1) {
+        softvector::mask_mask_op<${vlen}>(V, funct6, funct3, vl, vstart, vd, vs2, vs1);
+    }
+    // Thin forwarder: calls softvector::vcpop instantiated for the configured vlen
+    // with the given arguments and returns its result as uint64_t.
+    uint64_t vcpop(uint8_t* V,
+                   uint64_t vl, uint64_t vstart,
+                   bool vm, unsigned vs2) {
+        const uint64_t result = softvector::vcpop<${vlen}>(V, vl, vstart, vm, vs2);
+        return result;
+    }
+    // Thin forwarder: calls softvector::vfirst instantiated for the configured vlen
+    // with the given arguments and returns its result as int64_t.
+    int64_t vfirst(uint8_t* V,
+                   uint64_t vl, uint64_t vstart,
+                   bool vm, unsigned vs2) {
+        const int64_t result = softvector::vfirst<${vlen}>(V, vl, vstart, vm, vs2);
+        return result;
+    }
+    // Thin forwarder: hands every argument, unchanged and in order, to
+    // softvector::mask_set_op instantiated for the configured vlen.
+    void mask_set_op(uint8_t* V,
+                     unsigned enc,
+                     uint64_t vl, uint64_t vstart,
+                     bool vm, unsigned vd, unsigned vs2) {
+        softvector::mask_set_op<${vlen}>(V, enc, vl, vstart, vm, vd, vs2);
+    }
+    // Dispatches on sew_val to softvector::viota (instantiated for the configured vlen)
+    // and forwards all remaining arguments unchanged.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void viota(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::viota<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2);
+            case 0b001:
+                return softvector::viota<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2);
+            case 0b010:
+                return softvector::viota<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2);
+            case 0b011:
+                return softvector::viota<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::vid (instantiated for the configured vlen)
+    // and forwards all remaining arguments unchanged.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void vid(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::vid<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd);
+            case 0b001:
+                return softvector::vid<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd);
+            case 0b010:
+                return softvector::vid<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd);
+            case 0b011:
+                return softvector::vid<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::scalar_move (instantiated for the configured
+    // vlen), passing val and a final argument of true; the call's result is discarded.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void scalar_to_vector(uint8_t* V, softvector::vtype_t vtype, unsigned vd, uint64_t val, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                softvector::scalar_move<${vlen}, uint8_t>(V, vtype, vd, val, true);
+                break;
+            case 0b001:
+                softvector::scalar_move<${vlen}, uint16_t>(V, vtype, vd, val, true);
+                break;
+            case 0b010:
+                softvector::scalar_move<${vlen}, uint32_t>(V, vtype, vd, val, true);
+                break;
+            case 0b011:
+                softvector::scalar_move<${vlen}, uint64_t>(V, vtype, vd, val, true);
+                break;
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::scalar_move (instantiated for the configured
+    // vlen), passing 0 as the value and false as the final argument, and returns the
+    // call's result as uint64_t.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    uint64_t scalar_from_vector(uint8_t* V, softvector::vtype_t vtype, unsigned vd, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::scalar_move<${vlen}, uint8_t>(V, vtype, vd, 0, false);
+            case 0b001:
+                return softvector::scalar_move<${vlen}, uint16_t>(V, vtype, vd, 0, false);
+            case 0b010:
+                return softvector::scalar_move<${vlen}, uint32_t>(V, vtype, vd, 0, false);
+            case 0b011:
+                return softvector::scalar_move<${vlen}, uint64_t>(V, vtype, vd, 0, false);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::vector_slideup (instantiated for the
+    // configured vlen) and forwards all remaining arguments unchanged.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
+        switch(sew_val){
+            case 0b000:
+                return softvector::vector_slideup<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::vector_slideup<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::vector_slideup<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011:
+                return softvector::vector_slideup<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::vector_slidedown (instantiated for the
+    // configured vlen) and forwards all remaining arguments unchanged.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
+        switch(sew_val){
+            case 0b000:
+                return softvector::vector_slidedown<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::vector_slidedown<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::vector_slidedown<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011:
+                return softvector::vector_slidedown<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::vector_slide1up (instantiated for the
+    // configured vlen) and forwards all remaining arguments unchanged.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
+        switch(sew_val){
+            case 0b000:
+                return softvector::vector_slide1up<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::vector_slide1up<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::vector_slide1up<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011:
+                return softvector::vector_slide1up<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::vector_slide1down (instantiated for the
+    // configured vlen) and forwards all remaining arguments unchanged.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
+        switch(sew_val){
+            case 0b000:
+                return softvector::vector_slide1down<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::vector_slide1down<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::vector_slide1down<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011:
+                return softvector::vector_slide1down<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the single-type form of softvector::vector_vector_gather
+    // (instantiated for the configured vlen) and forwards all remaining arguments unchanged.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::vector_vector_gather<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::vector_vector_gather<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::vector_vector_gather<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b011:
+                return softvector::vector_vector_gather<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the two-type form of softvector::vector_vector_gather
+    // (instantiated for the configured vlen); the second type argument is always
+    // uint16_t. All remaining arguments are forwarded unchanged.
+    //   0b000 -> <uint8_t,  uint16_t>
+    //   0b001 -> <uint16_t, uint16_t>
+    //   0b010 -> <uint32_t, uint16_t>
+    //   0b011 -> <uint64_t, uint16_t>
+    // Throws std::runtime_error for any other sew_val.
+    void vector_vector_gatherei16(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::vector_vector_gather<${vlen}, uint8_t, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b001:
+                return softvector::vector_vector_gather<${vlen}, uint16_t, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b010:
+                return softvector::vector_vector_gather<${vlen}, uint32_t, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            case 0b011:
+                return softvector::vector_vector_gather<${vlen}, uint64_t, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::vector_imm_gather (instantiated for the
+    // configured vlen) and forwards all remaining arguments unchanged.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint64_t imm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::vector_imm_gather<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b001:
+                return softvector::vector_imm_gather<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b010:
+                return softvector::vector_imm_gather<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            case 0b011:
+                return softvector::vector_imm_gather<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::vector_compress (instantiated for the
+    // configured vlen) and forwards all remaining arguments unchanged.
+    //   0b000 -> uint8_t, 0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for any other sew_val.
+    void vector_compress(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::vector_compress<${vlen}, uint8_t>(V, vl, vstart, vtype, vd, vs2, vs1);
+            case 0b001:
+                return softvector::vector_compress<${vlen}, uint16_t>(V, vl, vstart, vtype, vd, vs2, vs1);
+            case 0b010:
+                return softvector::vector_compress<${vlen}, uint32_t>(V, vl, vstart, vtype, vd, vs2, vs1);
+            case 0b011:
+                return softvector::vector_compress<${vlen}, uint64_t>(V, vl, vstart, vtype, vd, vs2, vs1);
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Thin forwarder: hands V, vd, vs2 and count, unchanged, to
+    // softvector::vector_whole_move instantiated for the configured vlen.
+    void vector_whole_move(uint8_t* V,
+                           uint8_t vd, uint8_t vs2,
+                           uint8_t count) {
+        softvector::vector_whole_move<${vlen}>(V, vd, vs2, count);
+    }
+    // fp_-named alias: delegates to scalar_from_vector with identical arguments
+    // and returns whatever that call returns.
+    uint64_t fp_scalar_from_vector(uint8_t* V,
+                                   softvector::vtype_t vtype,
+                                   unsigned vd,
+                                   uint8_t sew_val) {
+        const uint64_t value = scalar_from_vector(V, vtype, vd, sew_val);
+        return value;
+    }
+    // fp_-named alias: delegates to vector_slide1up with identical arguments.
+    void fp_vector_slide1up(uint8_t* V,
+                            uint64_t vl, uint64_t vstart,
+                            softvector::vtype_t vtype, bool vm,
+                            unsigned vd, unsigned vs2,
+                            uint64_t imm, uint8_t sew_val) {
+        vector_slide1up(V, vl, vstart, vtype, vm, vd, vs2, imm, sew_val);
+    }
+    // fp_-named alias: delegates to vector_slide1down with identical arguments.
+    void fp_vector_slide1down(uint8_t* V,
+                              uint64_t vl, uint64_t vstart,
+                              softvector::vtype_t vtype, bool vm,
+                              unsigned vd, unsigned vs2,
+                              uint64_t imm, uint8_t sew_val) {
+        vector_slide1down(V, vl, vstart, vtype, vm, vd, vs2, imm, sew_val);
+    }
+    // Dispatches on sew_val to softvector::fp_vector_red_op (instantiated for the
+    // configured vlen) and forwards all remaining arguments, including rm, unchanged.
+    //   0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for 0b000 and any other sew_val.
+    void fp_vector_red_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+            case 0b001:
+                return softvector::fp_vector_red_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b010:
+                return softvector::fp_vector_red_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b011:
+                return softvector::fp_vector_red_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            default:
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the two-type form of softvector::fp_vector_red_op
+    // (instantiated for the configured vlen) and forwards all remaining arguments,
+    // including rm, unchanged.
+    //   0b000 -> <uint16_t, uint8_t>
+    //   0b001 -> <uint32_t, uint16_t>
+    //   0b010 -> <uint64_t, uint32_t>
+    // Throws std::runtime_error for 0b011 and any other sew_val.
+    void fp_vector_red_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                // NOTE(review): fp_vector_red_op rejects sew_val 0b000; confirm that
+                // accepting it here (uint8_t second type) is intended.
+                return softvector::fp_vector_red_op<${vlen}, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b001:
+                return softvector::fp_vector_red_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b010:
+                return softvector::fp_vector_red_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b011: // would require 128 bits vs2 value
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the single-type form of softvector::fp_vector_vector_op
+    // (instantiated for the configured vlen) and forwards all remaining arguments,
+    // including rm, unchanged.
+    //   0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for 0b000 and any other sew_val.
+    void fp_vector_vector_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+            case 0b001:
+                return softvector::fp_vector_vector_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b010:
+                return softvector::fp_vector_vector_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b011:
+                return softvector::fp_vector_vector_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            default:
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the single-type form of softvector::fp_vector_imm_op
+    // (instantiated for the configured vlen) and forwards all remaining arguments,
+    // including rm, unchanged.
+    //   0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for 0b000 and any other sew_val.
+    void fp_vector_imm_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint64_t imm, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+            case 0b001:
+                return softvector::fp_vector_imm_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, rm);
+            case 0b010:
+                return softvector::fp_vector_imm_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, rm);
+            case 0b011:
+                return softvector::fp_vector_imm_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, rm);
+            default:
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the two-type form of softvector::fp_vector_vector_op
+    // (instantiated for the configured vlen) and forwards all remaining arguments,
+    // including rm, unchanged.
+    //   0b001 -> <uint32_t, uint16_t>
+    //   0b010 -> <uint64_t, uint32_t>
+    // Throws std::runtime_error for 0b000, 0b011 and any other sew_val.
+    void fp_vector_vector_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+            case 0b001:
+                return softvector::fp_vector_vector_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b010:
+                return softvector::fp_vector_vector_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b011: // would widen to 128 bits
+            default:
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the three-type form of softvector::fp_vector_imm_op
+    // (instantiated for the configured vlen) and forwards all remaining arguments,
+    // including rm, unchanged.
+    //   0b001 -> <uint32_t, uint16_t, uint16_t>
+    //   0b010 -> <uint64_t, uint32_t, uint32_t>
+    // Throws std::runtime_error for 0b000, 0b011 and any other sew_val.
+    void fp_vector_imm_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint64_t imm, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+            case 0b001:
+                return softvector::fp_vector_imm_op<${vlen}, uint32_t, uint16_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, rm);
+            case 0b010:
+                return softvector::fp_vector_imm_op<${vlen}, uint64_t, uint32_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, rm);
+            case 0b011: // would widen to 128 bits
+            default:
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the three-type form of softvector::fp_vector_vector_op
+    // (instantiated for the configured vlen) and forwards all remaining arguments,
+    // including rm, unchanged.
+    //   0b001 -> <uint32_t, uint32_t, uint16_t>
+    //   0b010 -> <uint64_t, uint64_t, uint32_t>
+    // Throws std::runtime_error for 0b000, 0b011 and any other sew_val.
+    void fp_vector_vector_ww(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+            case 0b001:
+                return softvector::fp_vector_vector_op<${vlen}, uint32_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b010:
+                return softvector::fp_vector_vector_op<${vlen}, uint64_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b011: // would widen to 128 bits
+            default:
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to the three-type form of softvector::fp_vector_imm_op
+    // (instantiated for the configured vlen) and forwards all remaining arguments,
+    // including rm, unchanged.
+    //   0b001 -> <uint32_t, uint32_t, uint16_t>
+    //   0b010 -> <uint64_t, uint64_t, uint32_t>
+    // Throws std::runtime_error for 0b000, 0b011 and any other sew_val.
+    void fp_vector_imm_ww(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint64_t imm, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+            case 0b001:
+                return softvector::fp_vector_imm_op<${vlen}, uint32_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, rm);
+            case 0b010:
+                return softvector::fp_vector_imm_op<${vlen}, uint64_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, rm);
+            case 0b011: // would widen to 128 bits
+            default:
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::fp_vector_unary_op (instantiated for the
+    // configured vlen) and forwards all remaining arguments, including rm, unchanged.
+    //   0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for 0b000 and any other sew_val.
+    void fp_vector_unary_op(uint8_t* V, uint8_t encoding_space, uint8_t unary_op, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+            case 0b001:
+                return softvector::fp_vector_unary_op<${vlen}, uint16_t>(V, encoding_space, unary_op, vl, vstart, vtype, vm, vd, vs2, rm);
+            case 0b010:
+                return softvector::fp_vector_unary_op<${vlen}, uint32_t>(V, encoding_space, unary_op, vl, vstart, vtype, vm, vd, vs2, rm);
+            case 0b011:
+                return softvector::fp_vector_unary_op<${vlen}, uint64_t>(V, encoding_space, unary_op, vl, vstart, vtype, vm, vd, vs2, rm);
+            default:
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::mask_fp_vector_vector_op (instantiated for
+    // the configured vlen) and forwards all remaining arguments, including rm, unchanged.
+    //   0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for 0b000 and any other sew_val.
+    void mask_fp_vector_vector_op(uint8_t* V, uint8_t funct6, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+            case 0b001:
+                return softvector::mask_fp_vector_vector_op<${vlen}, uint16_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b010:
+                return softvector::mask_fp_vector_vector_op<${vlen}, uint32_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            case 0b011:
+                return softvector::mask_fp_vector_vector_op<${vlen}, uint64_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
+            default:
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::mask_fp_vector_imm_op (instantiated for
+    // the configured vlen) and forwards all remaining arguments, including rm, unchanged.
+    //   0b001 -> uint16_t, 0b010 -> uint32_t, 0b011 -> uint64_t
+    // Throws std::runtime_error for 0b000 and any other sew_val.
+    void mask_fp_vector_imm_op(uint8_t* V, uint8_t funct6, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint64_t imm, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+            case 0b001:
+                return softvector::mask_fp_vector_imm_op<${vlen}, uint16_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm, rm);
+            case 0b010:
+                return softvector::mask_fp_vector_imm_op<${vlen}, uint32_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm, rm);
+            case 0b011:
+                return softvector::mask_fp_vector_imm_op<${vlen}, uint64_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm, rm);
+            default:
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // fp_-named alias: delegates to vector_imm_merge with the same arguments.
+    void fp_vector_imm_merge(uint8_t* V,
+                             uint64_t vl, uint64_t vstart,
+                             softvector::vtype_t vtype, bool vm,
+                             uint8_t vd, uint8_t vs2,
+                             uint64_t imm, uint8_t sew_val) {
+        // vector_imm_merge declares its immediate as int64_t; make the
+        // uint64_t -> int64_t conversion visible instead of implicit
+        const int64_t signed_imm = static_cast<int64_t>(imm);
+        vector_imm_merge(V, vl, vstart, vtype, vm, vd, vs2, signed_imm, sew_val);
+    }
+    // Dispatches on sew_val to softvector::fp_vector_unary_w (instantiated for the
+    // configured vlen) and forwards all remaining arguments, including rm, unchanged.
+    //   0b000 -> <uint16_t, uint8_t>
+    //   0b001 -> <uint32_t, uint16_t>
+    //   0b010 -> <uint64_t, uint32_t>
+    // Throws std::runtime_error for 0b011 and any other sew_val.
+    void fp_vector_unary_w(uint8_t* V, uint8_t unary_op, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::fp_vector_unary_w<${vlen}, uint16_t, uint8_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2, rm);
+            case 0b001:
+                return softvector::fp_vector_unary_w<${vlen}, uint32_t, uint16_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2, rm);
+            case 0b010:
+                return softvector::fp_vector_unary_w<${vlen}, uint64_t, uint32_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2, rm);
+            case 0b011: // would widen to 128 bits
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Dispatches on sew_val to softvector::fp_vector_unary_n (instantiated for the
+    // configured vlen) and forwards all remaining arguments, including rm, unchanged.
+    //   0b000 -> <uint8_t,  uint16_t>
+    //   0b001 -> <uint16_t, uint32_t>
+    //   0b010 -> <uint32_t, uint64_t>
+    // Throws std::runtime_error for 0b011 and any other sew_val.
+    void fp_vector_unary_n(uint8_t* V, uint8_t unary_op, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t rm, uint8_t sew_val){
+        switch(sew_val){
+            case 0b000:
+                return softvector::fp_vector_unary_n<${vlen}, uint8_t, uint16_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2, rm);
+            case 0b001:
+                return softvector::fp_vector_unary_n<${vlen}, uint16_t, uint32_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2, rm);
+            case 0b010:
+                return softvector::fp_vector_unary_n<${vlen}, uint32_t, uint64_t>(V, unary_op, vl, vstart, vtype, vm, vd, vs2, rm);
+            case 0b011: // would require 128 bit value to narrow
+            default:
+                // throw by value so catch(const std::exception&) handlers match and nothing leaks
+                throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+<%}%>
    uint64_t fetch_count{0};
    uint64_t tval{0};

@@ -175,22 +1010,8 @@ private:
    decoder instr_decoder;

    // Reads a 4-byte code word at pc into data through this->core.read.
    // Returns iss::Err when that read does not report iss::Ok, iss::Ok otherwise.
    iss::status fetch_ins(virt_addr_t pc, uint8_t * data){
-        if(this->core.has_mmu()) {
-            auto phys_pc = this->core.virt2phys(pc);
-//            if ((pc.val & upper_bits) != ((pc.val + 2) & upper_bits)) { // we may cross a page boundary
-//                if (this->core.read(phys_pc, 2, data) != iss::Ok) return iss::Err;
-//                if ((data[0] & 0x3) == 0x3) // this is a 32bit instruction
-//                    if (this->core.read(this->core.v2p(pc + 2), 2, data + 2) != iss::Ok)
-//                        return iss::Err;
-//            } else {
-                if (this->core.read(phys_pc, 4, data) != iss::Ok)
-                    return iss::Err;
-//            }
-        } else {
-            if (this->core.read(phys_addr_t(pc.access, pc.space, pc.val), 4, data) != iss::Ok)
-                return iss::Err;
-
-        }
+        if (this->core.read(iss::address_type::PHYSICAL, pc.access, pc.space, pc.val, 4, data) != iss::Ok)
+            return iss::Err;
        return iss::Ok;
    }
 };
@@ -199,9 +1020,6 @@ template <typename CODE_WORD> void debug_fn(CODE_WORD insn) {
    volatile CODE_WORD x = insn;
    insn = 2 * x;
 }
-
-template <typename ARCH> vm_impl<ARCH>::vm_impl() { this(new ARCH()); }
-
 // according to
 // https://stackoverflow.com/questions/8871204/count-number-of-1s-in-binary-representation
 #ifdef __GCC__
@@ -288,8 +1106,8 @@ typename vm_base<ARCH>::virt_addr_t vm_impl<ARCH>::execute_inst(finish_cond_e co
                    // used registers<%instr.usedVariables.each{ k,v->
                    if(v.isArray) {%>
                    auto* ${k} = reinterpret_cast<uint${nativeTypeSize(v.type.size)}_t*>(this->regs_base_ptr+arch::traits<ARCH>::reg_byte_offsets[arch::traits<ARCH>::${k}0]);<% }else{ %> 
-                    auto* ${k} = reinterpret_cast<uint${nativeTypeSize(v.type.size)}_t*>(this->regs_base_ptr+arch::traits<ARCH>::reg_byte_offsets[arch::traits<ARCH>::${k}]);
-                    <%}}%>// calculate next pc value
+                    auto* ${k} = reinterpret_cast<uint${nativeTypeSize(v.type.size)}_t*>(this->regs_base_ptr+arch::traits<ARCH>::reg_byte_offsets[arch::traits<ARCH>::${k}]);<%}}%>
+                    // calculate next pc value
                    *NEXT_PC = *PC + ${instr.length/8};
                    // execute instruction<%instr.behavior.eachLine{%>
                    ${it}<%}%>
@@ -101,7 +101,7 @@ protected:
        return super::gen_cond_assign(cond, this->gen_ext(trueVal, size), this->gen_ext(falseVal, size));
    }

-    std::tuple<continuation_e, BasicBlock *> gen_single_inst_behavior(virt_addr_t &, unsigned int &, BasicBlock *) override;
+    std::tuple<continuation_e, BasicBlock *> gen_single_inst_behavior(virt_addr_t &, BasicBlock *) override;

    void gen_leave_behavior(BasicBlock *leave_blk) override;
    void gen_raise_trap(uint16_t trap_id, uint16_t cause);
@@ -244,7 +244,7 @@ vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)

 template <typename ARCH>
 std::tuple<continuation_e, BasicBlock *>
-vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, BasicBlock *this_block) {
+vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, BasicBlock *this_block) {
    // we fetch at max 4 byte, alignment is 2
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
@@ -256,9 +256,10 @@ vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt,
    auto res = this->core.read(paddr, 4, data);
    if (res != iss::Ok) 
        return std::make_tuple(ILLEGAL_FETCH, nullptr);
-    if (instr == 0x0000006f || (instr&0xffff)==0xa001)
+    if (instr == 0x0000006f || (instr&0xffff)==0xa001){
+        this->builder.CreateBr(this->leave_blk);
        return std::make_tuple(JUMP_TO_SELF, nullptr);
-    ++inst_cnt;
+        }
    uint32_t inst_index = instr_decoder.decode_instr(instr);
    compile_func f = nullptr;
    if(inst_index < instr_descr.size())
@@ -340,6 +341,10 @@ void vm_impl<ARCH>::gen_instr_epilogue(BasicBlock *bb) {
    auto* icount_val = this->builder.CreateAdd(
        this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::ICOUNT), get_reg_ptr(arch::traits<ARCH>::ICOUNT)), this->gen_const(64U, 1));
    this->builder.CreateStore(icount_val, get_reg_ptr(arch::traits<ARCH>::ICOUNT), false);
+    //increment cyclecount
+    auto* cycle_val = this->builder.CreateAdd(
+        this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::CYCLE), get_reg_ptr(arch::traits<ARCH>::CYCLE)), this->gen_const(64U, 1));
+    this->builder.CreateStore(cycle_val, get_reg_ptr(arch::traits<ARCH>::CYCLE), false);
 }

 } // namespace ${coreDef.name.toLowerCase()}
@@ -83,21 +83,21 @@ protected:
    using vm_base<ARCH>::get_reg_ptr;

    using this_class = vm_impl<ARCH>;
-    using compile_ret_t = std::tuple<continuation_e>;
+    using compile_ret_t = continuation_e;
    using compile_func = compile_ret_t (this_class::*)(virt_addr_t &pc, code_word_t instr, tu_builder&);

    inline const char *name(size_t index){return traits::reg_aliases.at(index);}
 <%
 if(fcsr != null) {%>
    inline const char *fname(size_t index){return index < 32?name(index+traits::F0):"illegal";}   
-
-    void add_prologue(tu_builder& tu) override;
 <%}%>
+    void add_prologue(tu_builder& tu) override;
+
    void setup_module(std::string m) override {
        super::setup_module(m);
    }

-    compile_ret_t gen_single_inst_behavior(virt_addr_t &, unsigned int &, tu_builder&) override;
+    compile_ret_t gen_single_inst_behavior(virt_addr_t &, tu_builder&) override;

    void gen_trap_behavior(tu_builder& tu) override;

@@ -176,6 +176,7 @@ private:
        auto cur_pc_val = tu.constant(pc.val, traits::reg_bit_widths[traits::PC]);
        pc=pc+ ${instr.length/8};
        gen_set_pc(tu, pc, traits::NEXT_PC);
+        tu("(*cycle)++;");
        tu.open_scope();
        this->gen_set_tval(tu, instr);
        <%instr.behavior.eachLine{%>${it}
@@ -225,8 +226,8 @@ vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
    }()) {}

 template <typename ARCH>
-std::tuple<continuation_e>
-vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, tu_builder& tu) {
+continuation_e
+vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, tu_builder& tu) {
    // we fetch at max 4 byte, alignment is 2
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
@@ -238,7 +239,6 @@ vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt,
        return ILLEGAL_FETCH;
    if (instr == 0x0000006f || (instr&0xffff)==0xa001) 
        return JUMP_TO_SELF;
-    ++inst_cnt;
    uint32_t inst_index = instr_decoder.decode_instr(instr);
    compile_func f = nullptr;
    if(inst_index < instr_descr.size())
@@ -273,10 +273,12 @@ template <typename ARCH> void vm_impl<ARCH>::gen_trap_behavior(tu_builder& tu) {
    tu.store(traits::LAST_BRANCH, tu.constant(static_cast<int>(UNKNOWN_JUMP),32));
    tu("return *next_pc;");
 }
-<%
-if(fcsr != null) {%>
 template <typename ARCH> void vm_impl<ARCH>::add_prologue(tu_builder& tu){
    std::ostringstream os;
+    os << tu.add_reg_ptr("trap_state", arch::traits<ARCH>::TRAP_STATE, this->regs_base_ptr);
+    os << tu.add_reg_ptr("pending_trap", arch::traits<ARCH>::PENDING_TRAP, this->regs_base_ptr);
+    os << tu.add_reg_ptr("cycle", arch::traits<ARCH>::CYCLE, this->regs_base_ptr);
+<%if(fcsr != null) {%>
    os << "uint32_t (*fget_flags)()=" << (uintptr_t)&fget_flags << ";\\n";
    os << "uint32_t (*fadd_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fadd_s << ";\\n";
    os << "uint32_t (*fsub_s)(uint32_t v1, uint32_t v2, uint8_t mode)=" << (uintptr_t)&fsub_s << ";\\n";
@@ -303,9 +305,9 @@ template <typename ARCH> void vm_impl<ARCH>::add_prologue(tu_builder& tu){
    os << "uint64_t (*fcvt_32_64)(uint32_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_32_64 << ";\\n";
    os << "uint32_t (*fcvt_64_32)(uint64_t v1, uint32_t op, uint8_t mode)=" << (uintptr_t)&fcvt_64_32 << ";\\n";
    os << "uint32_t (*unbox_s)(uint64_t v)=" << (uintptr_t)&unbox_s << ";\\n";
+    <%}%>
    tu.add_prologue(os.str());
 }
-<%}%>

 } // namespace ${coreDef.name.toLowerCase()}

@@ -1,3 +1,37 @@
+/*******************************************************************************
+ * Copyright (C) 2024 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
 #ifdef _MSC_VER
 #define _SCL_SECURE_NO_WARNINGS
 #define ELFIO_NO_INTTYPES
@@ -32,4 +66,4 @@ int main(int argc, char** argv) {
    dump::segment_datas(std::cout, reader);

    return 0;
-}
+}
@@ -0,0 +1,233 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+#ifndef _MSTATUS_TYPE
+#define _MSTATUS_TYPE
+
+#include <cstdint>
+#include <type_traits>
+#include <util/bit_field.h>
+#include <util/ities.h>
+
+namespace iss {
+namespace arch {
+
+// primary template: deliberately empty, accessors exist only in the uint32_t and uint64_t specializations
+template <class T, class Enable = void> struct status {};
+// specialization 32bit: static accessors extracting single mstatus fields from a 32 bit register value.
+// The bit positions mirror the BF_FIELD layout of hart_state<uint32_t>::mstatus_t.
+template <typename T> struct status<T, typename std::enable_if<std::is_same<T, uint32_t>::value>::type> {
+    // SD bit is read-only and is set when either the FS or XS bits encode a Dirty state (i.e., SD=((FS==11) OR
+    // XS==11))). It is the most significant bit of the register, i.e. bit 31 of a 32 bit mstatus; extracting
+    // bit 63 would shift beyond the width of T.
+    static inline unsigned SD(T v) { return bit_sub<31, 1>(v); }
+    // value of XLEN for S-mode. A 32 bit mstatus has no SXL field (bits 35:34 do not exist); S-mode XLEN is
+    // fixed to 32 in that case, which the XLEN encoding expresses as 1.
+    static inline unsigned SXL(T) { return 1; }
+    // value of XLEN for U-mode. A 32 bit mstatus has no UXL field (bits 33:32 do not exist); U-mode XLEN is
+    // fixed to 32 in that case, which the XLEN encoding expresses as 1.
+    static inline unsigned UXL(T) { return 1; }
+    // Trap SRET
+    static inline unsigned TSR(T v) { return bit_sub<22, 1>(v); }
+    // Timeout Wait
+    static inline unsigned TW(T v) { return bit_sub<21, 1>(v); }
+    // Trap Virtual Memory
+    static inline unsigned TVM(T v) { return bit_sub<20, 1>(v); }
+    // Make eXecutable Readable
+    static inline unsigned MXR(T v) { return bit_sub<19, 1>(v); }
+    // permit Supervisor User Memory access
+    static inline unsigned SUM(T v) { return bit_sub<18, 1>(v); }
+    // Modify PRiVilege
+    static inline unsigned MPRV(T v) { return bit_sub<17, 1>(v); }
+    // status of additional user-mode extensions and associated state, All off/None dirty or clean, some on/None
+    // dirty, some clean/Some dirty
+    static inline unsigned XS(T v) { return bit_sub<15, 2>(v); }
+    // floating-point unit status Off/Initial/Clean/Dirty
+    static inline unsigned FS(T v) { return bit_sub<13, 2>(v); }
+    // machine previous privilege
+    static inline unsigned MPP(T v) { return bit_sub<11, 2>(v); }
+    // supervisor previous privilege
+    static inline unsigned SPP(T v) { return bit_sub<8, 1>(v); }
+    // previous machine interrupt-enable
+    static inline unsigned MPIE(T v) { return bit_sub<7, 1>(v); }
+    // previous supervisor interrupt-enable
+    static inline unsigned SPIE(T v) { return bit_sub<5, 1>(v); }
+    // previous user interrupt-enable
+    static inline unsigned UPIE(T v) { return bit_sub<4, 1>(v); }
+    // machine interrupt-enable
+    static inline unsigned MIE(T v) { return bit_sub<3, 1>(v); }
+    // supervisor interrupt-enable
+    static inline unsigned SIE(T v) { return bit_sub<1, 1>(v); }
+    // user interrupt-enable
+    static inline unsigned UIE(T v) { return bit_sub<0, 1>(v); }
+};
+
+// specialization 64bit: static accessors, each returning one mstatus field of a 64 bit register value
+// as extracted by bit_sub<first bit, width>
+template <typename T> struct status<T, typename std::enable_if<std::is_same<T, uint64_t>::value>::type> {
+    // SD (bit 63): read-only, set when either the FS or XS bits encode a Dirty state
+    // (i.e., SD=((FS==11) OR XS==11)))
+    static unsigned SD(T v) { return bit_sub<63, 1>(v); }
+    // SXL (bits 35:34): value of XLEN for S-mode
+    static unsigned SXL(T v) { return bit_sub<34, 2>(v); }
+    // UXL (bits 33:32): value of XLEN for U-mode
+    static unsigned UXL(T v) { return bit_sub<32, 2>(v); }
+    // TSR (bit 22): Trap SRET
+    static unsigned TSR(T v) { return bit_sub<22, 1>(v); }
+    // TW (bit 21): Timeout Wait
+    static unsigned TW(T v) { return bit_sub<21, 1>(v); }
+    // TVM (bit 20): Trap Virtual Memory
+    static unsigned TVM(T v) { return bit_sub<20, 1>(v); }
+    // MXR (bit 19): Make eXecutable Readable
+    static unsigned MXR(T v) { return bit_sub<19, 1>(v); }
+    // SUM (bit 18): permit Supervisor User Memory access
+    static unsigned SUM(T v) { return bit_sub<18, 1>(v); }
+    // MPRV (bit 17): Modify PRiVilege
+    static unsigned MPRV(T v) { return bit_sub<17, 1>(v); }
+    // XS (bits 16:15): status of additional user-mode extensions and associated state,
+    // All off/None dirty or clean, some on/None dirty, some clean/Some dirty
+    static unsigned XS(T v) { return bit_sub<15, 2>(v); }
+    // FS (bits 14:13): floating-point unit status Off/Initial/Clean/Dirty
+    static unsigned FS(T v) { return bit_sub<13, 2>(v); }
+    // MPP (bits 12:11): machine previous privilege
+    static unsigned MPP(T v) { return bit_sub<11, 2>(v); }
+    // SPP (bit 8): supervisor previous privilege
+    static unsigned SPP(T v) { return bit_sub<8, 1>(v); }
+    // MPIE (bit 7): previous machine interrupt-enable
+    static unsigned MPIE(T v) { return bit_sub<7, 1>(v); }
+    // SPIE (bit 5): previous supervisor interrupt-enable
+    static unsigned SPIE(T v) { return bit_sub<5, 1>(v); }
+    // UPIE (bit 4): previous user interrupt-enable
+    static unsigned UPIE(T v) { return bit_sub<4, 1>(v); }
+    // MIE (bit 3): machine interrupt-enable
+    static unsigned MIE(T v) { return bit_sub<3, 1>(v); }
+    // SIE (bit 1): supervisor interrupt-enable
+    static unsigned SIE(T v) { return bit_sub<1, 1>(v); }
+    // UIE (bit 0): user interrupt-enable
+    static unsigned UIE(T v) { return bit_sub<0, 1>(v); }
+};
+
+// primary template: intentionally empty, members are only provided by the uint32_t and uint64_t specializations below
+template <class T, class Enable = void> struct hart_state {};
+// specialization 32bit: holds the mstatus register as a bit-field type declared with the BF_* macros;
+// the BF_FIELD arguments appear to be (name, lowest bit, width) - NOTE(review): confirm against util/bit_field.h
+template <typename T> class hart_state<T, typename std::enable_if<std::is_same<T, uint32_t>::value>::type> {
+public:
+    BEGIN_BF_DECL(mstatus_t, T);
+    // SD bit is read-only and is set when either the FS or XS bits encode a Dirty state (i.e., SD=((FS==11) OR
+    // XS==11))); declared at bit 31 here, the top bit of the 32 bit register
+    BF_FIELD(SD, 31, 1);
+    // Trap SRET
+    BF_FIELD(TSR, 22, 1);
+    // Timeout Wait
+    BF_FIELD(TW, 21, 1);
+    // Trap Virtual Memory
+    BF_FIELD(TVM, 20, 1);
+    // Make eXecutable Readable
+    BF_FIELD(MXR, 19, 1);
+    // permit Supervisor User Memory access
+    BF_FIELD(SUM, 18, 1);
+    // Modify PRiVilege
+    BF_FIELD(MPRV, 17, 1);
+    // status of additional user-mode extensions and associated state, All off/None dirty or clean, some on/None
+    // dirty, some clean/Some dirty
+    BF_FIELD(XS, 15, 2);
+    // floating-point unit status Off/Initial/Clean/Dirty
+    BF_FIELD(FS, 13, 2);
+    // machine previous privilege
+    BF_FIELD(MPP, 11, 2);
+    // supervisor previous privilege
+    BF_FIELD(SPP, 8, 1);
+    // previous machine interrupt-enable
+    BF_FIELD(MPIE, 7, 1);
+    // previous supervisor interrupt-enable
+    BF_FIELD(SPIE, 5, 1);
+    // previous user interrupt-enable
+    BF_FIELD(UPIE, 4, 1);
+    // machine interrupt-enable
+    BF_FIELD(MIE, 3, 1);
+    // supervisor interrupt-enable
+    BF_FIELD(SIE, 1, 1);
+    // user interrupt-enable
+    BF_FIELD(UIE, 0, 1);
+    END_BF_DECL();
+
+    mstatus_t mstatus; // the mstatus register instance itself
+
+    static const T mstatus_reset_val = 0x1800; // 0x1800 == bits 11 and 12 set, i.e. both bits of the MPP field
+};
+
+// specialization 64bit: same layout as the 32 bit variant except that SD is declared at bit 63 and the
+// SXL/UXL fields (bits 34 and 32, two bits each) are present
+template <typename T> class hart_state<T, typename std::enable_if<std::is_same<T, uint64_t>::value>::type> {
+public:
+    BEGIN_BF_DECL(mstatus_t, T);
+    // SD bit is read-only and is set when either the FS or XS bits encode a Dirty state (i.e., SD=((FS==11) OR
+    // XS==11))); declared at bit 63 here, the top bit of the 64 bit register
+    BF_FIELD(SD, 63, 1);
+    // value of XLEN for S-mode
+    BF_FIELD(SXL, 34, 2);
+    // value of XLEN for U-mode
+    BF_FIELD(UXL, 32, 2);
+    // Trap SRET
+    BF_FIELD(TSR, 22, 1);
+    // Timeout Wait
+    BF_FIELD(TW, 21, 1);
+    // Trap Virtual Memory
+    BF_FIELD(TVM, 20, 1);
+    // Make eXecutable Readable
+    BF_FIELD(MXR, 19, 1);
+    // permit Supervisor User Memory access
+    BF_FIELD(SUM, 18, 1);
+    // Modify PRiVilege
+    BF_FIELD(MPRV, 17, 1);
+    // status of additional user-mode extensions and associated state, All off/None dirty or clean, some on/None
+    // dirty, some clean/Some dirty
+    BF_FIELD(XS, 15, 2);
+    // floating-point unit status Off/Initial/Clean/Dirty
+    BF_FIELD(FS, 13, 2);
+    // machine previous privilege
+    BF_FIELD(MPP, 11, 2);
+    // supervisor previous privilege
+    BF_FIELD(SPP, 8, 1);
+    // previous machine interrupt-enable
+    BF_FIELD(MPIE, 7, 1);
+    // previous supervisor interrupt-enable
+    BF_FIELD(SPIE, 5, 1);
+    // previous user interrupt-enable
+    BF_FIELD(UPIE, 4, 1);
+    // machine interrupt-enable
+    BF_FIELD(MIE, 3, 1);
+    // supervisor interrupt-enable
+    BF_FIELD(SIE, 1, 1);
+    // user interrupt-enable
+    BF_FIELD(UIE, 0, 1);
+    END_BF_DECL();
+
+    mstatus_t mstatus; // the mstatus register instance itself
+
+    static const T mstatus_reset_val = 0x1800; // 0x1800 == bits 11 and 12 set, i.e. both bits of the MPP field
+};
+} // namespace arch
+} // namespace iss
+#endif // _MSTATUS_TYPE
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017, 2018, 2021 MINRES Technologies GmbH
+ * Copyright (C) 2017 - 2025 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -35,15 +35,24 @@
 #ifndef _RISCV_HART_COMMON
 #define _RISCV_HART_COMMON

-#include "iss/vm_types.h"
+#include "mstatus.h"
+#include "util/delegate.h"
+#include <array>
 #include <cstdint>
 #include <elfio/elfio.hpp>
 #include <fmt/format.h>
+#include <iss/arch/traits.h>
 #include <iss/arch_if.h>
 #include <iss/log_categories.h>
+#include <iss/mem/memory_if.h>
+#include <iss/semihosting/semihosting.h>
+#include <iss/vm_types.h>
+#include <limits>
+#include <sstream>
 #include <string>
 #include <unordered_map>
 #include <util/logging.h>
+#include <util/sparse_array.h>

 #if defined(__GNUC__)
 #define likely(x) ::__builtin_expect(!!(x), 1)
@@ -56,9 +65,7 @@
 namespace iss {
 namespace arch {

-enum { tohost_dflt = 0xF0001000, fromhost_dflt = 0xF0001040 };
-
-enum features_e { FEAT_NONE, FEAT_PMP = 1, FEAT_EXT_N = 2, FEAT_CLIC = 4, FEAT_DEBUG = 8, FEAT_TCM = 16 };
+enum features_e { FEAT_NONE, FEAT_EXT_N = 1, FEAT_DEBUG = 2 };

 enum riscv_csr {
    /* user-level CSR */
@@ -187,26 +194,19 @@ enum riscv_csr {
    dcsr = 0x7B0,
    dpc = 0x7B1,
    dscratch0 = 0x7B2,
-    dscratch1 = 0x7B3
+    dscratch1 = 0x7B3,
+    // vector CSR
+    //  URW
+    vstart = 0x008,
+    vxsat = 0x009,
+    vxrm = 0x00A,
+    vcsr = 0x00F,
+    // URO
+    vl = 0xC20,
+    vtype = 0xC21,
+    vlenb = 0xC22,
 };

-enum {
-    PGSHIFT = 12,
-    PTE_PPN_SHIFT = 10,
-    // page table entry (PTE) fields
-    PTE_V = 0x001,   // Valid
-    PTE_R = 0x002,   // Read
-    PTE_W = 0x004,   // Write
-    PTE_X = 0x008,   // Execute
-    PTE_U = 0x010,   // User
-    PTE_G = 0x020,   // Global
-    PTE_A = 0x040,   // Accessed
-    PTE_D = 0x080,   // Dirty
-    PTE_SOFT = 0x300 // Reserved for Software
-};
-
-template <typename T> inline bool PTE_TABLE(T PTE) { return (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V); }
-
 enum { PRIV_U = 0, PRIV_S = 1, PRIV_M = 3, PRIV_D = 4 };

 enum {
@@ -225,25 +225,6 @@ enum {
    ISA_U = 1 << 20
 };

-struct vm_info {
-    int levels;
-    int idxbits;
-    int ptesize;
-    uint64_t ptbase;
-    bool is_active() { return levels; }
-};
-
-struct feature_config {
-    uint64_t clic_base{0xc0000000};
-    unsigned clic_int_ctl_bits{4};
-    unsigned clic_num_irq{16};
-    unsigned clic_num_trigger{0};
-    uint64_t tcm_base{0x10000000};
-    uint64_t tcm_size{0x8000};
-    uint64_t io_address{0xf0000000};
-    uint64_t io_addr_mask{0xf0000000};
-};
-
 class trap_load_access_fault : public trap_access {
 public:
    trap_load_access_fault(uint64_t badaddr)
@@ -270,64 +251,171 @@ public:
    : trap_access(15 << 16, badaddr) {}
 };

-inline void read_reg_uint32(uint64_t offs, uint32_t& reg, uint8_t* const data, unsigned length) {
-    auto reg_ptr = reinterpret_cast<uint8_t*>(&reg);
-    switch(offs & 0x3) {
-    case 0:
-        for(auto i = 0U; i < length; ++i)
-            *(data + i) = *(reg_ptr + i);
-        break;
-    case 1:
-        for(auto i = 0U; i < length; ++i)
-            *(data + i) = *(reg_ptr + 1 + i);
-        break;
-    case 2:
-        for(auto i = 0U; i < length; ++i)
-            *(data + i) = *(reg_ptr + 2 + i);
-        break;
-    case 3:
-        *data = *(reg_ptr + 3);
-        break;
-    }
-}
+template <typename WORD_TYPE> struct priv_if { // bundle of callbacks plus references to hart state owned elsewhere
+    using rd_csr_f = std::function<iss::status(unsigned addr, WORD_TYPE&)>; // CSR read callback: value is returned through the reference
+    using wr_csr_f = std::function<iss::status(unsigned addr, WORD_TYPE)>; // CSR write callback: value is passed by value

-inline void write_reg_uint32(uint64_t offs, uint32_t& reg, const uint8_t* const data, unsigned length) {
-    auto reg_ptr = reinterpret_cast<uint8_t*>(&reg);
-    switch(offs & 0x3) {
-    case 0:
-        for(auto i = 0U; i < length; ++i)
-            *(reg_ptr + i) = *(data + i);
-        break;
-    case 1:
-        for(auto i = 0U; i < length; ++i)
-            *(reg_ptr + 1 + i) = *(data + i);
-        break;
-    case 2:
-        for(auto i = 0U; i < length; ++i)
-            *(reg_ptr + 2 + i) = *(data + i);
-        break;
-    case 3:
-        *(reg_ptr + 3) = *data;
-        break;
+    std::function<iss::status(unsigned, WORD_TYPE&)> read_csr; // same signature as rd_csr_f
+    std::function<iss::status(unsigned, WORD_TYPE)> write_csr; // same signature as wr_csr_f
+    std::function<iss::status(uint8_t const*)> exec_htif; // presumably handles a host-target-interface request - TODO confirm
+    std::function<void(uint16_t, uint16_t, WORD_TYPE)> raise_trap; // trap_id, cause, fault_data
+    std::unordered_map<unsigned, rd_csr_f>& csr_rd_cb; // reference to a map of read callbacks, presumably keyed by CSR address
+    std::unordered_map<unsigned, wr_csr_f>& csr_wr_cb; // reference to a map of write callbacks, presumably keyed by CSR address
+    hart_state<WORD_TYPE>& state; // reference to the hart_state (holds mstatus)
+    uint8_t& PRIV; // reference, presumably the current privilege level - TODO confirm
+    WORD_TYPE& PC; // reference, presumably the program counter - TODO confirm
+    uint64_t& tohost; // reference to a tohost value held elsewhere
+    uint64_t& fromhost; // reference to a fromhost value held elsewhere
+    unsigned& max_irq; // reference, presumably the highest interrupt number - TODO confirm
+};
+
+template <typename BASE, typename LOGCAT = logging::disass> struct riscv_hart_common : public BASE, public mem::memory_elem {
+    const std::array<const char, 4> lvl = {{'U', 'S', 'H', 'M'}};
+    // names of the trap causes; the trailing comments give the index (0x0..0xf) of each entry
+    const std::array<const char*, 16> trap_str = {{
+        "Instruction address misaligned", // 0
+        "Instruction access fault",       // 1
+        "Illegal instruction",            // 2
+        "Breakpoint",                     // 3
+        "Load address misaligned",        // 4
+        "Load access fault",              // 5
+        "Store/AMO address misaligned",   // 6
+        "Store/AMO access fault",         // 7
+        "Environment call from U-mode",   // 8
+        "Environment call from S-mode",   // 9
+        "Reserved",                       // a
+        "Environment call from M-mode",   // b
+        "Instruction page fault",         // c
+        "Load page fault",                // d
+        "Reserved",                       // e
+        "Store/AMO page fault"            // f
+    }};
+    const std::array<const char*, 12> irq_str = {{"User software interrupt", "Supervisor software interrupt", "Reserved",
+                                                  "Machine software interrupt", "User timer interrupt", "Supervisor timer interrupt",
+                                                  "Reserved", "Machine timer interrupt", "User external interrupt",
+                                                  "Supervisor external interrupt", "Reserved", "Machine external interrupt"}};
+    constexpr static unsigned MEM = traits<BASE>::MEM;
+
+    using core = BASE;
+    using this_class = riscv_hart_common<BASE, LOGCAT>;
+    using phys_addr_t = typename core::phys_addr_t;
+    using reg_t = typename core::reg_t;
+    using addr_t = typename core::addr_t;
+
+    using rd_csr_f = std::function<iss::status(unsigned addr, reg_t&)>;
+    using wr_csr_f = std::function<iss::status(unsigned addr, reg_t)>;
+
+#define MK_CSR_RD_CB(FCT) [this](unsigned a, reg_t& r) -> iss::status { return this->FCT(a, r); };
+#define MK_CSR_WR_CB(FCT) [this](unsigned a, reg_t r) -> iss::status { return this->FCT(a, r); };
+
+    riscv_hart_common()
+    : state()
+    , instr_if(*this) {
+        // reset values of the identification CSRs; misa and marchid come from the core's traits
+        csr[misa] = traits<BASE>::MISA_VAL;
+        csr[mvendorid] = 0x669;
+        csr[marchid] = traits<BASE>::MARCHID_VAL;
+        csr[mimpid] = 1;
+
+        if(traits<BASE>::FLEN > 0) { // floating point CSRs are only registered when the core has a non-zero FLEN
+            csr_rd_cb[fcsr] = MK_CSR_RD_CB(read_fcsr);
+            csr_wr_cb[fcsr] = MK_CSR_WR_CB(write_fcsr);
+            csr_rd_cb[fflags] = MK_CSR_RD_CB(read_fcsr); // fflags and frm share the fcsr handlers
+            csr_wr_cb[fflags] = MK_CSR_WR_CB(write_fcsr);
+            csr_rd_cb[frm] = MK_CSR_RD_CB(read_fcsr);
+            csr_wr_cb[frm] = MK_CSR_WR_CB(write_fcsr);
+        }
+        if(traits<BASE>::V_REGS_SIZE > 0) { // vector CSRs are only registered when the core has vector registers
+            csr_rd_cb[vstart] = MK_CSR_RD_CB(read_vstart);
+            csr_wr_cb[vstart] = MK_CSR_WR_CB(write_vstart);
+            csr_rd_cb[vxsat] = MK_CSR_RD_CB(read_vxsat);
+            csr_wr_cb[vxsat] = MK_CSR_WR_CB(write_vxsat);
+            csr_rd_cb[vxrm] = MK_CSR_RD_CB(read_vxrm);
+            csr_wr_cb[vxrm] = MK_CSR_WR_CB(write_vxrm);
+            csr_rd_cb[vcsr] = MK_CSR_RD_CB(read_vcsr);
+            csr_wr_cb[vcsr] = MK_CSR_WR_CB(write_vcsr);
+            csr_rd_cb[vl] = MK_CSR_RD_CB(read_vl); // vl, vtype and vlenb get a read callback only
+            csr_rd_cb[vtype] = MK_CSR_RD_CB(read_vtype);
+            csr_rd_cb[vlenb] = MK_CSR_RD_CB(read_vlenb);
+        }
+        for(unsigned addr = mhpmcounter3; addr <= mhpmcounter31; ++addr) { // machine hpm counters: read_null / write_plain
+            csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
+            csr_wr_cb[addr] = MK_CSR_WR_CB(write_plain);
+        }
+        if(traits<BASE>::XLEN == 32) // the *h (upper half) CSRs are only registered for XLEN == 32
+            for(unsigned addr = mhpmcounter3h; addr <= mhpmcounter31h; ++addr) {
+                csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
+                csr_wr_cb[addr] = MK_CSR_WR_CB(write_plain);
+            }
+        for(unsigned addr = mhpmevent3; addr <= mhpmevent31; ++addr) {
+            csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
+            csr_wr_cb[addr] = MK_CSR_WR_CB(write_plain);
+        }
+        for(unsigned addr = hpmcounter3; addr <= hpmcounter31; ++addr) { // hpmcounter CSRs get a read callback only
+            csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
+        }
+        if(traits<BASE>::XLEN == 32)
+            for(unsigned addr = hpmcounter3h; addr <= hpmcounter31h; ++addr) {
+                csr_rd_cb[addr] = MK_CSR_RD_CB(read_null);
+            }
+        // common regs: the identification CSRs initialised above are read via read_plain, writes go to write_null
+        const std::array<unsigned, 4> roaddrs{{misa, mvendorid, marchid, mimpid}};
+        for(auto addr : roaddrs) {
+            csr_rd_cb[addr] = MK_CSR_RD_CB(read_plain);
+            csr_wr_cb[addr] = MK_CSR_WR_CB(write_null);
+        }
+        // special handling & overrides: time, cycle and instret (and their *h variants for XLEN == 32) use dedicated
+        // handlers; time/cycle/instret are read-only here, mcycle/minstret additionally get write handlers
+        csr_rd_cb[time] = MK_CSR_RD_CB(read_time);
+        if(traits<BASE>::XLEN == 32)
+            csr_rd_cb[timeh] = MK_CSR_RD_CB(read_time);
+        csr_rd_cb[cycle] = MK_CSR_RD_CB(read_cycle);
+        if(traits<BASE>::XLEN == 32)
+            csr_rd_cb[cycleh] = MK_CSR_RD_CB(read_cycle);
+        csr_rd_cb[instret] = MK_CSR_RD_CB(read_instret);
+        if(traits<BASE>::XLEN == 32)
+            csr_rd_cb[instreth] = MK_CSR_RD_CB(read_instret);
+
+        csr_rd_cb[mcycle] = MK_CSR_RD_CB(read_cycle);
+        csr_wr_cb[mcycle] = MK_CSR_WR_CB(write_cycle);
+        if(traits<BASE>::XLEN == 32)
+            csr_rd_cb[mcycleh] = MK_CSR_RD_CB(read_cycle);
+        if(traits<BASE>::XLEN == 32)
+            csr_wr_cb[mcycleh] = MK_CSR_WR_CB(write_cycle);
+        csr_rd_cb[minstret] = MK_CSR_RD_CB(read_instret);
+        csr_wr_cb[minstret] = MK_CSR_WR_CB(write_instret);
+        if(traits<BASE>::XLEN == 32)
+            csr_rd_cb[minstreth] = MK_CSR_RD_CB(read_instret);
+        if(traits<BASE>::XLEN == 32)
+            csr_wr_cb[minstreth] = MK_CSR_WR_CB(write_instret);
+        csr_rd_cb[mhartid] = MK_CSR_RD_CB(read_hartid); // mhartid gets a read callback only
+    };
+
+    ~riscv_hart_common() {
+        // emit whatever text is still held in io_buf as one INFO log line; nothing is logged for an empty buffer
+        const auto pending = io_buf.str();
+        if(!pending.empty())
+            CPPLOG(INFO) << "tohost send '" << pending << "'";
+    }
-}
-struct riscv_hart_common {
-    riscv_hart_common(){};
-    ~riscv_hart_common(){};
+
    std::unordered_map<std::string, uint64_t> symbol_table;
    uint64_t entry_address{0};
-    uint64_t tohost = tohost_dflt;
-    uint64_t fromhost = fromhost_dflt;
+    uint64_t tohost = std::numeric_limits<uint64_t>::max();
+    uint64_t fromhost = std::numeric_limits<uint64_t>::max();
+    std::stringstream io_buf;

-    bool read_elf_file(std::string name, uint8_t expected_elf_class,
-                       std::function<iss::status(uint64_t, uint64_t, const uint8_t* const)> cb) {
+    void set_semihosting_callback(semihosting_cb_t<reg_t> cb) { semihosting_cb = cb; };
+
+    // Loads the file `name` through read_elf_file and returns {entry_address, success flag}.
+    // The expected ELF class is chosen from the register width: ELFCLASS32 when sizeof(reg_t) == 4,
+    // ELFCLASS64 otherwise. The `type` argument is not used by this implementation.
+    std::pair<uint64_t, bool> load_file(std::string name, int type) {
+        const uint8_t elf_class = sizeof(reg_t) == 4 ? ELFIO::ELFCLASS32 : ELFIO::ELFCLASS64;
+        // run the load first so that entry_address is read only after read_elf_file has returned
+        const bool loaded = read_elf_file(name, elf_class);
+        return std::make_pair(entry_address, loaded);
+    }
+
+    bool read_elf_file(std::string name, uint8_t expected_elf_class) {
        // Create elfio reader
        ELFIO::elfio reader;
        // Load ELF data
        if(reader.load(name)) {
            // check elf properties
-            if(reader.get_class() != expected_elf_class)
+            if(reader.get_class() != expected_elf_class) {
+                CPPLOG(ERR) << "ISA missmatch, selected XLEN does not match supplied file ";
                return false;
+            }
            if(reader.get_type() != ELFIO::ET_EXEC)
                return false;
            if(reader.get_machine() != ELFIO::EM_RISCV)
@@ -337,8 +425,9 @@ struct riscv_hart_common {
                const auto fsize = pseg->get_file_size(); // 0x42c/0x0
                const auto seg_data = pseg->get_data();
                const auto type = pseg->get_type();
-                if(type == 1 && fsize > 0) {
-                    auto res = cb(pseg->get_physical_address(), fsize, reinterpret_cast<const uint8_t* const>(seg_data));
+                if(type == ELFIO::PT_LOAD && fsize > 0) {
+                    auto res = this->write(iss::address_type::PHYSICAL, iss::access_type::DEBUG_WRITE, traits<BASE>::MEM,
+                                           pseg->get_physical_address(), fsize, reinterpret_cast<const uint8_t* const>(seg_data));
                    if(res != iss::Ok)
                        CPPLOG(ERR) << "problem writing " << fsize << "bytes to 0x" << std::hex << pseg->get_physical_address();
                }
@@ -363,20 +452,459 @@ struct riscv_hart_common {
 #endif
                    }
                }
-                try {
-                    tohost = symbol_table.at("tohost");
-                    try {
-                        fromhost = symbol_table.at("fromhost");
-                    } catch(std::out_of_range& e) {
-                        fromhost = tohost + 0x40;
-                    }
-                } catch(std::out_of_range& e) {
-                }
+                // Pick up the HTIF mailbox addresses if the ELF defines them; a symbol that is
+                // missing leaves the corresponding member at its "not set" default value.
+                auto to_it = symbol_table.find("tohost");
+                if(to_it != std::end(symbol_table))
+                    tohost = to_it->second;
+                auto from_it = symbol_table.find("fromhost");
+                if(from_it != std::end(symbol_table))
+                    fromhost = from_it->second;
            }
            return true;
        }
        return false;
    };
+
+    iss::status execute_sys_write(arch_if* aif, const std::array<uint64_t, 8>& loaded_payload, unsigned mem_type) {
+        uint64_t fd = loaded_payload[1];
+        uint64_t buf_ptr = loaded_payload[2];
+        uint64_t len = loaded_payload[3];
+        std::vector<char> buf(len);
+        if(aif->read(address_type::PHYSICAL, access_type::DEBUG_READ, mem_type, buf_ptr, len, reinterpret_cast<uint8_t*>(buf.data()))) {
+            CPPLOG(ERR) << "SYS_WRITE buffer read went wrong";
+            return iss::Err;
+        }
+        // we disregard the fd and just log to stdout
+        for(size_t i = 0; i < len; i++) {
+            if(buf[i] == '\n' || buf[i] == '\0') {
+                CPPLOG(INFO) << "tohost send '" << io_buf.str() << "'";
+                io_buf.str("");
+            } else
+                io_buf << buf[i];
+        }
+
+        // Not sure what the correct return value should be
+        uint8_t ret_val = 1;
+        if(fromhost != std::numeric_limits<uint64_t>::max())
+            if(aif->write(address_type::PHYSICAL, access_type::DEBUG_WRITE, mem_type, fromhost, 1, &ret_val)) {
+                CPPLOG(ERR) << "Fromhost write went wrong";
+                return iss::Err;
+            }
+        return iss::Ok;
+    }
+
+    constexpr bool has_compressed() { return traits<BASE>::MISA_VAL & 0b0100; }
+
+    constexpr reg_t get_pc_mask() { return has_compressed() ? (reg_t)~1 : (reg_t)~3; }
+
+    void disass_output(uint64_t pc, const std::string instr) override {
+        // NSCLOG(INFO, LOGCAT) << fmt::format("0x{:016x}    {:40} [p:{};s:0x{:x};c:{}]", pc, instr, lvl[this->reg.PRIV],
+        // (reg_t)state.mstatus,
+        //                                     this->reg.cycle + cycle_offset);
+        NSCLOG(INFO, LOGCAT) << fmt::format("0x{:016x}    {:40} [p:{};c:{}]", pc, instr, lvl[this->reg.PRIV],
+                                            this->reg.cycle + cycle_offset);
+    };
+
+    void register_csr(unsigned addr, rd_csr_f f) { csr_rd_cb[addr] = f; }
+    void register_csr(unsigned addr, wr_csr_f f) { csr_wr_cb[addr] = f; }
+    void register_csr(unsigned addr, rd_csr_f rdf, wr_csr_f wrf) {
+        csr_rd_cb[addr] = rdf;
+        csr_wr_cb[addr] = wrf;
+    }
+    void unregister_csr_rd(unsigned addr) { csr_rd_cb.erase(addr); }
+    void unregister_csr_wr(unsigned addr) { csr_wr_cb.erase(addr); }
+
+    bool debug_mode_active() { return this->reg.PRIV & 0x4; }
+
+    const reg_t& get_mhartid() const { return mhartid_reg; }
+    void set_mhartid(reg_t mhartid) { mhartid_reg = mhartid; };
+
+    iss::status read_csr(unsigned addr, reg_t& val) {
+        if(addr >= csr.size())
+            return iss::Err;
+        auto req_priv_lvl = (addr >> 8) & 0x3;
+        if(this->reg.PRIV < req_priv_lvl) // not having required privileges
+            throw illegal_instruction_fault(this->fault_data);
+        auto it = csr_rd_cb.find(addr);
+        if(it == csr_rd_cb.end() || !it->second) // non existent register
+            throw illegal_instruction_fault(this->fault_data);
+        return it->second(addr, val);
+    }
+
+    iss::status write_csr(unsigned addr, reg_t val) {
+        if(addr >= csr.size())
+            return iss::Err;
+        auto req_priv_lvl = (addr >> 8) & 0x3;
+        if(this->reg.PRIV < req_priv_lvl) // not having required privileges
+            throw illegal_instruction_fault(this->fault_data);
+        if((addr & 0xc00) == 0xc00) // writing to read-only region
+            throw illegal_instruction_fault(this->fault_data);
+        auto it = csr_wr_cb.find(addr);
+        if(it == csr_wr_cb.end() || !it->second) // non existent register
+            throw illegal_instruction_fault(this->fault_data);
+        return it->second(addr, val);
+    }
+
+    iss::status read_null(unsigned addr, reg_t& val) {
+        val = 0;
+        return iss::Ok;
+    }
+
+    iss::status write_null(unsigned addr, reg_t val) { return iss::status::Ok; }
+
+    iss::status read_plain(unsigned addr, reg_t& val) {
+        val = csr[addr];
+        return iss::Ok;
+    }
+
+    iss::status write_plain(unsigned addr, reg_t val) {
+        csr[addr] = val;
+        return iss::Ok;
+    }
+
+    iss::status read_cycle(unsigned addr, reg_t& val) {
+        auto cycle_val = this->reg.cycle + cycle_offset;
+        if(addr == mcycle) {
+            val = static_cast<reg_t>(cycle_val);
+        } else if(addr == mcycleh) {
+            val = static_cast<reg_t>(cycle_val >> 32);
+        }
+        return iss::Ok;
+    }
+
+    iss::status write_cycle(unsigned addr, reg_t val) {
+        if(sizeof(typename traits<BASE>::reg_t) != 4) {
+            mcycle_csr = static_cast<uint64_t>(val);
+        } else {
+            if(addr == mcycle) {
+                mcycle_csr = (mcycle_csr & 0xffffffff00000000) + val;
+            } else {
+                mcycle_csr = (static_cast<uint64_t>(val) << 32) + (mcycle_csr & 0xffffffff);
+            }
+        }
+        cycle_offset = mcycle_csr - this->reg.cycle; // TODO: relying on wrap-around
+        return iss::Ok;
+    }
+
+    iss::status read_instret(unsigned addr, reg_t& val) {
+        if((addr & 0xff) == (minstret & 0xff)) {
+            val = static_cast<reg_t>(this->reg.instret);
+        } else if((addr & 0xff) == (minstreth & 0xff)) {
+            val = static_cast<reg_t>(this->reg.instret >> 32);
+        }
+        return iss::Ok;
+    }
+
+    iss::status write_instret(unsigned addr, reg_t val) {
+        if(sizeof(typename traits<BASE>::reg_t) != 4) {
+            this->reg.instret = static_cast<uint64_t>(val);
+        } else {
+            if((addr & 0xff) == (minstret & 0xff)) {
+                this->reg.instret = (this->reg.instret & 0xffffffff00000000) + val;
+            } else {
+                this->reg.instret = (static_cast<uint64_t>(val) << 32) + (this->reg.instret & 0xffffffff);
+            }
+        }
+        this->reg.instret--;
+        return iss::Ok;
+    }
+
+    iss::status read_time(unsigned addr, reg_t& val) {
+        uint64_t time_val = this->reg.cycle / (100000000 / 32768 - 1); //-> ~3052;
+        if(addr == time) {
+            val = static_cast<reg_t>(time_val);
+        } else if(addr == timeh) {
+            if(sizeof(typename traits<BASE>::reg_t) != 4)
+                return iss::Err;
+            val = static_cast<reg_t>(time_val >> 32);
+        }
+        return iss::Ok;
+    }
+
+    iss::status read_tvec(unsigned addr, reg_t& val) {
+        val = csr[addr] & ~2;
+        return iss::Ok;
+    }
+
+    iss::status read_hartid(unsigned addr, reg_t& val) {
+        val = mhartid_reg;
+        return iss::Ok;
+    }
+
+    iss::status write_epc(unsigned addr, reg_t val) {
+        csr[addr] = val & get_pc_mask();
+        return iss::Ok;
+    }
+
+    iss::status write_dcsr(unsigned addr, reg_t val) {
+        if(!debug_mode_active())
+            throw illegal_instruction_fault(this->fault_data);
+        //                  +-------------- ebreakm
+        //                  |   +---------- stepi
+        //                  |   |  +++----- cause
+        //                  |   |  |||   +- step
+        csr[addr] = val & 0b1000100111000100U;
+        return iss::Ok;
+    }
+
+    iss::status read_debug(unsigned addr, reg_t& val) {
+        if(!debug_mode_active())
+            throw illegal_instruction_fault(this->fault_data);
+        val = csr[addr];
+        return iss::Ok;
+    }
+
+    iss::status write_dscratch(unsigned addr, reg_t val) {
+        if(!debug_mode_active())
+            throw illegal_instruction_fault(this->fault_data);
+        csr[addr] = val;
+        return iss::Ok;
+    }
+
+    iss::status read_dpc(unsigned addr, reg_t& val) {
+        if(!debug_mode_active())
+            throw illegal_instruction_fault(this->fault_data);
+        val = this->reg.DPC;
+        return iss::Ok;
+    }
+
+    iss::status write_dpc(unsigned addr, reg_t val) {
+        if(!debug_mode_active())
+            throw illegal_instruction_fault(this->fault_data);
+        this->reg.DPC = val;
+        return iss::Ok;
+    }
+
+    iss::status read_fcsr(unsigned addr, reg_t& val) {
+        switch(addr) {
+        case 1: // fflags, 4:0
+            val = bit_sub<0, 5>(this->get_fcsr());
+            break;
+        case 2: // frm, 7:5
+            val = bit_sub<5, 3>(this->get_fcsr());
+            break;
+        case 3: // fcsr
+            val = this->get_fcsr();
+            break;
+        default:
+            return iss::Err;
+        }
+        return iss::Ok;
+    }
+
+    iss::status write_fcsr(unsigned addr, reg_t val) {
+        switch(addr) {
+        case 1: // fflags, 4:0
+            this->set_fcsr((this->get_fcsr() & 0xffffffe0) | (val & 0x1f));
+            break;
+        case 2: // frm, 7:5
+            this->set_fcsr((this->get_fcsr() & 0xffffff1f) | ((val & 0x7) << 5));
+            break;
+        case 3: // fcsr
+            this->set_fcsr(val & 0xff);
+            break;
+        default:
+            return iss::Err;
+        }
+        return iss::Ok;
+    }
+
+    iss::status read_vstart(unsigned addr, reg_t& val) {
+        val = this->get_vstart();
+        return iss::Ok;
+    }
+
+    iss::status write_vstart(unsigned addr, reg_t val) {
+        this->set_vstart(val);
+        return iss::Ok;
+    }
+
+    iss::status read_vxsat(unsigned addr, reg_t& val) {
+        val = this->get_vxsat();
+        return iss::Ok;
+    }
+
+    iss::status write_vxsat(unsigned addr, reg_t val) {
+        this->set_vxsat(val & 1);
+        csr[vcsr] = (~1ULL & csr[vcsr]) | (val & 1);
+        return iss::Ok;
+    }
+
+    iss::status read_vxrm(unsigned addr, reg_t& val) {
+        val = this->get_vxrm();
+        return iss::Ok;
+    }
+
+    iss::status write_vxrm(unsigned addr, reg_t val) {
+        this->set_vxrm(val & 0b11);
+        csr[vcsr] = (~0b110ULL & csr[vcsr]) | ((val & 0b11) << 1);
+        return iss::Ok;
+    }
+
+    iss::status read_vcsr(unsigned addr, reg_t& val) {
+        val = csr[vcsr];
+        return iss::Ok;
+    }
+
+    iss::status write_vcsr(unsigned addr, reg_t val) {
+        csr[vcsr] = val;
+        return iss::Ok;
+    }
+
+    iss::status read_vl(unsigned addr, reg_t& val) {
+        val = this->get_vl();
+        return iss::Ok;
+    }
+
+    iss::status read_vtype(unsigned addr, reg_t& val) {
+        val = this->get_vtype();
+        return iss::Ok;
+    }
+
+    iss::status read_vlenb(unsigned addr, reg_t& val) {
+        val = csr[vlenb];
+        return iss::Ok;
+    }
+
+    priv_if<reg_t> get_priv_if() {
+        return priv_if<reg_t>{.read_csr = [this](unsigned addr, reg_t& val) -> iss::status { return read_csr(addr, val); },
+                              .write_csr = [this](unsigned addr, reg_t val) -> iss::status { return write_csr(addr, val); },
+                              .exec_htif = [this](uint8_t const* data) -> iss::status { return execute_htif(data); },
+                              .raise_trap =
+                                  [this](uint16_t trap_id, uint16_t cause, reg_t fault_data) {
+                                      this->reg.trap_state = 0x80ULL << 24 | (cause << 16) | trap_id;
+                                      this->fault_data = fault_data;
+                                  },
+                              .csr_rd_cb{this->csr_rd_cb},
+                              .csr_wr_cb{csr_wr_cb},
+                              .state{this->state},
+                              .PRIV{this->reg.PRIV},
+                              .PC{this->reg.PC},
+                              .tohost{this->tohost},
+                              .fromhost{this->fromhost},
+                              .max_irq{mcause_max_irq}};
+    }
+
+    /**
+     * Handle a value written to the HTIF tohost location.
+     *
+     * The value is split into device (bits 63:56), command (bits 55:48) and payload
+     * (bits 47:0); device and command are forced to 0 when XLEN is 32. An odd payload
+     * stops the simulation. For device 0 / command 0 the payload is the address of
+     * eight 64-bit words describing a syscall, of which only SYS_WRITE (64) is handled.
+     *
+     * @param data pointer to the raw tohost value, read as a reg_t
+     * @return iss::Err if the syscall block cannot be read, otherwise the result of
+     *         the syscall handler or iss::Ok
+     */
+    iss::status execute_htif(uint8_t const* data) {
+        reg_t cur_data = *reinterpret_cast<const reg_t*>(data);
+        // Extract Device (bits 63:56)
+        uint8_t device = traits<BASE>::XLEN == 32 ? 0 : (cur_data >> 56) & 0xFF;
+        // Extract Command (bits 55:48)
+        uint8_t command = traits<BASE>::XLEN == 32 ? 0 : (cur_data >> 48) & 0xFF;
+        // Extract payload (bits 47:0)
+        uint64_t payload_addr = cur_data & 0xFFFFFFFFFFFFULL;
+        if(payload_addr & 1) {
+            CPPLOG(FATAL) << "this->tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr
+                          << "), stopping simulation";
+            this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+            this->interrupt_sim = payload_addr;
+            return iss::Ok;
+        } else if(device == 0 && command == 0) {
+            // value-initialized so that no indeterminate data can ever be interpreted
+            std::array<uint64_t, 8> loaded_payload{};
+            if(memory.rd_mem(access_type::DEBUG_READ, payload_addr, 8 * sizeof(uint64_t),
+                             reinterpret_cast<uint8_t*>(loaded_payload.data())) == iss::Err) {
+                CPPLOG(ERR) << "Syscall read went wrong";
+                // without the syscall block there is nothing meaningful to dispatch
+                return iss::Err;
+            }
+            uint64_t syscall_num = loaded_payload.at(0);
+            if(syscall_num == 64) { // SYS_WRITE
+                return this->execute_sys_write(this, loaded_payload, traits<BASE>::MEM);
+            } else {
+                CPPLOG(ERR) << "this->tohost syscall with number 0x" << std::hex << syscall_num << std::dec << " (" << syscall_num
+                            << ") not implemented";
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+                return iss::Ok;
+            }
+        } else {
+            // uint8_t streams as a character; cast so the numeric ids show up in the log
+            CPPLOG(ERR) << "this->tohost functionality not implemented for device " << static_cast<unsigned>(device) << " and command "
+                        << static_cast<unsigned>(command);
+            this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+            this->interrupt_sim = payload_addr;
+            return iss::Ok;
+        }
+    }
+
+    mem::memory_hierarchy memories;
+
+    /**
+     * Not expected to be called on this class: asserts in debug builds and returns a
+     * default-constructed interface otherwise.
+     */
+    mem::memory_if get_mem_if() override {
+        // `false && "text"` is always false, so the assertion fires and shows the text
+        assert(false && "This function should never be called");
+        return mem::memory_if{};
+    }
+
+    void set_next(mem::memory_if mem_if) override { memory = mem_if; };
+
+    void set_irq_num(unsigned i) { mcause_max_irq = 1 << util::ilog2(i); }
+
+protected:
+    hart_state<reg_t> state;
+
+    static constexpr reg_t get_mstatus_mask_t(unsigned priv_lvl = PRIV_M) {
+        if(sizeof(reg_t) == 4) {
+            return priv_lvl == PRIV_U ? 0x80000011UL :   // 0b1...0 0001 0001
+                       priv_lvl == PRIV_S ? 0x800de133UL // 0b0...0 0001 1000 1001 1001;
+                                          : 0x807ff9ddUL;
+        } else {
+            return priv_lvl == PRIV_U ? 0x011ULL : // 0b1...0 0001 0001
+                       priv_lvl == PRIV_S ? 0x000de133ULL
+                                          : 0x007ff9ddULL;
+        }
+    }
+
+    mem::memory_if memory;
+    struct riscv_instrumentation_if : public iss::instrumentation_if {
+
+        riscv_instrumentation_if(riscv_hart_common<BASE, LOGCAT>& arch)
+        : arch(arch) {}
+        /**
+         * get the name of this architecture
+         *
+         * @return the name of this architecture
+         */
+        const std::string core_type_name() const override { return traits<BASE>::core_type; }
+
+        uint64_t get_pc() override { return arch.reg.PC; }
+
+        uint64_t get_next_pc() override { return arch.reg.NEXT_PC; }
+
+        uint64_t get_instr_word() override { return arch.reg.instruction; }
+
+        uint64_t get_instr_count() override { return arch.reg.icount; }
+
+        uint64_t get_pendig_traps() override { return arch.reg.trap_state; }
+
+        uint64_t get_total_cycles() override { return arch.reg.cycle + arch.cycle_offset; }
+
+        void update_last_instr_cycles(unsigned cycles) override { arch.cycle_offset += cycles - 1; }
+
+        bool is_branch_taken() override { return arch.reg.last_branch; }
+
+        unsigned get_reg_num() override { return traits<BASE>::NUM_REGS; }
+
+        unsigned get_reg_size(unsigned num) override { return traits<BASE>::reg_bit_widths[num]; }
+
+        std::unordered_map<std::string, uint64_t> const& get_symbol_table(std::string name) override { return arch.symbol_table; }
+
+        riscv_hart_common<BASE, LOGCAT>& arch;
+    };
+
+    friend struct riscv_instrumentation_if;
+    riscv_instrumentation_if instr_if;
+
+    instrumentation_if* get_instrumentation_if() override { return &instr_if; };
+
+    using csr_type = util::sparse_array<typename traits<BASE>::reg_t, 1ULL << 12, 12>;
+    using csr_page_type = typename csr_type::page_type;
+    csr_type csr;
+
+    std::unordered_map<unsigned, rd_csr_f> csr_rd_cb;
+    std::unordered_map<unsigned, wr_csr_f> csr_wr_cb;
+
+    reg_t mhartid_reg{0x0};
+    uint64_t mcycle_csr{0};
+    uint64_t minstret_csr{0};
+    reg_t fault_data;
+
+    int64_t cycle_offset{0};
+    int64_t instret_offset{0};
+    semihosting_cb_t<reg_t> semihosting_cb;
+    unsigned mcause_max_irq{16U};
 };

 } // namespace arch
@@ -1,3 +1,37 @@
+/*******************************************************************************
+ * Copyright (C) 2023 - 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
 #ifndef _ISS_ARCH_TGC_MAPPER_H
 #define _ISS_ARCH_TGC_MAPPER_H

@@ -23,35 +57,29 @@ using tgc5c_xrb_nn_plat_type = iss::arch::hwl<iss::arch::riscv_hart_m_p<iss::arc
 #ifdef CORE_TGC5D
 #include "riscv_hart_mu_p.h"
 #include <iss/arch/tgc5d.h>
-using tgc5d_plat_type = iss::arch::riscv_hart_mu_p<iss::arch::tgc5d, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_CLIC |
-                                                                                             iss::arch::FEAT_EXT_N)>;
+using tgc5d_plat_type = iss::arch::riscv_hart_mu_p<iss::arch::tgc5d, (iss::arch::features_e)(iss::arch::FEAT_EXT_N)>;
 #endif
 #ifdef CORE_TGC5D_XRB_MAC
 #include "riscv_hart_mu_p.h"
 #include <iss/arch/tgc5d_xrb_mac.h>
-using tgc5d_xrb_mac_plat_type =
-    iss::arch::riscv_hart_mu_p<iss::arch::tgc5d_xrb_mac,
-                               (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_CLIC | iss::arch::FEAT_EXT_N)>;
+using tgc5d_xrb_mac_plat_type = iss::arch::riscv_hart_mu_p<iss::arch::tgc5d_xrb_mac, (iss::arch::features_e)(iss::arch::FEAT_EXT_N)>;
 #endif
 #ifdef CORE_TGC5D_XRB_NN
 #include "hwl.h"
 #include "riscv_hart_mu_p.h"
 #include <iss/arch/tgc5d_xrb_nn.h>
 using tgc5d_xrb_nn_plat_type =
-    iss::arch::hwl<iss::arch::riscv_hart_mu_p<iss::arch::tgc5d_xrb_nn,
-                                              (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_CLIC | iss::arch::FEAT_EXT_N)>>;
+    iss::arch::hwl<iss::arch::riscv_hart_mu_p<iss::arch::tgc5d_xrb_nn, (iss::arch::features_e)(iss::arch::FEAT_EXT_N)>>;
 #endif
 #ifdef CORE_TGC5E
 #include "riscv_hart_mu_p.h"
 #include <iss/arch/tgc5e.h>
-using tgc5e_plat_type = iss::arch::riscv_hart_mu_p<iss::arch::tgc5e, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_CLIC |
-                                                                                             iss::arch::FEAT_EXT_N)>;
+using tgc5e_plat_type = iss::arch::riscv_hart_mu_p<iss::arch::tgc5e, (iss::arch::features_e)(iss::arch::FEAT_EXT_N)>;
 #endif
 #ifdef CORE_TGC5X
 #include "riscv_hart_mu_p.h"
 #include <iss/arch/tgc5x.h>
-using tgc5x_plat_type = iss::arch::riscv_hart_mu_p<iss::arch::tgc5x, (iss::arch::features_e)(iss::arch::FEAT_PMP | iss::arch::FEAT_CLIC |
-                                                                                             iss::arch::FEAT_EXT_N | iss::arch::FEAT_TCM)>;
+using tgc5x_plat_type = iss::arch::riscv_hart_mu_p<iss::arch::tgc5x, (iss::arch::features_e)(iss::arch::FEAT_EXT_N)>;
 #endif

 #endif
@@ -1,3 +1,37 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
 #include <array>
 // generated from:
 // * /scratch/eyck/workarea/Other/riscv-opcodes/csrs.csv
@@ -0,0 +1,285 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
+#include "memory_if.h"
+#include "iss/arch/riscv_hart_common.h"
+#include "iss/vm_types.h"
+#include <util/logging.h>
+
+namespace iss {
+namespace mem {
+struct clic_config {
+    uint64_t clic_base{0xc0000000};
+    unsigned clic_int_ctl_bits{4};
+    unsigned clic_num_irq{16};
+    unsigned clic_num_trigger{0};
+    bool nmode{false};
+};
+
+/**
+ * Copy bytes of a 32-bit register value into a buffer, starting at a byte offset.
+ *
+ * @param reg    register value, accessed byte-wise in host byte order
+ * @param offs   byte offset into the register (1..3); any other value is treated as 0
+ * @param data   destination buffer with room for at least `length` bytes
+ * @param length number of bytes requested; bytes that would lie beyond the end of the
+ *               register are returned as 0 instead of being read from adjacent memory
+ */
+inline void read_reg_with_offset(uint32_t reg, uint8_t offs, uint8_t* const data, unsigned length) {
+    auto reg_ptr = reinterpret_cast<uint8_t*>(&reg);
+    const unsigned start = offs <= 3 ? offs : 0;
+    const unsigned avail = static_cast<unsigned>(sizeof(reg)) - start;
+    // never read past the end of the (by-value) register
+    const unsigned count = length < avail ? length : avail;
+    for(auto i = 0U; i < count; ++i)
+        *(data + i) = *(reg_ptr + start + i);
+    for(auto i = count; i < length; ++i)
+        *(data + i) = 0;
+}
+
+/**
+ * Copy bytes from a buffer into a 32-bit register, starting at a byte offset.
+ *
+ * @param reg    register to modify, accessed byte-wise in host byte order
+ * @param offs   byte offset into the register (1..3); any other value is treated as 0
+ * @param data   source buffer holding at least `length` bytes
+ * @param length number of bytes offered; bytes that would land beyond the end of the
+ *               register are dropped instead of overwriting adjacent memory
+ */
+inline void write_reg_with_offset(uint32_t& reg, uint8_t offs, const uint8_t* const data, unsigned length) {
+    auto reg_ptr = reinterpret_cast<uint8_t*>(&reg);
+    const unsigned start = offs <= 3 ? offs : 0;
+    const unsigned avail = static_cast<unsigned>(sizeof(reg)) - start;
+    // never write past the end of the register
+    const unsigned count = length < avail ? length : avail;
+    for(auto i = 0U; i < count; ++i)
+        *(reg_ptr + start + i) = *(data + i);
+}
+
+template <typename WORD_TYPE> struct clic : public memory_elem {
+    using this_class = clic<WORD_TYPE>;
+    using reg_t = WORD_TYPE;
+    constexpr static unsigned WORD_LEN = sizeof(WORD_TYPE) * 8;
+
+    clic(arch::priv_if<WORD_TYPE> hart_if, clic_config cfg)
+    : hart_if(hart_if)
+    , cfg(cfg) {
+        clic_int_reg.resize(cfg.clic_num_irq, clic_int_reg_t{.raw = 0});
+        clic_cfg_reg = 0x30;
+        clic_mact_lvl = clic_mprev_lvl = (1 << (cfg.clic_int_ctl_bits)) - 1;
+        clic_uact_lvl = clic_uprev_lvl = (1 << (cfg.clic_int_ctl_bits)) - 1;
+        hart_if.csr_rd_cb[arch::mtvt] = MK_CSR_RD_CB(read_plain);
+        hart_if.csr_wr_cb[arch::mtvt] = MK_CSR_WR_CB(write_xtvt);
+        //        hart_if.csr_rd_cb[mxnti] = MK_CSR_RD_CB(read_plain(a,r);};
+        //        hart_if.csr_wr_cb[mxnti] = MK_CSR_WR_CB(write_plain(a,r);};
+        hart_if.csr_rd_cb[arch::mintstatus] = MK_CSR_RD_CB(read_intstatus);
+        hart_if.csr_wr_cb[arch::mintstatus] = MK_CSR_WR_CB(write_null);
+        //        hart_if.csr_rd_cb[mscratchcsw] = MK_CSR_RD_CB(read_plain(a,r);};
+        //        hart_if.csr_wr_cb[mscratchcsw] = MK_CSR_WR_CB(write_plain(a,r);};
+        //        hart_if.csr_rd_cb[mscratchcswl] = MK_CSR_RD_CB(read_plain(a,r);};
+        //        hart_if.csr_wr_cb[mscratchcswl] = MK_CSR_WR_CB(write_plain(a,r);};
+        hart_if.csr_rd_cb[arch::mintthresh] = MK_CSR_RD_CB(read_plain);
+        hart_if.csr_wr_cb[arch::mintthresh] = MK_CSR_WR_CB(write_intthresh);
+        if(cfg.nmode) {
+            hart_if.csr_rd_cb[arch::utvt] = MK_CSR_RD_CB(read_plain);
+            hart_if.csr_wr_cb[arch::utvt] = MK_CSR_WR_CB(write_xtvt);
+            hart_if.csr_rd_cb[arch::uintstatus] = MK_CSR_RD_CB(read_intstatus);
+            hart_if.csr_wr_cb[arch::uintstatus] = MK_CSR_WR_CB(write_null);
+            hart_if.csr_rd_cb[arch::uintthresh] = MK_CSR_RD_CB(read_plain);
+            hart_if.csr_wr_cb[arch::uintthresh] = MK_CSR_WR_CB(write_intthresh);
+        }
+        hart_if.csr[arch::mintthresh] = (1 << (cfg.clic_int_ctl_bits)) - 1;
+        hart_if.csr[arch::uintthresh] = (1 << (cfg.clic_int_ctl_bits)) - 1;
+    }
+
+    ~clic() = default;
+
+    memory_if get_mem_if() override {
+        return memory_if{.rd_mem{util::delegate<rd_mem_func_sig>::from<this_class, &this_class::read_mem>(this)},
+                         .wr_mem{util::delegate<wr_mem_func_sig>::from<this_class, &this_class::write_mem>(this)}};
+    }
+
+    void set_next(memory_if mem) override { down_stream_mem = mem; }
+
+    std::tuple<uint64_t, uint64_t> get_range() override { return {cfg.clic_base, cfg.clic_base + 0x7fff}; }
+
+private:
+    /**
+     * Serve a read from the CLIC register window, or forward it downstream.
+     *
+     * @return the status of read_clic for accesses inside the window, otherwise the
+     *         status reported by the downstream memory
+     */
+    iss::status read_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t* data) {
+        // the window is [clic_base, clic_base + 0x8000): an access ending exactly at its end is still inside
+        if(addr >= cfg.clic_base && (addr + length) <= (cfg.clic_base + 0x8000))
+            return read_clic(addr, length, data);
+        return down_stream_mem.rd_mem(access, addr, length, data);
+    }
+
+    /**
+     * Serve a write to the CLIC register window, or forward it downstream.
+     *
+     * @return the status of write_clic for accesses inside the window, otherwise the
+     *         status reported by the downstream memory
+     */
+    iss::status write_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t const* data) {
+        // the window is [clic_base, clic_base + 0x8000): an access ending exactly at its end is still inside
+        if(addr >= cfg.clic_base && (addr + length) <= (cfg.clic_base + 0x8000))
+            return write_clic(addr, length, data);
+        return down_stream_mem.wr_mem(access, addr, length, data);
+    }
+
+    iss::status read_clic(uint64_t addr, unsigned length, uint8_t* data);
+
+    iss::status write_clic(uint64_t addr, unsigned length, uint8_t const* data);
+
+    iss::status write_null(unsigned addr, reg_t val) { return iss::status::Ok; }
+
+    // CSR read callback returning the stored CSR value without any masking.
+    //   addr - CSR address, used as index into hart_if.csr
+    //   val  - receives the CSR value
+    iss::status read_plain(unsigned addr, reg_t& val) {
+        val = hart_if.csr[addr];
+        return iss::Ok;
+    }
+
+    // CSR write callback for the xtvt registers: stores the value with the six least
+    // significant bits cleared (mask ~0x3f), i.e. the stored value is 64-byte aligned.
+    iss::status write_xtvt(unsigned addr, reg_t val) {
+        hart_if.csr[addr] = val & ~0x3fULL;
+        return iss::Ok;
+    }
+
+    // CSR callbacks for the xcause registers (defined out of class); bits 9:8 of the CSR
+    // address select between the user (0) and machine view.
+    iss::status read_cause(unsigned addr, reg_t& val);
+    iss::status write_cause(unsigned addr, reg_t val);
+
+    // CSR callbacks for xintstatus (read) and xintthresh (write), defined out of class.
+    iss::status read_intstatus(unsigned addr, reg_t& val);
+    iss::status write_intthresh(unsigned addr, reg_t val);
+
+protected:
+    // access to the CSRs, CSR callbacks and state of the hart this CLIC belongs to
+    arch::priv_if<WORD_TYPE> hart_if;
+    // next element of the memory hierarchy, target of all non-CLIC accesses
+    memory_if down_stream_mem;
+    // static configuration (base address, number of interrupts/triggers, implemented ctl bits)
+    clic_config cfg;
+    // cliccfg register, located at offset 0 of the CLIC window
+    uint8_t clic_cfg_reg{0};
+    // clicinttrig registers, located at offset 0x40 of the CLIC window
+    // NOTE(review): no initializer here; confirm the constructor fills the array before it is read
+    std::array<uint32_t, 32> clic_inttrig_reg;
+    // one 32-bit register per interrupt, accessible as a whole (raw) or as its four byte fields
+    union clic_int_reg_t {
+        struct {
+            uint8_t ip;
+            uint8_t ie;
+            uint8_t attr;
+            uint8_t ctl;
+        };
+        uint32_t raw;
+    };
+    // per-interrupt registers, located at offset 0x1000 of the CLIC window
+    std::vector<clic_int_reg_t> clic_int_reg;
+    // previous interrupt levels reported/updated via xcause (machine and user view)
+    uint8_t clic_mprev_lvl{0}, clic_uprev_lvl{0};
+    // active interrupt levels reported via xintstatus (machine and user view)
+    uint8_t clic_mact_lvl{0}, clic_uact_lvl{0};
+};
+
+// Serves a read inside the CLIC window.
+//   offset 0x0000           : cliccfg (one byte, remaining bytes read as zero)
+//   offset 0x0040 + 4 * n   : clicinttrig[n]
+//   offset 0x1000 + 4 * n   : clicintip/clicintie/clicintattr/clicintctl of interrupt n
+// Every other location reads as zero. Always returns iss::Ok.
+// The register index is derived from addr & 0x7fff, which presumably relies on clic_base being
+// aligned to 0x8000 - TODO confirm.
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::read_clic(uint64_t addr, unsigned length, uint8_t* const data) {
+    const auto access_end = addr + length;
+    const auto trig_begin = cfg.clic_base + 0x40;
+    const auto irq_begin = cfg.clic_base + 0x1000;
+
+    if(addr == cfg.clic_base) { // cliccfg
+        data[0] = clic_cfg_reg;
+        for(unsigned idx = 1; idx < length; ++idx)
+            data[idx] = 0;
+        return iss::Ok;
+    }
+    if(addr >= trig_begin && access_end <= (trig_begin + cfg.clic_num_trigger * 4)) { // clicinttrig
+        const auto reg_idx = ((addr & 0x7fff) - 0x40) / 4;
+        read_reg_with_offset(clic_inttrig_reg[reg_idx], addr & 0x3, data, length);
+        return iss::Ok;
+    }
+    if(addr >= irq_begin && access_end <= (irq_begin + cfg.clic_num_irq * 4)) { // clicintip/clicintie/clicintattr/clicintctl
+        const auto reg_idx = ((addr & 0x7fff) - 0x1000) / 4;
+        read_reg_with_offset(clic_int_reg[reg_idx].raw, addr & 0x3, data, length);
+        return iss::Ok;
+    }
+    // unmapped location inside the window
+    for(unsigned idx = 0; idx < length; ++idx)
+        data[idx] = 0;
+    return iss::Ok;
+}
+
+// Serves a write inside the CLIC window.
+//   offset 0x0000           : cliccfg, only bits 4:1 (mask 0x1e) are writable
+//   offset 0x0040 + 4 * n   : clicinttrig[n]
+//   offset 0x1000 + 4 * n   : per-interrupt register n, reduced to the implemented bits afterwards
+// Writes to any other location are ignored. Always returns iss::Ok.
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::write_clic(uint64_t addr, unsigned length, const uint8_t* const data) {
+    const auto access_end = addr + length;
+    const auto trig_begin = cfg.clic_base + 0x40;
+    const auto irq_begin = cfg.clic_base + 0x1000;
+
+    if(addr == cfg.clic_base) { // cliccfg
+        clic_cfg_reg = (clic_cfg_reg & ~0x1e) | (*data & 0x1e);
+        return iss::Ok;
+    }
+    if(addr >= trig_begin && access_end <= (trig_begin + cfg.clic_num_trigger * 4)) { // clicinttrig
+        const auto reg_idx = ((addr & 0x7fff) - 0x40) / 4;
+        write_reg_with_offset(clic_inttrig_reg[reg_idx], addr & 0x3, data, length);
+        return iss::Ok;
+    }
+    if(addr >= irq_begin && access_end <= (irq_begin + cfg.clic_num_irq * 4)) { // clicintip/clicintie/clicintattr/clicintctl
+        const auto reg_idx = ((addr & 0x7fff) - 0x1000) / 4;
+        auto& irq_reg = clic_int_reg[reg_idx].raw;
+        write_reg_with_offset(irq_reg, addr & 0x3, data, length);
+        irq_reg &= 0xf0c70101; // clicIntCtlBits->0xf0, clicintattr->0xc7, clicintie->0x1, clicintip->0x1
+    }
+    return iss::Ok;
+}
+
+// CSR read callback for the xcause registers.
+// When mtvec selects mode 3 the stored cause is combined with the previous interrupt level
+// (bits 23:16), the previous interrupt enable (bit 27) and, for the machine view, the previous
+// privilege (bits 29:28). Otherwise only the interrupt flag and the cause code are returned.
+//   addr - CSR address; bits 9:8 equal to 0 select the user view, anything else the machine view
+//   val  - receives the assembled value
+// Always returns iss::Ok.
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::read_cause(unsigned addr, reg_t& val) {
+    // The interrupt flag is the most significant bit of the register. Shifting by the full register
+    // width would drop it from the mask and is undefined when the shift equals the operand width.
+    const reg_t irq_flag = reg_t(1) << (sizeof(reg_t) * 8 - 1);
+    if((hart_if.csr[arch::mtvec] & 0x3) == 3) {
+        val = hart_if.csr[addr] & (irq_flag | (hart_if.mcause_max_irq - 1) | (0xfUL << 16));
+        auto mode = (addr >> 8) & 0x3;
+        switch(mode) {
+        case 0:
+            val |= clic_uprev_lvl << 16;
+            val |= hart_if.state.mstatus.UPIE << 27;
+            break;
+        default:
+            val |= clic_mprev_lvl << 16;
+            val |= hart_if.state.mstatus.MPIE << 27;
+            val |= hart_if.state.mstatus.MPP << 28;
+            break;
+        }
+    } else
+        val = hart_if.csr[addr] & (irq_flag | (hart_if.mcause_max_irq - 1));
+    return iss::Ok;
+}
+
+// CSR write callback for the xcause registers.
+// Only the interrupt flag and the cause code are writable; when mtvec selects mode 3, bits 19:16
+// are writable as well and the previous level / previous interrupt enable / previous privilege
+// fields of the written value are transferred into the CLIC state and mstatus.
+//   addr - CSR address; bits 9:8 equal to 0 select the user view, anything else the machine view
+//   val  - value to write
+// Always returns iss::Ok.
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::write_cause(unsigned addr, reg_t val) {
+    const bool clic_mode = (hart_if.csr[arch::mtvec] & 0x3) == 3;
+
+    auto writable = ((1UL << (sizeof(WORD_TYPE) * 8 - 1)) | (hart_if.mcause_max_irq - 1));
+    if(clic_mode)
+        writable |= (0xfUL << 16);
+    hart_if.csr[addr] = (val & writable) | (hart_if.csr[addr] & ~writable);
+
+    if(!clic_mode)
+        return iss::Ok;
+
+    // level field of the written value, with the unimplemented low bits forced to one
+    const auto prev_lvl = ((val >> 16) & 0xff) | ((1 << (8 - cfg.clic_int_ctl_bits)) - 1);
+    const auto prev_ie = (val >> 27) & 0x1;
+    if(((addr >> 8) & 0x3) == 0) {
+        clic_uprev_lvl = prev_lvl;
+        hart_if.state.mstatus.UPIE = prev_ie;
+    } else {
+        clic_mprev_lvl = prev_lvl;
+        hart_if.state.mstatus.MPIE = prev_ie;
+        hart_if.state.mstatus.MPP = (val >> 28) & 0x3;
+    }
+    return iss::Ok;
+}
+
+// CSR read callback for the xintstatus registers.
+// Bits 7:0 hold the active user interrupt level; when bits 9:8 of the CSR address are 3 (machine
+// view) bits 31:24 additionally hold the active machine interrupt level.
+//   addr - CSR address
+//   val  - receives the assembled value
+// Always returns iss::Ok.
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::read_intstatus(unsigned addr, reg_t& val) {
+    const auto mode = (addr >> 8) & 0x3;
+    val = clic_uact_lvl & 0xff;
+    if(mode == 0x3) {
+        // Shift in the register type: shifting the promoted int overflows for levels >= 0x80 and
+        // the negative result would be sign-extended into the upper half of a 64-bit register.
+        val += static_cast<reg_t>(clic_mact_lvl) << 24;
+    }
+    return iss::Ok;
+}
+
+// CSR write callback for the xintthresh registers: keeps the low byte of the written value and
+// forces the lowest cfg.clic_int_ctl_bits bits to one. Always returns iss::Ok.
+// NOTE(review): write_cause fills (8 - clic_int_ctl_bits) low bits instead; confirm which width
+// is intended here.
+template <typename WORD_TYPE> iss::status clic<WORD_TYPE>::write_intthresh(unsigned addr, reg_t val) {
+    const reg_t forced_ones = (1 << (cfg.clic_int_ctl_bits)) - 1;
+    const reg_t threshold = val & 0xff;
+    hart_if.csr[addr] = threshold | forced_ones;
+    return iss::Ok;
+}
+
+} // namespace mem
+} // namespace iss
@@ -0,0 +1,101 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
+#include "memory_if.h"
+#include <algorithm>
+
+namespace iss {
+namespace mem {
+// Puts `e` at the front of the chain, marks the hierarchy as having a root and re-links the
+// chain. Later prepend() calls insert behind this element.
+// NOTE(review): a second call adds another front element rather than replacing the first root.
+void memory_hierarchy::root(memory_elem& e) {
+    hierarchy.push_front(&e);
+    root_set = true;
+    update_chain();
+}
+// Inserts `e` as close to the front of the chain as possible: directly behind the root element
+// if one was set, otherwise at the very front. The chain is re-linked afterwards.
+void memory_hierarchy::prepend(memory_elem& e) {
+    const auto pos = root_set ? hierarchy.begin() + 1 : hierarchy.begin();
+    hierarchy.insert(pos, &e);
+    update_chain();
+}
+// Adds `e` as the new last element of the chain and re-links the chain.
+void memory_hierarchy::append(memory_elem& e) {
+    hierarchy.push_back(&e);
+    update_chain();
+}
+// Not implemented yet: the element is ignored and the chain stays unchanged.
+void memory_hierarchy::insert_before(memory_elem&) {}
+// Not implemented yet: the element is ignored and the chain stays unchanged.
+void memory_hierarchy::insert_after(memory_elem&) {}
+// Replaces the last element of the chain by `e` and re-links the chain.
+// If the replaced element is owned by this hierarchy (it was handed over as unique_ptr) it is
+// destroyed. On an empty hierarchy `e` simply becomes the only element.
+void memory_hierarchy::replace_last(memory_elem& e) {
+    if(hierarchy.empty()) {
+        // back()/pop_back() on an empty deque is undefined behaviour; there is nothing to replace
+        hierarchy.push_back(&e);
+        update_chain();
+        return;
+    }
+    auto old = hierarchy.back();
+    auto it = std::find_if(std::begin(owned_elems), std::end(owned_elems),
+                           [old](std::unique_ptr<memory_elem> const& p) { return p.get() == old; });
+    hierarchy.pop_back();
+    if(it != std::end(owned_elems))
+        owned_elems.erase(it);
+    hierarchy.push_back(&e);
+    // re-link immediately so that no predecessor keeps delegates into the removed element
+    update_chain();
+}
+// Re-links the chain: every element receives the access interface of its successor as
+// downstream target. The last element is left untouched.
+void memory_hierarchy::update_chain() {
+    for(size_t i = 1; i < hierarchy.size(); ++i) {
+        hierarchy[i - 1]->set_next(hierarchy[i]->get_mem_if());
+    }
+}
+
+// Owning variant of prepend(): links the element and then keeps it alive in owned_elems.
+// `p` is dereferenced unconditionally and therefore must not be null.
+void memory_hierarchy::prepend(std::unique_ptr<memory_elem>&& p) {
+    prepend(*p);
+    owned_elems.push_back(std::move(p));
+}
+
+// Owning variant of append(): links the element and then keeps it alive in owned_elems.
+// `p` is dereferenced unconditionally and therefore must not be null.
+void memory_hierarchy::append(std::unique_ptr<memory_elem>&& p) {
+    append(*p);
+    owned_elems.push_back(std::move(p));
+}
+
+// Owning variant of insert_before(): takes ownership of the element.
+// NOTE(review): the reference overload is an empty stub, so the element is owned but not linked.
+void memory_hierarchy::insert_before(std::unique_ptr<memory_elem>&& p) {
+    insert_before(*p);
+    owned_elems.push_back(std::move(p));
+}
+
+// Owning variant of insert_after(): takes ownership of the element.
+// NOTE(review): the reference overload is an empty stub, so the element is owned but not linked.
+void memory_hierarchy::insert_after(std::unique_ptr<memory_elem>&& p) {
+    insert_after(*p);
+    owned_elems.push_back(std::move(p));
+}
+
+// Owning variant of replace_last(): replaces the last element (destroying it if it was owned)
+// and then keeps the new element alive in owned_elems. `p` must not be null.
+void memory_hierarchy::replace_last(std::unique_ptr<memory_elem>&& p) {
+    replace_last(*p);
+    owned_elems.push_back(std::move(p));
+}
+
+} // namespace mem
+} // namespace iss
@@ -0,0 +1,86 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
+#ifndef _MEMORY_MEMORY_IF_
+#define _MEMORY_MEMORY_IF_
+
+#include "iss/vm_types.h"
+#include <deque>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <util/delegate.h>
+#include <vector>
+
+namespace iss {
+namespace mem {
+
+// Signatures of the memory access functions: (access type, address, length in bytes, data buffer).
+// The read variant fills the buffer, the write variant only reads from it.
+using rd_mem_func_sig = iss::status(iss::access_type, uint64_t, unsigned, uint8_t*);
+using wr_mem_func_sig = iss::status(iss::access_type, uint64_t, unsigned, uint8_t const*);
+
+// Access interface of a memory element: one delegate for reads and one for writes.
+struct memory_if {
+    util::delegate<rd_mem_func_sig> rd_mem;
+    util::delegate<wr_mem_func_sig> wr_mem;
+};
+
+// Base class of every element that can be placed into a memory_hierarchy.
+struct memory_elem {
+    virtual ~memory_elem() = default;
+    // returns the read/write delegates through which this element is accessed
+    virtual memory_if get_mem_if() = 0;
+    // receives the access interface of the following element in the chain
+    virtual void set_next(memory_if) = 0;
+    // address range handled by this element as {first, last}; the default covers the whole
+    // 64-bit address space
+    virtual std::tuple<uint64_t, uint64_t> get_range() { return {0, std::numeric_limits<uint64_t>::max()}; }
+};
+
+// Ordered chain of memory elements. After each modification every element is given the access
+// interface of its successor via set_next().
+struct memory_hierarchy {
+    // overloads taking a reference: the caller keeps ownership and must keep the element alive
+    void root(memory_elem&);
+    void prepend(memory_elem&);
+    void append(memory_elem&);
+    void insert_before(memory_elem&);
+    void insert_after(memory_elem&);
+    void replace_last(memory_elem&);
+    // overloads taking a unique_ptr: the hierarchy takes ownership of the element
+    void prepend(std::unique_ptr<memory_elem>&&);
+    void append(std::unique_ptr<memory_elem>&&);
+    void insert_before(std::unique_ptr<memory_elem>&&);
+    void insert_after(std::unique_ptr<memory_elem>&&);
+    void replace_last(std::unique_ptr<memory_elem>&&);
+
+protected:
+    // re-links all elements in chain order
+    void update_chain();
+    // the chain itself, front to back; non-owning pointers
+    std::deque<memory_elem*> hierarchy;
+    // elements whose lifetime is managed by the hierarchy
+    std::vector<std::unique_ptr<memory_elem>> owned_elems;
+    // true once root() was called; prepend() then inserts behind the front element
+    bool root_set{false};
+};
+
+} // namespace mem
+} // namespace iss
+#endif
@@ -0,0 +1,90 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
+#ifndef _MEMORY_WITH_HTIF_
+#define _MEMORY_WITH_HTIF_
+
+#include "memory_if.h"
+#include "iss/arch/riscv_hart_common.h"
+#include "iss/vm_types.h"
+#include <util/logging.h>
+#include <util/sparse_array.h>
+
+namespace iss {
+namespace mem {
+// Leaf element of the memory hierarchy: a sparse byte array of 4 GiB that additionally hands
+// writes to the hart's `tohost` address over to the hart's HTIF handling.
+template <typename WORD_TYPE> struct memory_with_htif : public memory_elem {
+    using this_class = memory_with_htif<WORD_TYPE>;
+    constexpr static unsigned WORD_LEN = sizeof(WORD_TYPE) * 8;
+
+    // hart_if gives access to the tohost address and the HTIF handler of the hart
+    memory_with_htif(arch::priv_if<WORD_TYPE> hart_if)
+    : hart_if(hart_if) {}
+
+    ~memory_with_htif() = default;
+
+    // returns delegates bound to read_mem/write_mem of this instance
+    memory_if get_mem_if() override {
+        return memory_if{.rd_mem{util::delegate<rd_mem_func_sig>::from<this_class, &this_class::read_mem>(this)},
+                         .wr_mem{util::delegate<wr_mem_func_sig>::from<this_class, &this_class::write_mem>(this)}};
+    }
+
+    void set_next(memory_if) override {
+        // intentionally left empty, leaf element
+    }
+
+private:
+    // Copies `length` bytes starting at `addr` into `data`; addresses wrap around at the size of
+    // the array. Always returns iss::Ok.
+    iss::status read_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t* data) {
+        for(auto offs = 0U; offs < length; ++offs) {
+            *(data + offs) = mem[(addr + offs) % mem.size()];
+        }
+        return iss::Ok;
+    }
+
+    // Stores `length` bytes from `data` at `addr`. A functional write starting at the tohost
+    // address is afterwards passed to the HTIF handler, whose status is returned; every other
+    // write returns iss::Ok.
+    iss::status write_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t const* data) {
+        // Copy page by page: a single std::copy into the first page would run past the end of
+        // that page whenever the access crosses a page boundary.
+        // (assumes the page returned by mem(n) holds mem.page_size elements - TODO confirm)
+        uint64_t dst = addr;
+        uint8_t const* src = data;
+        unsigned remaining = length;
+        while(remaining > 0) {
+            mem_type::page_type& p = mem(dst / mem.page_size);
+            const uint64_t page_offs = dst & mem.page_addr_mask;
+            const uint64_t room = mem.page_size - page_offs;
+            const unsigned chunk = remaining < room ? remaining : static_cast<unsigned>(room);
+            std::copy(src, src + chunk, p.data() + page_offs);
+            dst += chunk;
+            src += chunk;
+            remaining -= chunk;
+        }
+        // this->tohost handling in case of riscv-test
+        // according to https://github.com/riscv-software-src/riscv-isa-sim/issues/364#issuecomment-607657754:
+        if(access && iss::access_type::FUNC && addr == hart_if.tohost) {
+            return hart_if.exec_htif(data);
+        }
+        return iss::Ok;
+    }
+
+protected:
+    using mem_type = util::sparse_array<uint8_t, 1ULL << 32>;
+    // backing store
+    mem_type mem;
+    // access to the hart (tohost address, HTIF handler)
+    arch::priv_if<WORD_TYPE> hart_if;
+};
+} // namespace mem
+} // namespace iss
+#endif // _MEMORY_WITH_HTIF_
@@ -0,0 +1,353 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
+#include "memory_if.h"
+#include "iss/arch/riscv_hart_common.h"
+#include "iss/vm_types.h"
+#include <util/logging.h>
+
+namespace iss {
+namespace mem {
+// Constants used by the page table walk: page size exponent, position of the physical page
+// number inside a page table entry and the flag bits of a page table entry.
+enum {
+    PGSHIFT = 12,
+    PTE_PPN_SHIFT = 10,
+    // page table entry (PTE) fields
+    PTE_V = 0x001,   // Valid
+    PTE_R = 0x002,   // Read
+    PTE_W = 0x004,   // Write
+    PTE_X = 0x008,   // Execute
+    PTE_U = 0x010,   // User
+    PTE_G = 0x020,   // Global
+    PTE_A = 0x040,   // Accessed
+    PTE_D = 0x080,   // Dirty
+    PTE_SOFT = 0x300 // Reserved for Software
+};
+
+// True if the entry is valid but has none of the R/W/X bits set; the page table walk treats
+// such an entry as a pointer to the next page table level.
+template <typename T> inline bool PTE_TABLE(T PTE) { return (((PTE) & (PTE_V | PTE_R | PTE_W | PTE_X)) == PTE_V); }
+
+// Parameters of one address translation scheme as produced by decode_vm_info().
+struct vm_info {
+    int levels;      // number of page table levels, 0 if translation is switched off
+    int idxbits;     // number of virtual address bits used as index per level
+    int ptesize;     // size of a page table entry in bytes
+    uint64_t ptbase; // physical address of the root page table
+    // true if address translation is enabled; const so that it can be used on const objects
+    bool is_active() const { return levels != 0; }
+};
+
+// Copies bytes of a 32-bit register (in host byte order) into a buffer.
+//   reg    - register value to read from
+//   offs   - byte offset inside the register (1..3); any other value is treated as offset 0
+//   data   - destination buffer
+//   length - number of bytes requested
+// At most the bytes between `offs` and the end of the register are copied; bytes of `data`
+// beyond that are left untouched.
+inline void read_reg_with_offset(uint32_t reg, uint8_t offs, uint8_t* const data, unsigned length) {
+    auto reg_ptr = reinterpret_cast<uint8_t*>(&reg);
+    const unsigned start = (offs >= 1 && offs <= 3) ? offs : 0U;
+    // clamp the request so that it never reads beyond the four bytes of the register
+    const unsigned avail = static_cast<unsigned>(sizeof(reg)) - start;
+    const unsigned count = length < avail ? length : avail;
+    for(auto i = 0U; i < count; ++i)
+        *(data + i) = *(reg_ptr + start + i);
+}
+
+// Copies bytes from a buffer into a 32-bit register (in host byte order).
+//   reg    - register to modify
+//   offs   - byte offset inside the register (1..3); any other value is treated as offset 0
+//   data   - source buffer
+//   length - number of bytes offered
+// At most the bytes between `offs` and the end of the register are written; excess source
+// bytes are ignored so that memory behind the register is never modified.
+inline void write_reg_with_offset(uint32_t& reg, uint8_t offs, const uint8_t* const data, unsigned length) {
+    auto reg_ptr = reinterpret_cast<uint8_t*>(&reg);
+    const unsigned start = (offs >= 1 && offs <= 3) ? offs : 0U;
+    // clamp the request so that it never writes beyond the four bytes of the register
+    const unsigned avail = static_cast<unsigned>(sizeof(reg)) - start;
+    const unsigned count = length < avail ? length : avail;
+    for(auto i = 0U; i < count; ++i)
+        *(reg_ptr + start + i) = *(data + i);
+}
+// TODO: update vminfo on trap enter and leave as well as mstatus write, reset
+// Memory hierarchy element performing virtual to physical address translation for a hart.
+// It registers itself as handler of the satp CSR, caches translations in `ptw` and forwards
+// all accesses to the next element of the hierarchy.
+template <typename WORD_TYPE> struct mmu : public memory_elem {
+    using this_class = mmu<WORD_TYPE>;
+    using reg_t = WORD_TYPE;
+    constexpr static unsigned WORD_LEN = sizeof(WORD_TYPE) * 8;
+
+    constexpr static reg_t PGSIZE = 1 << PGSHIFT;
+    constexpr static reg_t PGMASK = PGSIZE - 1;
+
+    // Installs the satp read/write callbacks at the hart.
+    mmu(arch::priv_if<WORD_TYPE> hart_if)
+    : hart_if(hart_if) {
+        // The callbacks must be registered under the CSR address. The unqualified name `satp`
+        // resolves to the data member of this class, which holds the register value instead.
+        hart_if.csr_rd_cb[arch::satp] = MK_CSR_RD_CB(read_satp);
+        hart_if.csr_wr_cb[arch::satp] = MK_CSR_WR_CB(write_satp);
+    }
+
+    virtual ~mmu() = default;
+
+    // returns delegates bound to read_mem/write_mem of this instance
+    memory_if get_mem_if() override {
+        return memory_if{.rd_mem{util::delegate<rd_mem_func_sig>::from<this_class, &this_class::read_mem>(this)},
+                         .wr_mem{util::delegate<wr_mem_func_sig>::from<this_class, &this_class::write_mem>(this)}};
+    }
+
+    // stores the interface of the next element, the target of all forwarded accesses
+    void set_next(memory_if mem) override { down_stream_mem = mem; }
+
+private:
+    // Forwards a read to the next element. With active translation an access crossing a page
+    // boundary is split into two accesses at the boundary.
+    // NOTE(review): unlike write_mem() the address is forwarded without virt2phys(); confirm
+    // whether reads are meant to be translated as well.
+    iss::status read_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t* data) {
+        if(unlikely((addr & ~PGMASK) != ((addr + length - 1) & ~PGMASK))) { // we may cross a page boundary
+            vm_info vm = decode_vm_info(hart_if.PRIV, satp);
+            if(vm.levels != 0) { // VM is active
+                auto split_addr = (addr + length) & ~PGMASK;
+                auto len1 = split_addr - addr;
+                auto res = down_stream_mem.rd_mem(access, addr, len1, data);
+                if(res == iss::Ok)
+                    res = down_stream_mem.rd_mem(access, split_addr, length - len1, data + len1);
+                return res;
+            }
+        }
+        return down_stream_mem.rd_mem(access, addr, length, data);
+    }
+
+    // Forwards a write to the next element, translating the address with virt2phys(). With
+    // active translation an access crossing a page boundary is split into two accesses.
+    // NOTE(review): the two halves of a split access are forwarded untranslated.
+    iss::status write_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t const* data) {
+        if(unlikely((addr & ~PGMASK) != ((addr + length - 1) & ~PGMASK))) { // we may cross a page boundary
+            vm_info vm = decode_vm_info(hart_if.PRIV, satp);
+            if(vm.levels != 0) { // VM is active
+                auto split_addr = (addr + length) & ~PGMASK;
+                auto len1 = split_addr - addr;
+                auto res = down_stream_mem.wr_mem(access, addr, len1, data);
+                if(res == iss::Ok)
+                    res = down_stream_mem.wr_mem(access, split_addr, length - len1, data + len1);
+                return res;
+            }
+        }
+        return down_stream_mem.wr_mem(access, virt2phys(access, addr), length, data);
+    }
+    // recomputes vmt/addr_mode from the hart state and satp and drops all cached translations
+    void update_vm_info();
+
+    // CSR read callback returning the stored CSR value
+    iss::status read_plain(unsigned addr, reg_t& val) {
+        val = hart_if.csr[addr];
+        return iss::Ok;
+    }
+
+    // CSR write callback storing the value unmodified
+    iss::status write_plain(unsigned addr, reg_t const& val) {
+        hart_if.csr[addr] = val;
+        return iss::Ok;
+    }
+
+    // CSR read callback for satp. Raises trap 2 and returns iss::Err when executed in
+    // supervisor mode while mstatus.TVM (bit 20) is set.
+    iss::status read_satp(unsigned addr, reg_t& val) {
+        auto tvm = bit_sub<20, 1>(hart_if.state.mstatus());
+        if((hart_if.PRIV == arch::PRIV_S) && (tvm != 0)) {
+            hart_if.raise_trap(2, 0, hart_if.PC);
+            //            hart_if.reg.trap_state = (1 << 31) | (2 << 16);
+            //            hart_if.fault_data = hart_if.reg.PC;
+            return iss::Err;
+        }
+        val = satp;
+        return iss::Ok;
+    }
+
+    // CSR write callback for satp. Raises trap 2 and returns iss::Err when executed in
+    // supervisor mode while mstatus.TVM is set; otherwise stores the value and refreshes the
+    // translation information.
+    iss::status write_satp(unsigned addr, reg_t val) {
+        reg_t tvm = hart_if.state.mstatus.TVM;
+        if((hart_if.PRIV == arch::PRIV_S) && (tvm != 0)) {
+            hart_if.raise_trap(2, 0, hart_if.PC);
+            //            hart_if.reg.trap_state = (1 << 31) | (2 << 16);
+            //            hart_if.fault_data = hart_if.reg.PC;
+            return iss::Err;
+        }
+        satp = val;
+        update_vm_info();
+        return iss::Ok;
+    }
+
+    // translates a virtual address, see the out-of-class definition
+    uint64_t virt2phys(iss::access_type access, uint64_t addr);
+
+    // Decodes a 32-bit satp value for the given privilege level: translation is off in machine
+    // mode and for mode field 0, mode field 1 selects SV32.
+    // Calls abort() for an unknown mode or a privilege level above supervisor that is not machine.
+    static inline vm_info decode_vm_info(uint32_t state, uint32_t sptbr) {
+        if(state == arch::PRIV_M)
+            return {0, 0, 0, 0};
+        if(state <= arch::PRIV_S)
+            switch(bit_sub<31, 1>(sptbr)) {
+            case 0:
+                return {0, 0, 0, 0}; // off
+            case 1:
+                return {2, 10, 4, bit_sub<0, 22>(sptbr) << PGSHIFT}; // SV32
+            default:
+                abort();
+            }
+        abort();
+        return {0, 0, 0, 0}; // dummy
+    }
+
+    // Decodes a 64-bit satp value for the given privilege level: translation is off in machine
+    // mode and for mode field 0, mode fields 8..11 select SV39/SV48/SV57/SV64.
+    // Calls abort() for an unknown mode or a privilege level above supervisor that is not machine.
+    static inline vm_info decode_vm_info(uint32_t state, uint64_t sptbr) {
+        if(state == arch::PRIV_M)
+            return {0, 0, 0, 0};
+        if(state <= arch::PRIV_S)
+            switch(bit_sub<60, 4>(sptbr)) {
+            case 0:
+                return {0, 0, 0, 0}; // off
+            case 8:
+                return {3, 9, 8, bit_sub<0, 44>(sptbr) << PGSHIFT}; // SV39
+            case 9:
+                return {4, 9, 8, bit_sub<0, 44>(sptbr) << PGSHIFT}; // SV48
+            case 10:
+                return {5, 9, 8, bit_sub<0, 44>(sptbr) << PGSHIFT}; // SV57
+            case 11:
+                return {6, 9, 8, bit_sub<0, 44>(sptbr) << PGSHIFT}; // SV64
+            default:
+                abort();
+            }
+        abort();
+        return {0, 0, 0, 0}; // dummy
+    }
+
+protected:
+    // current satp value; zero (translation off) until the first write_satp()
+    reg_t satp{0};
+    // cache of translations: virtual page number -> physical page base combined with PTE flags
+    std::unordered_map<reg_t, uint64_t> ptw;
+    // translation parameters, [0] for data accesses and [1] for instruction fetches;
+    // value-initialised so that translation counts as inactive before the first update
+    std::array<vm_info, 2> vmt{};
+    // address type per access function, maintained by update_vm_info()
+    std::array<address_type, 4> addr_mode{};
+
+    // access to the hart (privilege level, mstatus, CSR callbacks, trap handling)
+    arch::priv_if<WORD_TYPE> hart_if;
+    // next element of the memory hierarchy
+    memory_if down_stream_mem;
+};
+
+// Translates a virtual address into a physical one.
+//   access - access type; its function part (fetch/read/write) selects the permission checks
+//   addr   - virtual address
+// Returns the physical address. With inactive translation the address is returned unchanged.
+// A cached translation from `ptw` is used if available, otherwise the page tables are walked
+// through the downstream memory and a successful result is cached.
+// On a failed translation a page fault is raised at the hart and the matching
+// arch::trap_*_page_fault exception is thrown; a failing page table read throws
+// arch::trap_load_access_fault.
+// NOTE(review): the RISCV_ENABLE_DIRTY branches reference names that do not exist in this
+// function (ppte, addr.val, ...) and will not compile when the macro is defined.
+template <typename WORD_TYPE> uint64_t mmu<WORD_TYPE>::virt2phys(iss::access_type access, uint64_t addr) {
+    const auto type = access & iss::access_type::FUNC;
+    auto it = ptw.find(addr >> PGSHIFT);
+    if(it != ptw.end()) {
+        const reg_t pte = it->second;
+        const reg_t ad = PTE_A | (type == iss::access_type::WRITE) * PTE_D;
+#ifdef RISCV_ENABLE_DIRTY
+        // set accessed and possibly dirty bits.
+        *(uint32_t*)ppte |= ad;
+        return {addr.getAccessType(), addr.space, (pte & (~PGMASK)) | (addr.val & PGMASK)};
+#else
+        // take exception if access or possibly dirty bit is not set.
+        if((pte & ad) == ad)
+            return {(pte & (~PGMASK)) | (addr & PGMASK)};
+        else
+            ptw.erase(it); // throw an exception
+#endif
+    } else {
+        uint32_t mode = type != iss::access_type::FETCH && hart_if.state.mstatus.MPRV ? // MPRV
+                            hart_if.state.mstatus.MPP
+                                                                                      : hart_if.PRIV;
+
+        const vm_info& vm = vmt[static_cast<uint16_t>(type) / 2];
+        // translation switched off: identity mapping. Without this check the walk below would
+        // be skipped (zero levels) and every access would end in a page fault.
+        if(vm.levels == 0)
+            return addr;
+
+        const bool s_mode = mode == arch::PRIV_S;
+        const bool sum = hart_if.state.mstatus.SUM;
+        const bool mxr = hart_if.state.mstatus.MXR;
+
+        // verify bits xlen-1:va_bits-1 are all equal
+        const int va_bits = PGSHIFT + vm.levels * vm.idxbits;
+        const reg_t mask = (reg_t(1) << (sizeof(reg_t) * 8 - (va_bits - 1))) - 1;
+        const reg_t masked_msbs = (addr >> (va_bits - 1)) & mask;
+        const int levels = (masked_msbs != 0 && masked_msbs != mask) ? 0 : vm.levels;
+
+        reg_t base = vm.ptbase;
+        for(int i = levels - 1; i >= 0; i--) {
+            const int ptshift = i * vm.idxbits;
+            const reg_t idx = (addr >> (PGSHIFT + ptshift)) & ((1 << vm.idxbits) - 1);
+
+            // check that physical address of PTE is legal
+            reg_t pte = 0;
+            const uint8_t res = down_stream_mem.rd_mem(iss::access_type::READ, base + idx * vm.ptesize, vm.ptesize, (uint8_t*)&pte);
+            if(res != 0)
+                throw arch::trap_load_access_fault(addr);
+            const reg_t ppn = pte >> PTE_PPN_SHIFT;
+
+            if(PTE_TABLE(pte)) { // next level of page table
+                base = ppn << PGSHIFT;
+            } else if((pte & PTE_U) ? s_mode && (type == iss::access_type::FETCH || !sum) : !s_mode) {
+                break;
+            } else if(!(pte & PTE_V) || (!(pte & PTE_R) && (pte & PTE_W))) {
+                break;
+            } else if(type == iss::access_type::FETCH  ? !(pte & PTE_X)
+                      : type == iss::access_type::READ ? !(pte & PTE_R) && !(mxr && (pte & PTE_X))
+                                                       : !((pte & PTE_R) && (pte & PTE_W))) {
+                // the permission required by the access type is missing
+                break;
+            } else if((ppn & ((reg_t(1) << ptshift) - 1)) != 0) {
+                break;
+            } else {
+                const reg_t ad = PTE_A | ((type == iss::access_type::WRITE) * PTE_D);
+#ifdef RISCV_ENABLE_DIRTY
+                // set accessed and possibly dirty bits.
+                *(uint32_t*)ppte |= ad;
+#else
+                // take exception if access or possibly dirty bit is not set.
+                if((pte & ad) != ad)
+                    break;
+#endif
+                // for superpage mappings, make a fake leaf PTE for the TLB's benefit.
+                const reg_t vpn = addr >> PGSHIFT;
+                const reg_t value = (ppn | (vpn & ((reg_t(1) << ptshift) - 1))) << PGSHIFT;
+                const reg_t offset = addr & PGMASK;
+                ptw[vpn] = value | (pte & 0xff);
+                return value | offset;
+            }
+        }
+    }
+    // translation failed: report the page fault matching the access type
+    switch(type) {
+    case access_type::FETCH:
+        hart_if.raise_trap(12, 0, addr);
+        throw arch::trap_instruction_page_fault(addr);
+    case access_type::READ:
+        hart_if.raise_trap(13, 0, addr);
+        throw arch::trap_load_page_fault(addr);
+    case access_type::WRITE:
+        hart_if.raise_trap(15, 0, addr);
+        throw arch::trap_store_page_fault(addr);
+    default:
+        abort();
+    }
+}
+
+// Recomputes the translation parameters from the current privilege level, mstatus and satp.
+// vmt[1]/addr_mode[2..3] follow the current privilege level, vmt[0]/addr_mode[0..1] follow
+// mstatus.MPP while mstatus.MPRV is set. All cached translations are dropped.
+template <typename WORD_TYPE> inline void mmu<WORD_TYPE>::update_vm_info() {
+    const auto type_of = [](vm_info& info) { return info.is_active() ? iss::address_type::VIRTUAL : iss::address_type::PHYSICAL; };
+
+    vmt[1] = decode_vm_info(hart_if.PRIV, satp);
+    addr_mode[2] = type_of(vmt[1]);
+    addr_mode[3] = addr_mode[2];
+
+    if(hart_if.state.mstatus.MPRV) {
+        vmt[0] = decode_vm_info(hart_if.state.mstatus.MPP, satp);
+    } else {
+        vmt[0] = vmt[1];
+    }
+    addr_mode[0] = type_of(vmt[0]);
+    addr_mode[1] = addr_mode[0];
+
+    ptw.clear();
+}
+
+} // namespace mem
+} // namespace iss
@@ -0,0 +1,244 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
+#include "memory_if.h"
+#include "iss/arch/riscv_hart_common.h"
+#include "iss/vm_types.h"
+#include <util/logging.h>
+
+namespace iss {
+namespace mem {
+// Plain record of CLIC-related settings with their default values.
+// NOTE(review): nothing in the visible part of this file reads this struct — presumably it was
+// carried over from the CLIC memory element; confirm it is still needed here.
+struct clic_config {
+    uint64_t clic_base{0xc0000000};  // defaults to 0xc0000000; presumably the base address of the CLIC register window — TODO confirm
+    unsigned clic_int_ctl_bits{4};   // defaults to 4; presumably the number of implemented interrupt control bits — TODO confirm
+    unsigned clic_num_irq{16};       // defaults to 16; presumably the number of interrupt lines — TODO confirm
+    unsigned clic_num_trigger{0};    // defaults to 0; presumably the number of trigger registers — TODO confirm
+    bool nmode{false};               // defaults to false; meaning not visible here — TODO confirm
+};
+
+/**
+ * Copies bytes out of a 32-bit register value into a caller supplied buffer.
+ *
+ * Bytes are taken from the host's in-memory representation of @p reg. Offsets 1 and 2
+ * start the copy at that byte, offset 3 always transfers exactly the last byte of the
+ * register (independent of @p length) and every other offset value starts at byte 0.
+ * The copy is clamped to the end of the register, so no more than the remaining
+ * bytes of @p reg are ever read, even if @p length asks for more.
+ *
+ * @param reg    register value to read from
+ * @param offs   byte offset into the register
+ * @param data   destination buffer, must hold the number of bytes actually copied
+ * @param length number of bytes requested
+ */
+inline void read_reg_with_offset(uint32_t reg, uint8_t offs, uint8_t* const data, unsigned length) {
+    auto const* reg_ptr = reinterpret_cast<uint8_t const*>(&reg);
+    if(offs == 3) {
+        // only a single byte is left at this offset; it is transferred unconditionally
+        *data = *(reg_ptr + 3);
+        return;
+    }
+    // any offset other than 1..3 is handled like offset 0
+    unsigned const start = (offs == 1 || offs == 2) ? offs : 0U;
+    // never read beyond the last byte of the register
+    unsigned const avail = static_cast<unsigned>(sizeof(reg)) - start;
+    unsigned const count = length < avail ? length : avail;
+    for(auto i = 0U; i < count; ++i)
+        *(data + i) = *(reg_ptr + start + i);
+}
+
+/**
+ * Copies bytes from a caller supplied buffer into a 32-bit register.
+ *
+ * Bytes are stored into the host's in-memory representation of @p reg. Offsets 1 and 2
+ * start the copy at that byte, offset 3 always stores exactly one byte into the last
+ * byte of the register (independent of @p length) and every other offset value starts
+ * at byte 0. The copy is clamped to the end of the register, so memory following
+ * @p reg is never overwritten, even if @p length asks for more.
+ *
+ * @param reg    register to modify
+ * @param offs   byte offset into the register
+ * @param data   source buffer
+ * @param length number of bytes requested
+ */
+inline void write_reg_with_offset(uint32_t& reg, uint8_t offs, const uint8_t* const data, unsigned length) {
+    auto* reg_ptr = reinterpret_cast<uint8_t*>(&reg);
+    if(offs == 3) {
+        // only a single byte is left at this offset; it is transferred unconditionally
+        *(reg_ptr + 3) = *data;
+        return;
+    }
+    // any offset other than 1..3 is handled like offset 0
+    unsigned const start = (offs == 1 || offs == 2) ? offs : 0U;
+    // never write beyond the last byte of the register
+    unsigned const avail = static_cast<unsigned>(sizeof(reg)) - start;
+    unsigned const count = length < avail ? length : avail;
+    for(auto i = 0U; i < count; ++i)
+        *(reg_ptr + start + i) = *(data + i);
+}
+
+/**
+ * Memory element that runs the physical memory protection (PMP) check on every access
+ * before handing it to the next element of the memory chain.
+ *
+ * On construction it installs CSR callbacks for pmpaddr0..pmpaddr15 and for the
+ * 16 / sizeof(reg_t) configuration registers starting at pmpcfg0.
+ *
+ * @tparam WORD_TYPE register type of the hart
+ */
+template <typename WORD_TYPE> struct pmp : public memory_elem {
+    using this_class = pmp<WORD_TYPE>;
+    using reg_t = WORD_TYPE;
+    constexpr static unsigned WORD_LEN = sizeof(WORD_TYPE) * 8;
+
+    // NOTE(review): inside the body `hart_if` names the by-value parameter, not the member;
+    // presumably priv_if only holds references into the hart so both see the same maps — confirm.
+    pmp(arch::priv_if<WORD_TYPE> hart_if)
+    : hart_if(hart_if) {
+        // address registers are stored and returned unmodified
+        for(size_t i = arch::pmpaddr0; i <= arch::pmpaddr15; ++i) {
+            hart_if.csr_rd_cb[i] = MK_CSR_RD_CB(read_plain);
+            hart_if.csr_wr_cb[i] = MK_CSR_WR_CB(write_plain);
+        }
+        // configuration registers: 16 one-byte fields spread over 16 / sizeof(reg_t) registers
+        for(size_t i = arch::pmpcfg0; i < arch::pmpcfg0 + 16 / sizeof(reg_t); ++i) {
+            hart_if.csr_rd_cb[i] = MK_CSR_RD_CB(read_plain);
+            hart_if.csr_wr_cb[i] = MK_CSR_WR_CB(write_pmpcfg);
+        }
+    }
+
+    virtual ~pmp() = default;
+
+    // Returns the read/write delegates of this element (read_mem / write_mem).
+    memory_if get_mem_if() override {
+        return memory_if{.rd_mem{util::delegate<rd_mem_func_sig>::from<this_class, &this_class::read_mem>(this)},
+                         .wr_mem{util::delegate<wr_mem_func_sig>::from<this_class, &this_class::write_mem>(this)}};
+    }
+
+    // Stores the element that accesses are forwarded to once they passed the check.
+    void set_next(memory_if mem) override { down_stream_mem = mem; }
+
+private:
+    /**
+     * Read path. Forwards the access downstream unless the PMP check rejects it;
+     * debug accesses are never rejected. On rejection the faulting address is stored in
+     * fault_data, a trap is flagged in trap_state and iss::Err is returned.
+     */
+    iss::status read_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t* data) {
+        if(!pmp_check(access, addr, length) && !is_debug(access)) {
+            hart_if.fault_data = addr;
+            // bit 31 set, cause in bits 16 and up: 1 for fetches, 5 for every other read
+            hart_if.reg.trap_state = (1UL << 31) | ((access == access_type::FETCH ? 1 : 5) << 16);
+            return iss::Err;
+        }
+        return down_stream_mem.rd_mem(access, addr, length, data);
+    }
+
+    /**
+     * Write path. Forwards the access downstream unless the PMP check rejects it;
+     * debug accesses are never rejected. On rejection the faulting address is stored in
+     * fault_data, a trap is flagged in trap_state and iss::Err is returned.
+     */
+    iss::status write_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t const* data) {
+        if(!pmp_check(access, addr, length) && !is_debug(access)) {
+            hart_if.fault_data = addr;
+            // bit 31 set, cause 7 in bits 16 and up
+            hart_if.reg.trap_state = (1UL << 31) | (7 << 16);
+            return iss::Err;
+        }
+        return down_stream_mem.wr_mem(access, addr, length, data);
+    }
+
+    // CSR read callback: returns the stored register value as is.
+    iss::status read_plain(unsigned addr, reg_t& val) {
+        val = hart_if.csr[addr];
+        return iss::Ok;
+    }
+
+    // CSR write callback: stores the value as is.
+    iss::status write_plain(unsigned addr, reg_t const& val) {
+        hart_if.csr[addr] = val;
+        return iss::Ok;
+    }
+
+    // CSR write callback for the configuration registers: bits 5 and 6 of every
+    // configuration byte are cleared, all other bits are stored.
+    iss::status write_pmpcfg(unsigned addr, reg_t val) {
+        // the pattern is built 64 bits wide and truncated to reg_t so that every
+        // configuration byte of a 64-bit register is kept, not just the lower four
+        constexpr reg_t cfg_mask = static_cast<reg_t>(0x9f9f9f9f9f9f9f9fULL);
+        hart_if.csr[addr] = val & cfg_mask;
+        return iss::Ok;
+    }
+
+    bool pmp_check(const access_type type, const uint64_t addr, const unsigned len);
+
+protected:
+    arch::priv_if<WORD_TYPE> hart_if;
+    memory_if down_stream_mem;
+};
+
+/**
+ * Checks an access against the 16 PMP entries.
+ *
+ * Entries are visited in ascending order. An entry whose A field is zero is skipped. For the
+ * others the access is examined in 4-byte steps, either against the range that ends at the
+ * entry's address and starts at the previous entry's address (TOR) or against the mask derived
+ * from the entry's address (NA4/NAPOT). The first entry matching any step decides: if it does
+ * not match every step the access is refused, otherwise the entry's R/W/X bits (or machine mode
+ * with the L bit clear) grant it.
+ *
+ * @param type kind of access (READ, WRITE or FETCH are granted via R, W and X)
+ * @param addr start address of the access
+ * @param len  length of the access in bytes
+ * @return true if the access is allowed; without any matching entry that is the case when no
+ *         entry is active or the hart runs in machine mode
+ */
+template <typename WORD_TYPE> bool pmp<WORD_TYPE>::pmp_check(const access_type type, const uint64_t addr, const unsigned len) {
+    constexpr auto PMP_SHIFT = 2U;
+    constexpr auto PMP_R = 0x1U;
+    constexpr auto PMP_W = 0x2U;
+    constexpr auto PMP_X = 0x4U;
+    constexpr auto PMP_A = 0x18U;
+    constexpr auto PMP_L = 0x80U;
+    constexpr auto PMP_TOR = 0x1U;
+    constexpr auto PMP_NA4 = 0x2U;
+    constexpr auto PMP_NAPOT = 0x3U;
+    reg_t base = 0;
+    auto any_active = false;
+    auto const cfg_reg_size = sizeof(reg_t);
+    for(size_t i = 0; i < 16; i++) {
+        reg_t tor = hart_if.csr[arch::pmpaddr0 + i] << PMP_SHIFT;
+        // every entry owns one byte of its configuration register, so the field of entry i
+        // starts at bit 8 * (i % cfg_reg_size)
+        // NOTE(review): registers are indexed consecutively from pmpcfg0, matching the callbacks
+        // installed by the constructor — confirm this is the intended numbering for 64-bit harts.
+        uint8_t cfg = hart_if.csr[arch::pmpcfg0 + (i / cfg_reg_size)] >> ((i % cfg_reg_size) * 8);
+        if(cfg & PMP_A) {
+            any_active = true;
+            auto pmp_a = (cfg & PMP_A) >> 3;
+            auto is_tor = pmp_a == PMP_TOR;
+            auto is_na4 = pmp_a == PMP_NA4;
+
+            // the run of trailing one bits of the address register (plus one extra bit unless NA4)
+            // selects the address bits that are ignored by the comparison
+            reg_t mask = (hart_if.csr[arch::pmpaddr0 + i] << 1) | (!is_na4);
+            mask = ~(mask & ~(mask + 1)) << PMP_SHIFT;
+
+            // Check each 4-byte sector of the access
+            auto any_match = false;
+            auto all_match = true;
+            for(reg_t offset = 0; offset < len; offset += 1 << PMP_SHIFT) {
+                reg_t cur_addr = addr + offset;
+                auto napot_match = ((cur_addr ^ tor) & mask) == 0;
+                auto tor_match = base <= (cur_addr + len - 1) && cur_addr < tor;
+                auto match = is_tor ? tor_match : napot_match;
+                any_match |= match;
+                all_match &= match;
+            }
+            if(any_match) {
+                // If the PMP matches only a strict subset of the access, fail it
+                if(!all_match)
+                    return false;
+                return (hart_if.reg.PRIV == arch::PRIV_M && !(cfg & PMP_L)) || (type == access_type::READ && (cfg & PMP_R)) ||
+                       (type == access_type::WRITE && (cfg & PMP_W)) || (type == access_type::FETCH && (cfg & PMP_X));
+            }
+        }
+        // the next entry's TOR range starts where this entry ends
+        base = tor;
+    }
+    // Earlier alternative implementation, kept for reference only (not compiled):
+    //    constexpr auto pmp_num_regs = 16;
+    //    reg_t tor_base = 0;
+    //    auto any_active = false;
+    //    auto lower_addr = addr >>2;
+    //    auto upper_addr = (addr+len-1)>>2;
+    //    for (size_t i = 0; i < pmp_num_regs; i++) {
+    //        uint8_t cfg = csr[pmpcfg0+(i/4)]>>(i%4);
+    //        uint8_t cfg_next = i==(pmp_num_regs-1)? 0 : csr[pmpcfg0+((i+1)/4)]>>((i+1)%4);
+    //        auto pmpaddr = csr[pmpaddr0+i];
+    //        if (cfg & PMP_A) {
+    //            any_active=true;
+    //            auto is_tor = bit_sub<3, 2>(cfg) == PMP_TOR;
+    //            auto is_napot = bit_sub<4, 1>(cfg) && bit_sub<3, 2>(cfg_next)!= PMP_TOR;
+    //            if(is_napot) {
+    //                reg_t mask = bit_sub<3, 1>(cfg)?~( pmpaddr & ~(pmpaddr + 1)): 0x3fffffff;
+    //                auto mpmpaddr = pmpaddr & mask;
+    //                if((lower_addr&mask) == mpmpaddr && (upper_addr&mask)==mpmpaddr)
+    //                    return  (hart_if.reg.PRIV == PRIV_M && !(cfg & PMP_L)) ||
+    //                            (type == access_type::READ && (cfg & PMP_R)) ||
+    //                            (type == access_type::WRITE && (cfg & PMP_W)) ||
+    //                            (type == access_type::FETCH && (cfg & PMP_X));
+    //            } else if(is_tor) {
+    //                if(lower_addr>=tor_base && upper_addr<=pmpaddr)
+    //                    return  (hart_if.reg.PRIV == PRIV_M && !(cfg & PMP_L)) ||
+    //                            (type == access_type::READ && (cfg & PMP_R)) ||
+    //                            (type == access_type::WRITE && (cfg & PMP_W)) ||
+    //                            (type == access_type::FETCH && (cfg & PMP_X));
+    //            }
+    //        }
+    //        tor_base = pmpaddr;
+    //    }
+    // no entry matched: allowed when PMP is not in use at all or when running in machine mode
+    return !any_active || hart_if.reg.PRIV == arch::PRIV_M;
+}
+
+} // namespace mem
+} // namespace iss
@@ -1,3 +1,37 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
 #include "semihosting.h"
 #include <chrono>
 #include <cstdint>
@@ -1,3 +1,37 @@
+/*******************************************************************************
+ * Copyright (C) 2025 MINRES Technologies GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/
+
 #ifndef _SEMIHOSTING_H_
 #define _SEMIHOSTING_H_
 #include <chrono>
@@ -58,4 +92,4 @@ template <typename T> struct semihosting_callback {
 };

 template <typename T> using semihosting_cb_t = std::function<void(iss::arch_if*, T*, T*)>;
-#endif
+#endif
@@ -40,6 +40,7 @@
 #include <vector>

 #include "iss/arch/tgc_mapper.h"
+#include "util/logging.h"
 #include <boost/lexical_cast.hpp>
 #include <boost/program_options.hpp>
 #ifdef WITH_LLVM
@@ -258,6 +259,7 @@ int main(int argc, char* argv[]) {
                LOG(ERR) << "Error opening file " << filename << std::endl;
                return 1;
            }
+            LOGGER(DEFAULT)::reporting_level() = logging::ERR;
            for(auto addr = start_addr; addr < end_addr; addr += data.size()) {
                vm->get_arch()->read(iss::address_type::PHYSICAL, iss::access_type::DEBUG_READ, 0 /*MEM*/, addr, data.size(),
                                     data.data()); // FIXME: get space from iss::arch::traits<ARCH>::mem_type_e::MEM
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017, 2018 MINRES Technologies GmbH
+ * Copyright (C) 2017 - 2025 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -387,7 +387,7 @@ template <unsigned int BUSWIDTH> void core_complex<BUSWIDTH>::run() {
        quantum_keeper.reset();
        cpu->set_interrupt_execution(false);
        cpu->start(dump_ir);
-    } while(cpu->get_interrupt_execution());
+    } while(!cpu->get_interrupt_execution());
    sc_stop();
 }

@@ -419,7 +419,7 @@ template <unsigned int BUSWIDTH> bool core_complex<BUSWIDTH>::read_mem(uint64_t
            gp.set_extension(preExt);
        }
        auto pre_delay = delay;
-        dbus->b_transport(gp, delay);
+        sckt->b_transport(gp, delay);
        if(pre_delay > delay) {
            quantum_keeper.reset();
        } else {
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (C) 2017-2021 MINRES Technologies GmbH
+ * Copyright (C) 2017 - 2025 MINRES Technologies GmbH
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
@@ -1,9 +1,36 @@
-/*
- * sc_core_adapter.h
+/*******************************************************************************
+ * Copyright (C) 2023 - 2025 MINRES Technologies GmbH
+ * All rights reserved.
 *
- *  Created on: Jul 5, 2023
- *      Author: eyck
- */
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/

 #ifndef _SYSC_SC_CORE_ADAPTER_H_
 #define _SYSC_SC_CORE_ADAPTER_H_
@@ -11,6 +38,7 @@
 #include "sc_core_adapter_if.h"
 #include <iostream>
 #include <iss/iss.h>
+#include <iss/mem/memory_if.h>
 #include <iss/vm_types.h>
 #include <scc/report.h>
 #include <util/ities.h>
@@ -18,11 +46,16 @@
 namespace sysc {
 template <typename PLAT> class sc_core_adapter : public PLAT, public sc_core_adapter_if {
 public:
+    using this_class = sc_core_adapter<PLAT>;
    using reg_t = typename iss::arch::traits<typename PLAT::core>::reg_t;
    using phys_addr_t = typename iss::arch::traits<typename PLAT::core>::phys_addr_t;
-    using heart_state_t = typename PLAT::hart_state_type;
    sc_core_adapter(sysc::tgfs::core_complex_if* owner)
-    : owner(owner) {}
+    : owner(owner) {
+        // reads of the time CSR are served by read_time()
+        this->csr_rd_cb[iss::arch::time] = MK_CSR_RD_CB(read_time);
+        // timeh is only hooked up when reg_t is 4 bytes wide
+        if(sizeof(reg_t) == 4)
+            this->csr_rd_cb[iss::arch::timeh] = MK_CSR_RD_CB(read_time);
+        // NOTE(review): presumably makes this adapter the final element of the platform's
+        // memory chain so that accesses end up in read_mem()/write_mem() below — confirm
+        this->memories.replace_last(*this);
+    }

    iss::arch_if* get_arch_if() override { return this; }

@@ -60,79 +93,87 @@ public:
        }
    };

-    iss::status read_mem(phys_addr_t addr, unsigned length, uint8_t* const data) override {
-        if(addr.access && iss::access_type::DEBUG)
-            return owner->read_mem_dbg(addr.val, length, data) ? iss::Ok : iss::Err;
+    // Returns the memory interface of this adapter: delegates bound to read_mem() and write_mem().
+    iss::mem::memory_if get_mem_if() override {
+        return iss::mem::memory_if{.rd_mem{util::delegate<iss::mem::rd_mem_func_sig>::from<this_class, &this_class::read_mem>(this)},
+                                   .wr_mem{util::delegate<iss::mem::wr_mem_func_sig>::from<this_class, &this_class::write_mem>(this)}};
+    }
+
+    // Read access entry point: debug accesses go to owner->read_mem_dbg(), everything else to
+    // owner->read_mem() with the fetch flag taken from the access type. The owner's boolean
+    // result is mapped to iss::Ok / iss::Err.
+    // NOTE(review): `access && iss::access_type::DEBUG` presumably relies on an operator&&
+    // defined for access_type that tests the DEBUG flag — confirm.
+    iss::status read_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t* data) {
+        if(access && iss::access_type::DEBUG)
+            return owner->read_mem_dbg(addr, length, data) ? iss::Ok : iss::Err;
        else {
-            return owner->read_mem(addr.val, length, data, is_fetch(addr.access)) ? iss::Ok : iss::Err;
+            return owner->read_mem(addr, length, data, is_fetch(access)) ? iss::Ok : iss::Err;
        }
    }

-    iss::status write_mem(phys_addr_t addr, unsigned length, const uint8_t* const data) override {
-        if(addr.access && iss::access_type::DEBUG)
-            return owner->write_mem_dbg(addr.val, length, data) ? iss::Ok : iss::Err;
-        else {
-            auto tohost_upper = (sizeof(reg_t) == 4 && addr.val == (this->tohost + 4)) || (sizeof(reg_t) == 8 && addr.val == this->tohost);
-            auto tohost_lower = (sizeof(reg_t) == 4 && addr.val == this->tohost) || (sizeof(reg_t) == 64 && addr.val == this->tohost);
-            if(tohost_lower || tohost_upper) {
-                if(tohost_upper || (tohost_lower && to_host_wr_cnt > 0)) {
-                    switch(hostvar >> 48) {
-                    case 0:
-                        if(hostvar != 0x1) {
-                            SCCINFO(owner->hier_name())
-                                << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar << "), stopping simulation";
-                        } else {
-                            SCCINFO(owner->hier_name())
-                                << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar << "), stopping simulation";
-                        }
-                        this->reg.trap_state = std::numeric_limits<uint32_t>::max();
-                        this->interrupt_sim = hostvar;
-#ifndef WITH_TCC
-                        throw(iss::simulation_stopped(hostvar));
-#endif
-                        break;
-                    default:
-                        break;
-                    }
-                } else if(tohost_lower)
-                    to_host_wr_cnt++;
-                return iss::Ok;
-            } else {
-                auto res = owner->write_mem(addr.val, length, data) ? iss::Ok : iss::Err;
-                // clear MTIP on mtimecmp write
-                if(addr.val == 0x2004000) {
-                    reg_t val;
-                    this->read_csr(iss::arch::mip, val);
-                    if(val & (1ULL << 7))
-                        this->write_csr(iss::arch::mip, val & ~(1ULL << 7));
+    // Write access entry point.
+    //  - debug accesses go to owner->write_mem_dbg()
+    //  - a write to the `tohost` address is intercepted and never forwarded to the owner; every
+    //    path through that branch returns iss::Ok:
+    //      * odd payload: the value is logged and the simulation is stopped
+    //      * device 0 / command 0: eight 64-bit words are read from the payload address, word 0 is
+    //        the syscall number; 64 is handled by execute_sys_write(), anything else stops the
+    //        simulation
+    //      * any other device/command stops the simulation
+    //  - all other writes go to owner->write_mem()
+    // NOTE(review): `access && iss::access_type::DEBUG` presumably relies on an operator&&
+    // defined for access_type that tests the DEBUG flag — confirm.
+    iss::status write_mem(iss::access_type access, uint64_t addr, unsigned length, uint8_t const* data) {
+        if(access && iss::access_type::DEBUG)
+            return owner->write_mem_dbg(addr, length, data) ? iss::Ok : iss::Err;
+        if(addr == this->tohost) {
+            // NOTE(review): reads sizeof(reg_t) bytes from data regardless of `length` — confirm
+            // callers always pass at least that many bytes
+            reg_t cur_data = *reinterpret_cast<const reg_t*>(data);
+            // Extract Device (bits 63:56), forced to 0 when reg_t is 4 bytes wide
+            uint8_t device = sizeof(reg_t) == 4 ? 0 : (cur_data >> 56) & 0xFF;
+            // Extract Command (bits 55:48), forced to 0 when reg_t is 4 bytes wide
+            uint8_t command = sizeof(reg_t) == 4 ? 0 : (cur_data >> 48) & 0xFF;
+            // Extract payload (bits 47:0)
+            uint64_t payload_addr = cur_data & 0xFFFFFFFFFFFFULL; // lower 48 bits
+            if(payload_addr & 1) {
+                // a payload of exactly 1 is reported as info, any other odd value as an error
+                if(payload_addr != 0x1) {
+                    SCCERR(owner->hier_name()) << "tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr
+                                               << "), stopping simulation";
+                } else {
+                    SCCINFO(owner->hier_name())
+                        << "tohost value is 0x" << std::hex << payload_addr << std::dec << " (" << payload_addr << "), stopping simulation";
                }
-                return res;
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+                return iss::Ok;
            }
+            if(device == 0 && command == 0) {
+                std::array<uint64_t, 8> loaded_payload;
+                auto res = owner->read_mem(payload_addr, 8 * sizeof(uint64_t), reinterpret_cast<uint8_t*>(loaded_payload.data()), false)
+                               ? iss::Ok
+                               : iss::Err;
+                if(res == iss::Err) {
+                    // the failed read is only reported; the write itself still returns Ok
+                    SCCERR(owner->hier_name()) << "Syscall read went wrong";
+                    return iss::Ok;
+                }
+                uint64_t syscall_num = loaded_payload.at(0);
+                if(syscall_num == 64) // SYS_WRITE
+                    return this->execute_sys_write(this, loaded_payload, PLAT::MEM);
+                SCCERR(owner->hier_name()) << "tohost syscall with number 0x" << std::hex << syscall_num << std::dec << " (" << syscall_num
+                                           << ") not implemented";
+                this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+                this->interrupt_sim = payload_addr;
+                return iss::Ok;
+            }
+            // NOTE(review): device and command are uint8_t, so they are presumably streamed as
+            // characters rather than numbers here — confirm
+            SCCERR(owner->hier_name()) << "tohost functionality not implemented for device " << device << " and command " << command;
+            this->reg.trap_state = std::numeric_limits<uint32_t>::max();
+            this->interrupt_sim = payload_addr;
+            return iss::Ok;
        }
+        auto res = owner->write_mem(addr, length, data) ? iss::Ok : iss::Err;
+        return res;
    }

-    iss::status read_csr(unsigned addr, reg_t& val) override {
-        if((addr == iss::arch::time || addr == iss::arch::timeh)) {
-            uint64_t time_val = owner->mtime_i.get_interface() ? owner->mtime_i.read() : 0;
-            if(addr == iss::arch::time) {
-                val = static_cast<reg_t>(time_val);
-            } else if(addr == iss::arch::timeh) {
-                if(sizeof(reg_t) != 4)
-                    return iss::Err;
-                val = static_cast<reg_t>(time_val >> 32);
-            }
-            return iss::Ok;
-        } else {
-            return PLAT::read_csr(addr, val);
+    // CSR read callback for time/timeh. The 64-bit time value is taken from owner->mtime_i, or 0
+    // when that port has no interface. `time` yields the value truncated to reg_t, `timeh`
+    // yields bits 63:32 and returns iss::Err unless reg_t is 4 bytes wide. For any other address
+    // val is left untouched and iss::Ok is returned.
+    iss::status read_time(unsigned addr, reg_t& val) {
+        uint64_t time_val = owner->mtime_i.get_interface() ? owner->mtime_i.read() : 0;
+        if(addr == iss::arch::time) {
+            val = static_cast<reg_t>(time_val);
+        } else if(addr == iss::arch::timeh) {
+            if(sizeof(reg_t) != 4)
+                return iss::Err;
+            val = static_cast<reg_t>(time_val >> 32);
        }
+        return iss::Ok;
    }

    void wait_until(uint64_t flags) override {
        SCCDEBUG(owner->hier_name()) << "Sleeping until interrupt";
+        // the platform's own handling now runs before the adapter blocks
+        PLAT::wait_until(flags);
+        // block on wfi_evt until a trap is pending or an interrupt is both pending (mip) and enabled (mie)
        while(this->reg.pending_trap == 0 && (this->csr[iss::arch::mip] & this->csr[iss::arch::mie]) == 0) {
            sc_core::wait(wfi_evt);
        }
-        PLAT::wait_until(flags);
    }

    void local_irq(short id, bool value) override {
@@ -165,7 +206,6 @@ public:
 private:
    sysc::tgfs::core_complex_if* const owner{nullptr};
    sc_core::sc_event wfi_evt;
-    uint64_t hostvar{std::numeric_limits<uint64_t>::max()};
    unsigned to_host_wr_cnt = 0;
    bool first{true};
 };
@@ -1,9 +1,36 @@
-/*
- * sc_core_adapter.h
+/*******************************************************************************
+ * Copyright (C) 2023 MINRES Technologies GmbH
+ * All rights reserved.
 *
- *  Created on: Jul 5, 2023
- *      Author: eyck
- */
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Contributors:
+ *       eyck@minres.com - initial implementation
+ ******************************************************************************/

 #ifndef _SYSC_SC_CORE_ADAPTER_IF_H_
 #define _SYSC_SC_CORE_ADAPTER_IF_H_
@@ -0,0 +1,69 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2025, MINRES Technologies GmbH
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+//    this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its contributors
+//    may be used to endorse or promote products derived from this software
+//    without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Contributors:
+//       ales@minres.com - initial API and implementation
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _VM_AES_SBOX_H_
+#define _VM_AES_SBOX_H_
+#include <cstdint>
+
+// AES forward substitution table (S-box), indexed by the input byte.
+const uint8_t AES_ENC_SBOX[] = {
+    0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59,
+    0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1,
+    0x71, 0xD8, 0x31, 0x15, 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83,
+    0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
+    0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C,
+    0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC,
+    0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE,
+    0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+    0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6,
+    0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9,
+    0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, 0x8C, 0xA1,
+    0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16};
+
+// Returns the forward S-box substitution of the given byte.
+inline uint8_t aes_sbox_fwd(uint8_t index) {
+    const uint8_t substituted = AES_ENC_SBOX[index];
+    return substituted;
+}
+
+// AES inverse substitution table (inverse S-box), indexed by the input byte.
+const uint8_t AES_DEC_SBOX[] = {
+    0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F,
+    0xFF, 0x87, 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B,
+    0x42, 0xFA, 0xC3, 0x4E, 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8,
+    0xF6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
+    0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84, 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3,
+    0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02, 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
+    0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73, 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9,
+    0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89, 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
+    0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4, 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07,
+    0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D, 0x2D, 0xE5, 0x7A, 0x9F,
+    0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61, 0x17, 0x2B,
+    0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D};
+
+// Returns the inverse S-box substitution of the given byte.
+inline uint8_t aes_sbox_inv(uint8_t index) {
+    const uint8_t substituted = AES_DEC_SBOX[index];
+    return substituted;
+}
+
+#endif /* _VM_AES_SBOX_H_ */
@@ -94,7 +94,7 @@ protected:
    using this_class = vm_impl<ARCH>;
    using compile_func = continuation_e (this_class::*)(virt_addr_t&, code_word_t, jit_holder&);

-    continuation_e gen_single_inst_behavior(virt_addr_t&, unsigned int &, jit_holder&) override;
+    continuation_e gen_single_inst_behavior(virt_addr_t&, jit_holder&) override;
    enum globals_e {TVAL = 0, GLOBALS_SIZE};
    void gen_block_prologue(jit_holder& jh) override;
    void gen_block_epilogue(jit_holder& jh) override;
@@ -4780,19 +4780,16 @@ vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)
    }()) {}

 template <typename ARCH>
-continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, jit_holder& jh) {
+continuation_e vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, jit_holder& jh) {
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
    phys_addr_t paddr(pc);
    auto *const data = (uint8_t *)&instr;
-    if(this->core.has_mmu())
-        paddr = this->core.virt2phys(pc);
    auto res = this->core.read(paddr, 4, data);
    if (res != iss::Ok)
        return ILLEGAL_FETCH;
    if (instr == 0x0000006f || (instr&0xffff)==0xa001)
        return JUMP_TO_SELF;
-    ++inst_cnt;
    uint32_t inst_index = instr_decoder.decode_instr(instr);
    compile_func f = nullptr;
    if(inst_index < instr_descr.size())
@@ -4927,4 +4924,4 @@ volatile std::array<bool, 2> dummy = {
 };
 }
 }
-// clang-format on
+// clang-format on
@@ -33,7 +33,9 @@
 ////////////////////////////////////////////////////////////////////////////////

 #include "fp_functions.h"
+#include "softfloat_types.h"
 #include <array>
+#include <cstdint>

 extern "C" {
 #include "internals.h"
@@ -44,21 +46,375 @@ extern "C" {
 #include <limits>

 using this_t = uint8_t*;
-// this does not inlcude any reserved rm or the DYN rm, as DYN rm should be taken care of in the vm_impl
-const std::array<uint8_t, 5> rmm_map = {
-    softfloat_round_near_even /*RNE*/, softfloat_round_minMag /*RTZ*/, softfloat_round_min /*RDN*/, softfloat_round_max /*RUP?*/,
-    softfloat_round_near_maxMag /*RMM*/
-};
+template <typename T> T constexpr defaultNaN(); // default NaN encoding for the float format held in T; only the specializations below exist
+template <> uint16_t constexpr defaultNaN<uint16_t>() { return defaultNaNF16UI; }
+template <> uint32_t constexpr defaultNaN<uint32_t>() { return defaultNaNF32UI; }
+template <> uint64_t constexpr defaultNaN<uint64_t>() { return defaultNaNF64UI; }
+template <typename T> T constexpr posInf(); // +infinity encoding: sign clear, exponent all ones, fraction zero
+template <> uint16_t constexpr posInf<uint16_t>() { return 0x7C00; }
+template <> uint32_t constexpr posInf<uint32_t>() { return 0x7F800000; }
+template <> uint64_t constexpr posInf<uint64_t>() { return 0x7FF0000000000000; }
+template <typename T> T constexpr negInf(); // -infinity encoding: posInf with the sign bit set
+template <> uint16_t constexpr negInf<uint16_t>() { return 0xFC00; }
+template <> uint32_t constexpr negInf<uint32_t>() { return 0xFF800000; }
+template <> uint64_t constexpr negInf<uint64_t>() { return 0xFFF0000000000000; }
+template <typename T> T constexpr negZero(); // -0 encoding: only the sign bit set
+template <> uint16_t constexpr negZero<uint16_t>() { return 0x8000; }
+template <> uint32_t constexpr negZero<uint32_t>() { return 0x80000000; }
+template <> uint64_t constexpr negZero<uint64_t>() { return 0x8000000000000000; }

-const uint32_t quiet_nan32 = 0x7fC00000;
+// Special-case filter for the 7-bit reciprocal square-root estimate, keyed on the fclass bit mask of the operand.
+// Clears softfloat_exceptionFlags, then either stores the final result in ret_val and returns true, or returns false
+// when the table-based estimate still has to be computed. subnormal is set to true only for class 0x0020 and is
+// left untouched in every other case.
+template <typename T> bool rsqrt_check(T fclass_val, bool& subnormal, T& ret_val) {
+    softfloat_exceptionFlags = 0;
+    switch(fclass_val) {
+    case 0x0001: // -infinity
+    case 0x0002: // negative normal
+    case 0x0004: // negative subnormal
+    case 0x0100: // signaling NaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+        ret_val = defaultNaN<T>();
+        return true;
+    case 0x0200: // quiet NaN: default NaN without raising a flag
+        ret_val = defaultNaN<T>();
+        return true;
+    case 0x0008: // -0
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        ret_val = negInf<T>();
+        return true;
+    case 0x0010: // +0
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        ret_val = posInf<T>();
+        return true;
+    case 0x0080: // +infinity
+        ret_val = 0;
+        return true;
+    case 0x0020: // positive subnormal: the caller has to normalize before the table lookup
+        subnormal = true;
+        return false;
+    default:
+        return false;
+    }
+}
+static constexpr std::array<std::array<uint64_t, 64>, 2> rsqrt_table{ // 7-bit estimates, read by frsqrt7_general as [exponent LSB][top 6 significand bits]
+    {{
+         52, 51, 50, 48, 47, 46, 44, 43, 42, 41, 40, 39, 38, 36, 35, 34, 33, 32, 31, 30, 30, 29, 28, 27, 26, 25, 24, 23, 23, 22, 21, 20,
+         19, 19, 18, 17, 16, 16, 15, 14, 14, 13, 12, 12, 11, 10, 10, 9,  9,  8,  7,  7,  6,  6,  5,  4,  4,  3,  3,  2,  2,  1,  1,  0,
+     },
+     {127, 125, 123, 121, 119, 118, 116, 114, 113, 111, 109, 108, 106, 105, 103, 102, 100, 99, 97, 96, 95, 93,
+      92,  91,  90,  88,  87,  86,  85,  84,  83,  82,  80,  79,  78,  77,  76,  75,  74,  73, 72, 71, 70, 70,
+      69,  68,  67,  66,  65,  64,  63,  63,  62,  61,  60,  59,  59,  58,  57,  56,  56,  55, 54, 53}}};
+
+// Assembles the 7-bit reciprocal square-root estimate from the already separated fields of the operand.
+//   s, e           number of significand / exponent bits of the format
+//   sign, exp, sig raw field values of the operand
+//   subnormal      true when the operand is subnormal and has to be normalized first (sig must then be non-zero)
+// Returns the result encoding (sign | exponent | significand) in the low s + e + 1 bits.
+uint64_t constexpr frsqrt7_general(const unsigned s, const unsigned e, const uint64_t sign, const int64_t exp, const uint64_t sig,
+                                   const bool subnormal) {
+    // Leading zeros inside the s-bit significand field; only evaluated for subnormal operands.
+    const signed lead_zeros = subnormal ? static_cast<signed>(__builtin_clzll(sig) - (64 - s)) : 0;
+    const int64_t norm_exp = subnormal ? static_cast<int64_t>(-lead_zeros) : exp;
+    const uint64_t norm_sig = subnormal ? ((sig << (1 + lead_zeros)) & ((1ULL << s) - 1)) : sig;
+    // Table row: least-significant bit of the normalized exponent; column: six most-significant significand bits.
+    const unsigned row = norm_exp & 1;
+    const unsigned column = (norm_sig >> (s - 6)) & 0x3f;
+    // The table output forms the seven high bits of the result significand, the remaining bits are zero.
+    const uint64_t result_sig = rsqrt_table[row][column] << (s - 7);
+    // Result exponent is floor((3*B - 1 - normalized exponent) / 2) with B being the exponent bias.
+    const unsigned bias = (1UL << (e - 1)) - 1;
+    const uint64_t result_exp = (3 * bias - 1 - norm_exp) / 2;
+    // The sign is passed through unchanged.
+    return (sign << (s + e)) | (result_exp << s) | result_sig;
+}
+// Special-case filter for the 7-bit reciprocal estimate, keyed on the fclass bit mask of the operand.
+// Clears softfloat_exceptionFlags, then either stores the final result in ret_val and returns true, or reports
+// through subnormal whether the operand needs normalizing and returns false so the caller computes the estimate.
+template <typename T> bool recip_check(T fclass_val, bool& subnormal, uint64_t& ret_val) {
+    softfloat_exceptionFlags = 0;
+    switch(fclass_val) {
+    case 0x0001: // -infinity
+        ret_val = negZero<T>();
+        return true;
+    case 0x0080: // +infinity
+        ret_val = 0;
+        return true;
+    case 0x0008: // -0
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        ret_val = negInf<T>();
+        return true;
+    case 0x0010: // +0
+        softfloat_exceptionFlags |= softfloat_flag_infinite;
+        ret_val = posInf<T>();
+        return true;
+    case 0x0100: // signaling NaN
+        softfloat_exceptionFlags |= softfloat_flag_invalid;
+        ret_val = defaultNaN<T>();
+        return true;
+    case 0x0200: // quiet NaN: default NaN without raising a flag
+        ret_val = defaultNaN<T>();
+        return true;
+    case 0x0004: // negative subnormal
+    case 0x0020: // positive subnormal
+        subnormal = true;
+        return false;
+    default: // normal operands
+        subnormal = false;
+        return false;
+    }
+}
+static constexpr std::array<uint64_t, 128> rec_table{ // 7-bit estimates, read by frec_general with the top 7 significand bits as index
+    {127, 125, 123, 121, 119, 117, 116, 114, 112, 110, 109, 107, 105, 104, 102, 100, 99, 97, 96, 94, 93, 91, 90, 88, 87, 85,
+     84,  83,  81,  80,  79,  77,  76,  75,  74,  72,  71,  70,  69,  68,  66,  65,  64, 63, 62, 61, 60, 59, 58, 57, 56, 55,
+     54,  53,  52,  51,  50,  49,  48,  47,  46,  45,  44,  43,  42,  41,  40,  40,  39, 38, 37, 36, 35, 35, 34, 33, 32, 31,
+     31,  30,  29,  28,  28,  27,  26,  25,  25,  24,  23,  23,  22,  21,  21,  20,  19, 19, 18, 17, 17, 16, 15, 15, 14, 14,
+     13,  12,  12,  11,  11,  10,  9,   9,   8,   8,   7,   7,   6,   5,   5,   4,   4,  3,  3,  2,  2,  1,  1,  0}};
+// Computes the 7-bit reciprocal estimate for a finite, non-zero operand that has been split into its fields.
+//   res            receives the assembled result encoding (sign | exponent | significand)
+//   s, e           number of significand / exponent bits of the format
+//   sign, exp, sig raw field values of the operand
+//   subnormal      true when the operand is subnormal (sig must then be non-zero)
+//   mode           rounding mode (1 = toward zero, 2 = down, 3 = up); only consulted when the estimate overflows
+// Returns true when the estimate overflowed, in which case res is either infinity or the greatest-magnitude finite
+// value depending on mode and sign (the callers in this file then raise overflow and inexact). Returns false otherwise.
+bool frec_general(uint64_t& res, const unsigned s, const unsigned e, const uint64_t sign, const int64_t exp, const uint64_t sig,
+                  const bool subnormal, uint8_t mode) {
+    const uint64_t sign_field = sign << (s + e);
+    int64_t normalized_exp = exp;
+    uint64_t normalized_sig = sig;
+    if(subnormal) {
+        // Count leading zeros only for subnormals: __builtin_clzll(0) is undefined and normal operands may have sig == 0.
+        const int nr_leadingzeros = static_cast<int>(__builtin_clzll(sig)) - static_cast<int>(64 - s);
+        if(nr_leadingzeros > 1) {
+            // The reciprocal does not fit into the format any more.
+            const uint64_t infinity = ((1ULL << e) - 1) << s;
+            const bool to_finite = (mode == 0b001) || (mode == 0b010 && sign == 0b0) || (mode == 0b011 && sign == 0b1);
+            // infinity - 1 is the greatest-magnitude finite encoding: largest normal exponent, all-ones significand.
+            res = sign_field | (to_finite ? infinity - 1 : infinity);
+            return true;
+        }
+        normalized_exp = -nr_leadingzeros;
+        normalized_sig = (sig << (1 + nr_leadingzeros)) & ((1ULL << s) - 1);
+    }
+    const unsigned idx = (normalized_sig >> (s - 7)) & 0x7f;
+    const int64_t bias = (1LL << (e - 1)) - 1;
+    // Kept signed on purpose: the value is -1 for operands carrying the largest normal exponent (2 * bias).
+    const int64_t mid_exp = 2 * bias - 1 - normalized_exp;
+    const uint64_t mid_sig = rec_table[idx] << (s - 7);
+
+    uint64_t out_exp = 0;
+    uint64_t out_sig = mid_sig;
+    if(mid_exp == 0) {
+        // Subnormal result: make the implicit leading one explicit and shift it into the significand.
+        out_sig = (mid_sig >> 1) | (1ULL << (s - 1));
+    } else if(mid_exp == -1) {
+        // Subnormal result one binade lower: shift by one more position.
+        out_sig = (mid_sig >> 2) | (1ULL << (s - 2));
+    } else {
+        out_exp = static_cast<uint64_t>(mid_exp);
+    }
+    res = sign_field | (out_exp << s) | out_sig;
+    return false;
+}

 extern "C" {

 uint32_t fget_flags() { return softfloat_exceptionFlags & 0x1f; }
+// Half-precision addition v1 + v2 under rounding mode `mode`; the exception flags are cleared before the operation.
+uint16_t fadd_h(uint16_t v1, uint16_t v2, uint8_t mode) {
+    softfloat_exceptionFlags = 0;
+    softfloat_roundingMode = mode;
+    const float16_t sum = f16_add(float16_t{v1}, float16_t{v2});
+    return sum.v;
+}
+
+// Half-precision subtraction v1 - v2 under rounding mode `mode`; the exception flags are cleared before the operation.
+uint16_t fsub_h(uint16_t v1, uint16_t v2, uint8_t mode) {
+    softfloat_exceptionFlags = 0;
+    softfloat_roundingMode = mode;
+    const float16_t difference = f16_sub(float16_t{v1}, float16_t{v2});
+    return difference.v;
+}
+
+// Half-precision multiplication v1 * v2 under rounding mode `mode`; the exception flags are cleared before the operation.
+uint16_t fmul_h(uint16_t v1, uint16_t v2, uint8_t mode) {
+    softfloat_exceptionFlags = 0;
+    softfloat_roundingMode = mode;
+    const float16_t product = f16_mul(float16_t{v1}, float16_t{v2});
+    return product.v;
+}
+
+// Half-precision division v1 / v2 under rounding mode `mode`; the exception flags are cleared before the operation.
+uint16_t fdiv_h(uint16_t v1, uint16_t v2, uint8_t mode) {
+    softfloat_exceptionFlags = 0;
+    softfloat_roundingMode = mode;
+    const float16_t quotient = f16_div(float16_t{v1}, float16_t{v2});
+    return quotient.v;
+}
+
+// Half-precision square root of v1 under rounding mode `mode`; the exception flags are cleared before the operation.
+uint16_t fsqrt_h(uint16_t v1, uint8_t mode) {
+    softfloat_exceptionFlags = 0;
+    softfloat_roundingMode = mode;
+    const float16_t root = f16_sqrt(float16_t{v1});
+    return root.v;
+}
+
+// Half-precision comparison. op selects the predicate: 0 = equal, 1 = less-or-equal, 2 = less-than.
+// Returns 1 when the predicate holds and 0 otherwise; any other op yields 0xFFFF (-1 truncated to 16 bits).
+// The exception flags are cleared first. With a NaN operand the result is 0; the invalid flag is raised for
+// signaling NaNs on op 0 and for every NaN on op 1 and op 2.
+uint16_t fcmp_h(uint16_t v1, uint16_t v2, uint16_t op) {
+    float16_t v1f{v1}, v2f{v2};
+    softfloat_exceptionFlags = 0;
+    // Real NaN test on both operands; a plain bit-AND with the default NaN pattern would match ordinary numbers too.
+    bool nan = isNaNF16UI(v1) || isNaNF16UI(v2);
+    bool snan = softfloat_isSigNaNF16UI(v1) || softfloat_isSigNaNF16UI(v2);
+    switch(op) {
+    case 0:
+        if(nan | snan) {
+            if(snan)
+                softfloat_raiseFlags(softfloat_flag_invalid);
+            return 0;
+        } else
+            return f16_eq(v1f, v2f) ? 1 : 0;
+    case 1:
+        if(nan | snan) {
+            softfloat_raiseFlags(softfloat_flag_invalid);
+            return 0;
+        } else
+            return f16_le(v1f, v2f) ? 1 : 0;
+    case 2:
+        if(nan | snan) {
+            softfloat_raiseFlags(softfloat_flag_invalid);
+            return 0;
+        } else
+            return f16_lt(v1f, v2f) ? 1 : 0;
+    default:
+        break;
+    }
+    return -1;
+}
+
+// Half-precision fused multiply-add family, computed as (v1 * v2) + v3 after optional sign flips selected by op:
+//   0: no flip, 1: v3 negated, 2: v1 and v3 negated, 3: v1 negated; any other op leaves the operands unchanged.
+// The rounding mode is taken from `mode` and the exception flags are cleared before the operation.
+uint16_t fmadd_h(uint16_t v1, uint16_t v2, uint16_t v3, uint16_t op, uint8_t mode) {
+    constexpr uint16_t sign_bit = 0x8000;
+    const bool negate_addend = op == 1 || op == 2;
+    const bool negate_factor = op == 2 || op == 3;
+    if(negate_factor)
+        v1 ^= sign_bit;
+    if(negate_addend)
+        v3 ^= sign_bit;
+    softfloat_exceptionFlags = 0;
+    softfloat_roundingMode = mode;
+    const float16_t fused = softfloat_mulAddF16(v1, v2, v3, 0);
+    return fused.v;
+}
+
+// Half-precision select: op == 0 returns the lesser operand, any other op the greater one.
+// The exception flags are cleared first; a signaling NaN operand raises the invalid flag. If exactly one operand is
+// a NaN the other operand is returned, if both are NaNs the default NaN is returned. Two zeros are ordered by sign,
+// with the negative zero treated as the lesser value.
+uint16_t fsel_h(uint16_t v1, uint16_t v2, uint16_t op) {
+    softfloat_exceptionFlags = 0;
+    const bool first_signaling = softfloat_isSigNaNF16UI(v1);
+    const bool second_signaling = softfloat_isSigNaNF16UI(v2);
+    const bool first_is_nan = first_signaling || (v1 & defaultNaNF16UI) == defaultNaNF16UI;
+    const bool second_is_nan = second_signaling || (v2 & defaultNaNF16UI) == defaultNaNF16UI;
+    if(first_signaling || second_signaling)
+        softfloat_raiseFlags(softfloat_flag_invalid);
+    if(first_is_nan && second_is_nan)
+        return defaultNaNF16UI;
+    if(first_is_nan)
+        return v2;
+    if(second_is_nan)
+        return v1;
+    // Decide whether v1 orders below v2; zeros compare by sign bit, everything else through softfloat.
+    const bool both_zero = (v1 & 0x7fff) == 0 && (v2 & 0x7fff) == 0;
+    bool v1_is_lower;
+    if(both_zero)
+        v1_is_lower = (v1 & 0x8000) != 0;
+    else
+        v1_is_lower = f16_lt(float16_t{v1}, float16_t{v2});
+    const bool want_lesser = op == 0;
+    return (want_lesser == v1_is_lower) ? v1 : v2;
+}
+
+// Classifies a half-precision value and returns a mask with exactly one bit set:
+//   bit 0: -infinity      bit 1: negative normal   bit 2: negative subnormal   bit 3: -0
+//   bit 4: +0             bit 5: positive subnormal bit 6: positive normal     bit 7: +infinity
+//   bit 8: signaling NaN  bit 9: quiet NaN
+uint16_t fclass_h(uint16_t v1) {
+    const uint_fast16_t bits = v1;
+
+    const bool exp_all_ones = expF16UI(bits) == 0x1F;
+    const bool exp_zero = expF16UI(bits) == 0;
+    const bool negative = signF16UI(bits);
+    const bool frac_zero = fracF16UI(bits) == 0;
+    const bool is_nan = isNaNF16UI(bits);
+    const bool is_signaling = softfloat_isSigNaNF16UI(bits);
+
+    const bool is_inf = exp_all_ones && frac_zero;
+    const bool is_normal = !exp_all_ones && !exp_zero;
+    const bool is_subnormal = exp_zero && !frac_zero;
+    const bool is_zero = exp_zero && frac_zero;
+
+    uint16_t mask = 0;
+    mask |= (negative && is_inf) << 0;
+    mask |= (negative && is_normal) << 1;
+    mask |= (negative && is_subnormal) << 2;
+    mask |= (negative && is_zero) << 3;
+    mask |= (!negative && is_zero) << 4;
+    mask |= (!negative && is_subnormal) << 5;
+    mask |= (!negative && is_normal) << 6;
+    mask |= (!negative && is_inf) << 7;
+    mask |= (is_nan && is_signaling) << 8;
+    mask |= (is_nan && !is_signaling) << 9;
+    return mask;
+}
+
+// 7-bit reciprocal square-root estimate for a half-precision value.
+// Special operands (zeros, infinities, NaNs, negative values) are resolved by rsqrt_check, which also clears and sets
+// the exception flags; everything else is handed to frsqrt7_general with the half-precision field widths.
+uint16_t frsqrt7_h(uint16_t v) {
+    bool subnormal = false;
+    uint16_t ret_val = 0;
+    if(rsqrt_check(fclass_h(v), subnormal, ret_val)) {
+        return ret_val;
+    }
+    // The operand is 16 bits wide, so the fields must be extracted with the F16 helpers; the F64 helpers would
+    // report a zero exponent and sign and treat the whole encoding as fraction.
+    uint16_t sig = fracF16UI(v);
+    int16_t exp = expF16UI(v);
+    uint16_t sign = signF16UI(v);
+    unsigned constexpr e = 5;  // exponent bits of binary16
+    unsigned constexpr s = 10; // significand bits of binary16
+    return frsqrt7_general(s, e, sign, exp, sig, subnormal);
+}
+
+// 7-bit reciprocal estimate for a half-precision value.
+// Special operands are resolved by recip_check; otherwise frec_general builds the estimate and, when it reports an
+// overflow, the inexact and overflow flags are raised. `mode` is only forwarded to frec_general.
+uint16_t frec7_h(uint16_t v, uint8_t mode) {
+    constexpr unsigned exp_bits = 5;
+    constexpr unsigned sig_bits = 10;
+    bool is_subnormal = false;
+    uint64_t result = 0;
+    if(recip_check(fclass_h(v), is_subnormal, result))
+        return result;
+    const uint16_t frac = fracF16UI(v);
+    const int exponent = expF16UI(v);
+    const uint16_t sign_bit = signF16UI(v);
+    const bool overflowed = frec_general(result, sig_bits, exp_bits, sign_bit, exponent, frac, is_subnormal, mode);
+    if(overflowed)
+        softfloat_exceptionFlags |= (softfloat_flag_inexact | softfloat_flag_overflow);
+    return result;
+}
+
+// Extracts a half-precision value from a register of width FLEN.
+// For FLEN 32 and 64 all register bits above the low 16 must be set; otherwise the default NaN is returned.
+// For any other FLEN no check is applied. On success the low 16 bits of v are returned.
+uint16_t unbox_h(uint8_t FLEN, uint64_t v) {
+    constexpr uint64_t low_half = std::numeric_limits<uint16_t>::max();
+    uint64_t upper_bits = 0;
+    if(FLEN == 32)
+        upper_bits = std::numeric_limits<uint32_t>::max() & ~low_half;
+    else if(FLEN == 64)
+        upper_bits = std::numeric_limits<uint64_t>::max() & ~low_half;
+    const bool properly_boxed = (v & upper_bits) == upper_bits;
+    if(properly_boxed)
+        return static_cast<uint16_t>(v);
+    return defaultNaNF16UI;
+}

 uint32_t fadd_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float32_t r = f32_add(v1f, v2f);
    return r.v;
@@ -66,7 +422,7 @@ uint32_t fadd_s(uint32_t v1, uint32_t v2, uint8_t mode) {

 uint32_t fsub_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float32_t r = f32_sub(v1f, v2f);
    return r.v;
@@ -74,7 +430,7 @@ uint32_t fsub_s(uint32_t v1, uint32_t v2, uint8_t mode) {

 uint32_t fmul_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float32_t r = f32_mul(v1f, v2f);
    return r.v;
@@ -82,7 +438,7 @@ uint32_t fmul_s(uint32_t v1, uint32_t v2, uint8_t mode) {

 uint32_t fdiv_s(uint32_t v1, uint32_t v2, uint8_t mode) {
    float32_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float32_t r = f32_div(v1f, v2f);
    return r.v;
@@ -90,7 +446,7 @@ uint32_t fdiv_s(uint32_t v1, uint32_t v2, uint8_t mode) {

 uint32_t fsqrt_s(uint32_t v1, uint8_t mode) {
    float32_t v1f{v1};
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float32_t r = f32_sqrt(v1f);
    return r.v;
@@ -99,7 +455,7 @@ uint32_t fsqrt_s(uint32_t v1, uint8_t mode) {
 uint32_t fcmp_s(uint32_t v1, uint32_t v2, uint32_t op) {
    float32_t v1f{v1}, v2f{v2};
    softfloat_exceptionFlags = 0;
-    bool nan = (v1 & defaultNaNF32UI) == quiet_nan32 || (v2 & defaultNaNF32UI) == quiet_nan32;
+    bool nan = v1 == defaultNaNF32UI || v2 == defaultNaNF32UI;
    bool snan = softfloat_isSigNaNF32UI(v1) || softfloat_isSigNaNF32UI(v2);
    switch(op) {
    case 0:
@@ -127,29 +483,6 @@ uint32_t fcmp_s(uint32_t v1, uint32_t v2, uint32_t op) {
    return -1;
 }

-uint32_t fcvt_s(uint32_t v1, uint32_t op, uint8_t mode) {
-    float32_t v1f{v1};
-    softfloat_exceptionFlags = 0;
-    float32_t r;
-    switch(op) {
-    case 0: { // FCVT__W__S
-        uint_fast32_t res = f32_to_i32(v1f, rmm_map.at(mode), true);
-        return (uint32_t)res;
-    }
-    case 1: { // FCVT__WU__S
-        uint_fast32_t res = f32_to_ui32(v1f, rmm_map.at(mode), true);
-        return (uint32_t)res;
-    }
-    case 2: // FCVT__S__W
-        r = i32_to_f32((int32_t)v1);
-        return r.v;
-    case 3: // FCVT__S__WU
-        r = ui32_to_f32(v1);
-        return r.v;
-    }
-    return 0;
-}
-
 uint32_t fmadd_s(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t op, uint8_t mode) {
    uint32_t F32_SIGN = 1UL << 31;
    switch(op) {
@@ -166,7 +499,7 @@ uint32_t fmadd_s(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t op, uint8_t mod
        v1 ^= F32_SIGN;
        break;
    }
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float32_t res = softfloat_mulAddF32(v1, v2, v3, 0);
    return res.v;
@@ -216,37 +549,60 @@ uint32_t fclass_s(uint32_t v1) {
           (!sign && subnormalOrZero && fracZero) << 4 | (isNaN && isSNaN) << 8 | (isNaN && !isSNaN) << 9;
 }

-uint32_t fconv_d2f(uint64_t v1, uint8_t mode) {
-    bool isNan = isNaNF64UI(v1);
-    bool isSNaN = softfloat_isSigNaNF64UI(v1);
-    softfloat_roundingMode = rmm_map.at(mode);
-    softfloat_exceptionFlags = 0;
-    if(isNan) {
-        if(isSNaN)
-            softfloat_raiseFlags(softfloat_flag_invalid);
-        return defaultNaNF32UI;
-    } else {
-        float32_t res = f64_to_f32(float64_t{v1});
-        return res.v;
+uint32_t frsqrt7_s(uint32_t v) {
+    bool subnormal = false;
+    uint32_t ret_val = 0;
+    if(rsqrt_check(fclass_s(v), subnormal, ret_val)) {
+        return ret_val;
    }
+    uint32_t sig = fracF32UI(v);
+    int exp = expF32UI(v);
+    uint32_t sign = signF32UI(v);
+    unsigned constexpr e = 8;
+    unsigned constexpr s = 23;
+    return frsqrt7_general(s, e, sign, exp, sig, subnormal);
 }

-uint64_t fconv_f2d(uint32_t v1, uint8_t mode) {
-    bool infOrNaN = expF32UI(v1) == 0xFF;
-    bool subnormalOrZero = expF32UI(v1) == 0;
-    if(infOrNaN || subnormalOrZero) {
-        return defaultNaNF64UI;
-    } else {
-        float64_t res = f32_to_f64(float32_t{v1});
-        return res.v;
+uint32_t frec7_s(uint32_t v, uint8_t mode) {
+    bool subnormal = false;
+    uint64_t ret_val = 0;
+    if(recip_check(fclass_s(v), subnormal, ret_val)) {
+        return ret_val;
    }
+    uint32_t sig = fracF32UI(v);
+    int exp = expF32UI(v);
+    uint32_t sign = signF32UI(v);
+    unsigned constexpr e = 8;
+    unsigned constexpr s = 23;
+    if(frec_general(ret_val, s, e, sign, exp, sig, subnormal, mode))
+        softfloat_exceptionFlags |= (softfloat_flag_inexact | softfloat_flag_overflow);
+    return ret_val;
+}
+
+uint32_t unbox_s(uint8_t FLEN, uint64_t v) {
+    uint64_t mask = 0;
+    switch(FLEN) {
+    case 32: {
+        return v;
+    }
+    case 64: {
+        mask = std::numeric_limits<uint64_t>::max() & ~((uint64_t)std::numeric_limits<uint32_t>::max());
+        break;
+    }
+    default:
+        break;
+    }
+    if((v & mask) != mask)
+        return defaultNaNF32UI;
+    else
+        return v & std::numeric_limits<uint32_t>::max();
 }

 uint64_t fadd_d(uint64_t v1, uint64_t v2, uint8_t mode) {
-    bool nan = (v1 & defaultNaNF32UI) == quiet_nan32;
+    bool nan = v1 == defaultNaNF32UI;
    bool snan = softfloat_isSigNaNF32UI(v1);
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float64_t r = f64_add(v1f, v2f);
    return r.v;
@@ -254,7 +610,7 @@ uint64_t fadd_d(uint64_t v1, uint64_t v2, uint8_t mode) {

 uint64_t fsub_d(uint64_t v1, uint64_t v2, uint8_t mode) {
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float64_t r = f64_sub(v1f, v2f);
    return r.v;
@@ -262,7 +618,7 @@ uint64_t fsub_d(uint64_t v1, uint64_t v2, uint8_t mode) {

 uint64_t fmul_d(uint64_t v1, uint64_t v2, uint8_t mode) {
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float64_t r = f64_mul(v1f, v2f);
    return r.v;
@@ -270,7 +626,7 @@ uint64_t fmul_d(uint64_t v1, uint64_t v2, uint8_t mode) {

 uint64_t fdiv_d(uint64_t v1, uint64_t v2, uint8_t mode) {
    float64_t v1f{v1}, v2f{v2};
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float64_t r = f64_div(v1f, v2f);
    return r.v;
@@ -278,7 +634,7 @@ uint64_t fdiv_d(uint64_t v1, uint64_t v2, uint8_t mode) {

 uint64_t fsqrt_d(uint64_t v1, uint8_t mode) {
    float64_t v1f{v1};
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float64_t r = f64_sqrt(v1f);
    return r.v;
@@ -287,7 +643,7 @@ uint64_t fsqrt_d(uint64_t v1, uint8_t mode) {
 uint64_t fcmp_d(uint64_t v1, uint64_t v2, uint32_t op) {
    float64_t v1f{v1}, v2f{v2};
    softfloat_exceptionFlags = 0;
-    bool nan = (v1 & defaultNaNF64UI) == quiet_nan32 || (v2 & defaultNaNF64UI) == quiet_nan32;
+    bool nan = v1 == defaultNaNF64UI || v2 == defaultNaNF64UI;
    bool snan = softfloat_isSigNaNF64UI(v1) || softfloat_isSigNaNF64UI(v2);
    switch(op) {
    case 0:
@@ -315,30 +671,6 @@ uint64_t fcmp_d(uint64_t v1, uint64_t v2, uint32_t op) {
    return -1;
 }

-uint64_t fcvt_d(uint64_t v1, uint32_t op, uint8_t mode) {
-
-    float64_t v1f{v1};
-    softfloat_exceptionFlags = 0;
-    float64_t r;
-    switch(op) {
-    case 0: { // l from d
-        int64_t res = f64_to_i64(v1f, rmm_map.at(mode), true);
-        return (uint64_t)res;
-    }
-    case 1: { // lu from d
-        uint64_t res = f64_to_ui64(v1f, rmm_map.at(mode), true);
-        return res;
-    }
-    case 2: // d from l
-        r = i64_to_f64(v1);
-        return r.v;
-    case 3: // d from lu
-        r = ui64_to_f64(v1);
-        return r.v;
-    }
-    return 0;
-}
-
 uint64_t fmadd_d(uint64_t v1, uint64_t v2, uint64_t v3, uint32_t op, uint8_t mode) {
    uint64_t F64_SIGN = 1ULL << 63;
    switch(op) {
@@ -355,7 +687,7 @@ uint64_t fmadd_d(uint64_t v1, uint64_t v2, uint64_t v3, uint32_t op, uint8_t mod
        v1 ^= F64_SIGN;
        break;
    }
-    softfloat_roundingMode = rmm_map.at(mode);
+    softfloat_roundingMode = mode;
    softfloat_exceptionFlags = 0;
    float64_t res = softfloat_mulAddF64(v1, v2, v3, 0);
    return res.v;
@@ -406,52 +738,211 @@ uint64_t fclass_d(uint64_t v1) {
           (!sign && subnormalOrZero && fracZero) << 4 | (isNaN && isSNaN) << 8 | (isNaN && !isSNaN) << 9;
 }

-uint64_t fcvt_32_64(uint32_t v1, uint32_t op, uint8_t mode) {
-    float32_t v1f{v1};
-    softfloat_exceptionFlags = 0;
-    float64_t r;
-    switch(op) {
-    case 0: // l->s, fp to int32
-        return f32_to_i64(v1f, rmm_map.at(mode), true);
-    case 1: // wu->s
-        return f32_to_ui64(v1f, rmm_map.at(mode), true);
-    case 2: // s->w
-        r = i32_to_f64(v1);
-        return r.v;
-    case 3: // s->wu
-        r = ui32_to_f64(v1);
-        return r.v;
+uint64_t frsqrt7_d(uint64_t v) {
+    bool subnormal = false;
+    uint64_t ret_val = 0;
+    if(rsqrt_check(fclass_d(v), subnormal, ret_val)) {
+        return ret_val;
    }
-    return 0;
+    uint64_t sig = fracF64UI(v);
+    int exp = expF64UI(v);
+    uint64_t sign = signF64UI(v);
+    unsigned constexpr e = 11;
+    unsigned constexpr s = 52;
+    return frsqrt7_general(s, e, sign, exp, sig, subnormal);
 }

-uint32_t fcvt_64_32(uint64_t v1, uint32_t op, uint8_t mode) {
-    softfloat_exceptionFlags = 0;
-    float32_t r;
-    switch(op) {
-    case 0: { // wu->s
-        int32_t r = f64_to_i32(float64_t{v1}, rmm_map.at(mode), true);
-        return r;
+uint64_t frec7_d(uint64_t v, uint8_t mode) {
+    bool subnormal = false;
+    uint64_t ret_val = 0;
+    if(recip_check(fclass_d(v), subnormal, ret_val)) {
+        return ret_val;
    }
-    case 1: { // wu->s
-        uint32_t r = f64_to_ui32(float64_t{v1}, rmm_map.at(mode), true);
-        return r;
-    }
-    case 2: // l->s, fp to int32
-        r = i64_to_f32(v1);
-        return r.v;
-    case 3: // wu->s
-        r = ui64_to_f32(v1);
-        return r.v;
-    }
-    return 0;
+    uint64_t sig = fracF64UI(v);
+    int exp = expF64UI(v);
+    uint64_t sign = signF64UI(v);
+    unsigned constexpr e = 11;
+    unsigned constexpr s = 52;
+    if(frec_general(ret_val, s, e, sign, exp, sig, subnormal, mode))
+        softfloat_exceptionFlags |= (softfloat_flag_inexact | softfloat_flag_overflow);
+    return ret_val;
 }

-uint32_t unbox_s(uint64_t v) {
-    constexpr uint64_t mask = std::numeric_limits<uint64_t>::max() & ~((uint64_t)std::numeric_limits<uint32_t>::max());
+uint64_t unbox_d(uint8_t FLEN, uint64_t v) {
+    uint64_t mask = 0;
+    switch(FLEN) {
+    case 64: {
+        return v;
+        break;
+    }
+    default:
+        break;
+    }
    if((v & mask) != mask)
-        return 0x7fc00000;
+        return defaultNaNF64UI;
    else
        return v & std::numeric_limits<uint32_t>::max();
 }
+
+// conversion: float to float
+uint32_t f16tof32(uint16_t val, uint8_t rm) {
+    softfloat_exceptionFlags = 0;
+    softfloat_roundingMode = rm;
+    return f16_to_f32(float16_t{val}).v;
 }
+// half -> double; installs rm as rounding mode and clears the exception flags before converting
+uint64_t f16tof64(uint16_t val, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float64_t converted = f16_to_f64(float16_t{val});
+    return converted.v;
+}
+
+// single -> half; installs rm as rounding mode and clears the exception flags before converting
+uint16_t f32tof16(uint32_t val, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float16_t converted = f32_to_f16(float32_t{val});
+    return converted.v;
+}
+// single -> double; installs rm as rounding mode and clears the exception flags before converting
+uint64_t f32tof64(uint32_t val, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float64_t converted = f32_to_f64(float32_t{val});
+    return converted.v;
+}
+
+// double -> half; installs rm as rounding mode and clears the exception flags before converting
+uint16_t f64tof16(uint64_t val, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float16_t converted = f64_to_f16(float64_t{val});
+    return converted.v;
+}
+// double -> single; installs rm as rounding mode and clears the exception flags before converting
+uint32_t f64tof32(uint64_t val, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float32_t converted = f64_to_f32(float64_t{val});
+    return converted.v;
+}
+
+// conversions: float to unsigned
+// Each converter below clears the exception flags, installs rm as the global rounding mode and additionally passes
+// rm plus `true` for the trailing flag to the softfloat conversion routine.
+uint32_t f16toui32(uint16_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float16_t operand{v};
+    return f16_to_ui32(operand, rm, true);
+}
+uint64_t f16toui64(uint16_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float16_t operand{v};
+    return f16_to_ui64(operand, rm, true);
+}
+uint32_t f32toui32(uint32_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float32_t operand{v};
+    return f32_to_ui32(operand, rm, true);
+}
+uint64_t f32toui64(uint32_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float32_t operand{v};
+    return f32_to_ui64(operand, rm, true);
+}
+uint32_t f64toui32(uint64_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float64_t operand{v};
+    return f64_to_ui32(operand, rm, true);
+}
+uint64_t f64toui64(uint64_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float64_t operand{v};
+    return f64_to_ui64(operand, rm, true);
+}
+
+// conversions: float to signed
+// Each converter below clears the exception flags, installs rm as the global rounding mode and additionally passes
+// rm plus `true` for the trailing flag to the softfloat conversion routine. The signed result is returned in an
+// unsigned container of the same width.
+uint32_t f16toi32(uint16_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float16_t operand{v};
+    return f16_to_i32(operand, rm, true);
+}
+uint64_t f16toi64(uint16_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float16_t operand{v};
+    return f16_to_i64(operand, rm, true);
+}
+uint32_t f32toi32(uint32_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float32_t operand{v};
+    return f32_to_i32(operand, rm, true);
+}
+uint64_t f32toi64(uint32_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float32_t operand{v};
+    return f32_to_i64(operand, rm, true);
+}
+uint32_t f64toi32(uint64_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float64_t operand{v};
+    return f64_to_i32(operand, rm, true);
+}
+uint64_t f64toi64(uint64_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float64_t operand{v};
+    return f64_to_i64(operand, rm, true);
+}
+
+// conversions: unsigned to float
+// Each converter below clears the exception flags and installs rm as the global rounding mode before handing the
+// unsigned integer to the matching softfloat routine; the raw encoding of the result is returned.
+uint16_t ui32tof16(uint32_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float16_t converted = ui32_to_f16(v);
+    return converted.v;
+}
+uint16_t ui64tof16(uint64_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float16_t converted = ui64_to_f16(v);
+    return converted.v;
+}
+uint32_t ui32tof32(uint32_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float32_t converted = ui32_to_f32(v);
+    return converted.v;
+}
+uint32_t ui64tof32(uint64_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float32_t converted = ui64_to_f32(v);
+    return converted.v;
+}
+uint64_t ui32tof64(uint32_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float64_t converted = ui32_to_f64(v);
+    return converted.v;
+}
+uint64_t ui64tof64(uint64_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float64_t converted = ui64_to_f64(v);
+    return converted.v;
+}
+
+// conversions: signed to float
+// Each converter below clears the exception flags and installs rm as the global rounding mode before handing the
+// integer (passed in an unsigned container) to the matching signed softfloat routine; the raw encoding of the
+// result is returned.
+uint16_t i32tof16(uint32_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float16_t converted = i32_to_f16(v);
+    return converted.v;
+}
+uint16_t i64tof16(uint64_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float16_t converted = i64_to_f16(v);
+    return converted.v;
+}
+uint32_t i32tof32(uint32_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float32_t converted = i32_to_f32(v);
+    return converted.v;
+}
+uint32_t i64tof32(uint64_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float32_t converted = i64_to_f32(v);
+    return converted.v;
+}
+uint64_t i32tof64(uint32_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float64_t converted = i32_to_f64(v);
+    return converted.v;
+}
+uint64_t i64tof64(uint64_t v, uint8_t rm) {
+    softfloat_roundingMode = rm;
+    softfloat_exceptionFlags = 0;
+    const float64_t converted = i64_to_f64(v);
+    return converted.v;
+}
+}
@@ -39,30 +39,88 @@

 extern "C" {
 uint32_t fget_flags();
+
+// half precision (operands and results travel as 16-bit integers; presumably raw binary16 bit patterns — confirm in fp_functions.cpp)
+uint16_t fadd_h(uint16_t v1, uint16_t v2, uint8_t mode);
+uint16_t fsub_h(uint16_t v1, uint16_t v2, uint8_t mode);
+uint16_t fmul_h(uint16_t v1, uint16_t v2, uint8_t mode);
+uint16_t fdiv_h(uint16_t v1, uint16_t v2, uint8_t mode);
+uint16_t fsqrt_h(uint16_t v1, uint8_t mode);
+uint16_t fcmp_h(uint16_t v1, uint16_t v2, uint16_t op);
+uint16_t fmadd_h(uint16_t v1, uint16_t v2, uint16_t v3, uint16_t op, uint8_t mode);
+uint16_t fsel_h(uint16_t v1, uint16_t v2, uint16_t op);
+uint16_t fsqrt_h(uint16_t v1, uint8_t mode); // NOTE(review): repeats the fsqrt_h declaration a few lines up; redundant but harmless
+uint16_t fclass_h(uint16_t v1);
+uint16_t frsqrt7_h(uint16_t v);
+uint16_t frec7_h(uint16_t v, uint8_t mode);
+uint16_t unbox_h(uint8_t FLEN, uint64_t v);
+
+// single precision
 uint32_t fadd_s(uint32_t v1, uint32_t v2, uint8_t mode);
 uint32_t fsub_s(uint32_t v1, uint32_t v2, uint8_t mode);
 uint32_t fmul_s(uint32_t v1, uint32_t v2, uint8_t mode);
 uint32_t fdiv_s(uint32_t v1, uint32_t v2, uint8_t mode);
 uint32_t fsqrt_s(uint32_t v1, uint8_t mode);
 uint32_t fcmp_s(uint32_t v1, uint32_t v2, uint32_t op);
-uint32_t fcvt_s(uint32_t v1, uint32_t op, uint8_t mode);
 uint32_t fmadd_s(uint32_t v1, uint32_t v2, uint32_t v3, uint32_t op, uint8_t mode);
 uint32_t fsel_s(uint32_t v1, uint32_t v2, uint32_t op);
 uint32_t fclass_s(uint32_t v1);
-uint32_t fconv_d2f(uint64_t v1, uint8_t mode);
-uint64_t fconv_f2d(uint32_t v1, uint8_t mode);
+uint32_t frsqrt7_s(uint32_t v);
+uint32_t frec7_s(uint32_t v, uint8_t mode);
+uint32_t unbox_s(uint8_t FLEN, uint64_t v);
+
+// double precision
 uint64_t fadd_d(uint64_t v1, uint64_t v2, uint8_t mode);
 uint64_t fsub_d(uint64_t v1, uint64_t v2, uint8_t mode);
 uint64_t fmul_d(uint64_t v1, uint64_t v2, uint8_t mode);
 uint64_t fdiv_d(uint64_t v1, uint64_t v2, uint8_t mode);
 uint64_t fsqrt_d(uint64_t v1, uint8_t mode);
 uint64_t fcmp_d(uint64_t v1, uint64_t v2, uint32_t op);
-uint64_t fcvt_d(uint64_t v1, uint32_t op, uint8_t mode);
 uint64_t fmadd_d(uint64_t v1, uint64_t v2, uint64_t v3, uint32_t op, uint8_t mode);
 uint64_t fsel_d(uint64_t v1, uint64_t v2, uint32_t op);
 uint64_t fclass_d(uint64_t v1);
-uint64_t fcvt_32_64(uint32_t v1, uint32_t op, uint8_t mode);
-uint32_t fcvt_64_32(uint64_t v1, uint32_t op, uint8_t mode);
-uint32_t unbox_s(uint64_t v);
+uint64_t frsqrt7_d(uint64_t v);
+uint64_t frec7_d(uint64_t v, uint8_t mode);
+uint64_t unbox_d(uint8_t FLEN, uint64_t v);
+
+// conversion: float to float
+uint32_t f16tof32(uint16_t val, uint8_t rm);
+uint64_t f16tof64(uint16_t val, uint8_t rm);
+uint16_t f32tof16(uint32_t val, uint8_t rm);
+uint64_t f32tof64(uint32_t val, uint8_t rm);
+uint16_t f64tof16(uint64_t val, uint8_t rm);
+uint32_t f64tof32(uint64_t val, uint8_t rm);
+
+// conversions: float to unsigned
+uint32_t f16toui32(uint16_t v, uint8_t rm);
+uint64_t f16toui64(uint16_t v, uint8_t rm);
+uint32_t f32toui32(uint32_t v, uint8_t rm);
+uint64_t f32toui64(uint32_t v, uint8_t rm);
+uint32_t f64toui32(uint64_t v, uint8_t rm);
+uint64_t f64toui64(uint64_t v, uint8_t rm);
+
+// conversions: float to signed
+uint32_t f16toi32(uint16_t v, uint8_t rm);
+uint64_t f16toi64(uint16_t v, uint8_t rm);
+uint32_t f32toi32(uint32_t v, uint8_t rm);
+uint64_t f32toi64(uint32_t v, uint8_t rm);
+uint32_t f64toi32(uint64_t v, uint8_t rm);
+uint64_t f64toi64(uint64_t v, uint8_t rm);
+
+// conversions: unsigned to float
+uint16_t ui32tof16(uint32_t v, uint8_t rm);
+uint16_t ui64tof16(uint64_t v, uint8_t rm);
+uint32_t ui32tof32(uint32_t v, uint8_t rm);
+uint32_t ui64tof32(uint64_t v, uint8_t rm);
+uint64_t ui32tof64(uint32_t v, uint8_t rm);
+uint64_t ui64tof64(uint64_t v, uint8_t rm);
+
+// conversions: signed to float
+uint16_t i32tof16(uint32_t v, uint8_t rm);
+uint16_t i64tof16(uint64_t v, uint8_t rm);
+uint32_t i32tof32(uint32_t v, uint8_t rm);
+uint32_t i64tof32(uint64_t v, uint8_t rm);
+uint64_t i32tof64(uint32_t v, uint8_t rm);
+uint64_t i64tof64(uint64_t v, uint8_t rm);
 }
-#endif /* RISCV_SRC_VM_FP_FUNCTIONS_H_ */
+#endif /* _VM_FP_FUNCTIONS_H_ */
@@ -97,7 +97,7 @@ protected:
        return super::gen_cond_assign(cond, this->gen_ext(trueVal, size), this->gen_ext(falseVal, size));
    }

-    std::tuple<continuation_e, BasicBlock *> gen_single_inst_behavior(virt_addr_t &, unsigned int &, BasicBlock *) override;
+    std::tuple<continuation_e, BasicBlock *> gen_single_inst_behavior(virt_addr_t &, BasicBlock *) override;

    void gen_leave_behavior(BasicBlock *leave_blk) override;
    void gen_raise_trap(uint16_t trap_id, uint16_t cause);
@@ -4937,21 +4937,20 @@ vm_impl<ARCH>::vm_impl(ARCH &core, unsigned core_id, unsigned cluster_id)

 template <typename ARCH>
 std::tuple<continuation_e, BasicBlock *>
-vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, unsigned int &inst_cnt, BasicBlock *this_block) {
+vm_impl<ARCH>::gen_single_inst_behavior(virt_addr_t &pc, BasicBlock *this_block) {
    // we fetch at max 4 byte, alignment is 2
    enum {TRAP_ID=1<<16};
    code_word_t instr = 0;
    // const typename traits::addr_t upper_bits = ~traits::PGMASK;
    phys_addr_t paddr(pc);
    auto *const data = (uint8_t *)&instr;
-    if(this->core.has_mmu())
-        paddr = this->core.virt2phys(pc);
    auto res = this->core.read(paddr, 4, data);
    if (res != iss::Ok) 
        return std::make_tuple(ILLEGAL_FETCH, nullptr);
-    if (instr == 0x0000006f || (instr&0xffff)==0xa001)
+    if (instr == 0x0000006f || (instr&0xffff)==0xa001){
+        this->builder.CreateBr(this->leave_blk);
        return std::make_tuple(JUMP_TO_SELF, nullptr);
-    ++inst_cnt;
+        }
    uint32_t inst_index = instr_decoder.decode_instr(instr);
    compile_func f = nullptr;
    if(inst_index < instr_descr.size())
@@ -5033,6 +5032,10 @@ void vm_impl<ARCH>::gen_instr_epilogue(BasicBlock *bb) {
    auto* icount_val = this->builder.CreateAdd(
        this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::ICOUNT), get_reg_ptr(arch::traits<ARCH>::ICOUNT)), this->gen_const(64U, 1));
    this->builder.CreateStore(icount_val, get_reg_ptr(arch::traits<ARCH>::ICOUNT), false);
+    //increment cyclecount
+    auto* cycle_val = this->builder.CreateAdd(
+        this->builder.CreateLoad(this->get_typeptr(arch::traits<ARCH>::CYCLE), get_reg_ptr(arch::traits<ARCH>::CYCLE)), this->gen_const(64U, 1));
+    this->builder.CreateStore(cycle_val, get_reg_ptr(arch::traits<ARCH>::CYCLE), false);
 }

 } // namespace tgc5c
@@ -0,0 +1,101 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2025, MINRES Technologies GmbH
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+//    this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its contributors
+//    may be used to endorse or promote products derived from this software
+//    without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Contributors:
+//       alex@minres.com - initial API and implementation
+////////////////////////////////////////////////////////////////////////////////
+
+#include "vector_functions.h"
+#include "iss/vm_types.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <math.h>
+#include <stdexcept>
+#include <vector>
+
+namespace softvector {
+
+bool softvec_read(void* core, uint64_t addr, uint64_t length, uint8_t* data) {
+    // Fetch `length` bytes starting at physical address `addr` into `data`; true when the core reports iss::Ok.
+    auto* const arch = static_cast<iss::arch_if*>(core);
+    const iss::status outcome =
+        arch->read(iss::address_type::PHYSICAL, iss::access_type::READ, 0 /*traits<ARCH>::MEM*/, addr, length, data);
+    return outcome == iss::Ok;
+}
+bool softvec_write(void* core, uint64_t addr, uint64_t length, uint8_t* data) {
+    // Write length bytes from *data to addr, tagged as a WRITE access; returns true when the core reports iss::Ok
+    iss::status status = static_cast<iss::arch_if*>(core)->write(iss::address_type::PHYSICAL, iss::access_type::WRITE,
+                                                                 0 /*traits<ARCH>::MEM*/, addr, length, data);
+    return status == iss::Ok;
+}
+
+// A 32-bit vtype carries vill in its top bit (bit 31); relocate it to bit 63, where vill() looks for it.
+// The operand is widened before shifting because shifting a 32-bit value by 32 is undefined behaviour.
+vtype_t::vtype_t(uint32_t vtype_val) { underlying = (static_cast<uint64_t>(vtype_val & 0x80000000U) << 32) | (vtype_val & 0x7FFFFFFFU); }
+vtype_t::vtype_t(uint64_t vtype_val) { underlying = vtype_val; } // a 64-bit value is stored unchanged
+bool vtype_t::vill() { return underlying >> 63; } // top bit of the stored 64-bit value
+bool vtype_t::vma() { return (underlying >> 7) & 1; } // bit 7
+bool vtype_t::vta() { return (underlying >> 6) & 1; } // bit 6
+unsigned vtype_t::sew() {
+    // The width selector sits in bits [5:3]; the result is 8 * 2^selector.
+    const unsigned selector = (underlying >> 3) & 0x7;
+    return 8U << selector;
+}
+double vtype_t::lmul() {
+    const unsigned vlmul = underlying & 0x7;
+    assert(vlmul != 0b100); // reserved encoding
+    const int exponent = (vlmul & 0x4) ? static_cast<int>(vlmul) - 8 : static_cast<int>(vlmul); // sign-extend the 3-bit field
+    return pow(2, exponent);
+}
+
+mask_bit_reference& mask_bit_reference::operator=(const bool new_value) {
+    *start = new_value ? (*start | (1U << pos)) : (*start & ~(1U << pos)); // set or clear only the referenced bit
+    return *this;
+}
+
+// Binds the reference to bit `pos` of the byte at `start`.
+mask_bit_reference::mask_bit_reference(uint8_t* start, uint8_t pos)
+: start(start), pos(pos) {
+    assert(pos < 8 && "Bit reference can only be initialized for bytes");
+}
+mask_bit_reference::operator bool() const { return (*start >> pos) & 1U; } // true when the referenced bit is set
+
+mask_bit_reference vmask_view::operator[](size_t idx) const {
+    assert(idx < elem_count);
+    return mask_bit_reference(start + (idx >> 3), static_cast<uint8_t>(idx & 7)); // byte offset, then bit within that byte
+}
+
+vmask_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t reg_idx) {
+    // The view starts at the first byte of register reg_idx (VLEN / 8 bytes per register) and spans elem_count mask bits.
+    uint8_t* mask_start = V + VLEN / 8 * reg_idx;
+    // Round the byte count up so a trailing partial mask byte is bounds-checked as well.
+    assert(mask_start + (elem_count + 7) / 8 <= V + VLEN * RFS / 8);
+    return {mask_start, elem_count};
+}
+} // namespace softvector
@@ -0,0 +1,172 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2025, MINRES Technologies GmbH
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+//    this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its contributors
+//    may be used to endorse or promote products derived from this software
+//    without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Contributors:
+//       alex@minres.com - initial API and implementation
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _VM_VECTOR_FUNCTIONS_H_
+#define _VM_VECTOR_FUNCTIONS_H_
+
+#include "iss/arch_if.h"
+#include "iss/vm_types.h"
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <stdint.h>
+#include <type_traits>
+namespace softvector {
+const unsigned RFS = 32;
+
+struct vtype_t { // wrapper around a vtype value, always held in 64-bit form
+    uint64_t underlying; // raw bits the accessors below decode
+    vtype_t(uint32_t vtype_val); // build from a 32-bit vtype value
+    vtype_t(uint64_t vtype_val); // build from a 64-bit vtype value (stored unchanged)
+    unsigned sew(); // 8 * 2^(bits [5:3])
+    double lmul(); // 2^(bits [2:0] read as a signed 3-bit number); 0b100 is asserted against as reserved
+    bool vill(); // bit 63
+    bool vma(); // bit 7
+    bool vta(); // bit 6
+};
+class mask_bit_reference { // proxy for one bit inside a byte, readable and assignable as a bool
+    uint8_t* start; // byte that holds the bit
+    uint8_t pos; // bit position within that byte; the constructor asserts pos < 8
+
+public:
+    mask_bit_reference& operator=(const bool new_value); // writes the bit, leaving the other bits of the byte untouched
+    mask_bit_reference(uint8_t* start, uint8_t pos);
+    operator bool() const; // reads the bit
+};
+
+struct vmask_view { // non-owning view of elem_count mask bits packed 8 per byte
+    uint8_t* start; // first byte of the mask
+    size_t elem_count; // number of addressable mask bits; operator[] asserts idx < elem_count
+    mask_bit_reference operator[](size_t) const; // bit idx lives in byte idx / 8 at bit position idx % 8
+};
+vmask_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t reg_idx = 0);
+template <unsigned VLEN> vmask_view read_vmask(uint8_t* V, uint16_t elem_count, uint8_t reg_idx = 0);
+
+bool softvec_read(void* core, uint64_t addr, uint64_t length, uint8_t* data);
+bool softvec_write(void* core, uint64_t addr, uint64_t length, uint8_t* data);
+template <unsigned VLEN, typename eew_t>
+uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint64_t vl,
+                           uint64_t vstart, vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1, uint8_t segment_size, int64_t stride = 0,
+                           bool use_stride = false);
+template <unsigned XLEN, unsigned VLEN, typename eew_t, typename sew_t>
+uint64_t vector_load_store_index(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V,
+                                 uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1, uint8_t vs2,
+                                 uint8_t segment_size);
+template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = src2_elem_t>
+void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
+                      unsigned vs2, unsigned vs1);
+template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = src2_elem_t>
+void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
+                   unsigned vs2, typename std::make_signed<src1_elem_t>::type imm);
+template <unsigned VLEN, typename elem_t>
+void vector_vector_carry(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, unsigned vd,
+                         unsigned vs2, unsigned vs1, signed carry);
+template <unsigned VLEN, typename elem_t>
+void vector_imm_carry(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, unsigned vd, unsigned vs2,
+                      typename std::make_signed<elem_t>::type imm, signed carry);
+template <unsigned VLEN, typename scr_elem_t>
+void vector_vector_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1);
+template <unsigned VLEN, typename scr_elem_t>
+void vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
+template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t>
+void vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2);
+template <unsigned VLEN, typename elem_t>
+void mask_vector_vector_op(uint8_t* V, unsigned funct, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
+                           unsigned vs2, unsigned vs1);
+template <unsigned VLEN, typename elem_t>
+void mask_vector_imm_op(uint8_t* V, unsigned funct, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
+                        unsigned vs2, typename std::make_signed<elem_t>::type imm);
+void carry_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
+                            unsigned vs1);
+template <unsigned VLEN, typename elem_t>
+void carry_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
+                         typename std::make_signed<elem_t>::type imm);
+template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = dest_elem_t>
+bool sat_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, int64_t vxrm, bool vm,
+                          unsigned vd, unsigned vs2, unsigned vs1);
+template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = dest_elem_t>
+bool sat_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, int64_t vxrm, bool vm,
+                       unsigned vd, unsigned vs2, typename std::make_signed<src1_elem_t>::type imm);
+template <unsigned VLEN, typename dest_elem_t, typename src_elem_t = dest_elem_t>
+void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
+                   unsigned vs2, unsigned vs1);
+template <unsigned VLEN>
+void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, unsigned vd, unsigned vs2, unsigned vs1);
+template <unsigned VLEN> uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2);
+template <unsigned VLEN> uint64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2);
+template <unsigned VLEN> void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl, uint64_t vstart, bool vm, unsigned vd, unsigned vs2);
+template <unsigned VLEN, typename src_elem_t>
+void viota(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2);
+template <unsigned VLEN, typename src_elem_t> void vid(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd);
+template <unsigned VLEN, typename src_elem_t> uint64_t scalar_move(uint8_t* V, vtype_t vtype, unsigned vd, uint64_t val, bool to_vector);
+template <unsigned VLEN, typename src_elem_t>
+void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
+template <unsigned VLEN, typename src_elem_t>
+void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
+template <unsigned VLEN, typename src_elem_t>
+void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
+template <unsigned VLEN, typename src_elem_t>
+void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
+template <unsigned VLEN, typename dest_elem_t, typename scr_elem_t = dest_elem_t>
+void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1);
+template <unsigned VLEN, typename scr_elem_t>
+void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
+template <unsigned VLEN, typename scr_elem_t>
+void vector_compress(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, unsigned vd, unsigned vs2, unsigned vs1);
+template <unsigned VLEN> void vector_whole_move(uint8_t* V, unsigned vd, unsigned vs2, unsigned count);
+template <unsigned VLEN, typename dest_elem_t, typename src_elem_t = dest_elem_t>
+void fp_vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
+                      unsigned vs2, unsigned vs1, uint8_t rm);
+template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = src2_elem_t>
+void fp_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
+                         unsigned vs2, unsigned vs1, uint8_t rm);
+template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = src2_elem_t>
+void fp_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
+                      unsigned vs2, src1_elem_t imm, uint8_t rm);
+template <unsigned VLEN, typename elem_t>
+void fp_vector_unary_op(uint8_t* V, unsigned encoding_space, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm,
+                        unsigned vd, unsigned vs2, uint8_t rm);
+template <unsigned VLEN, typename dest_elem_t, typename src_elem_t>
+void fp_vector_unary_w(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
+                       uint8_t rm);
+template <unsigned VLEN, typename dest_elem_t, typename src_elem_t>
+void fp_vector_unary_n(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
+                       uint8_t rm);
+template <unsigned VLEN, typename elem_t>
+void mask_fp_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
+                              unsigned vs1, uint8_t rm);
+template <unsigned VLEN, typename elem_t>
+void mask_fp_vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
+                           elem_t imm, uint8_t rm);
+} // namespace softvector
+#include "vm/vector_functions.hpp"
+#endif /* _VM_VECTOR_FUNCTIONS_H_ */
Author	SHA1	Message	Date
alex	8f5d666b7d	corrects mistake from rebasing, adds newly generated templates	2025-03-31 12:50:05 +02:00
alex	cc123939ce	configures logger in main	2025-03-31 10:19:16 +02:00
alex	a2e5405e25	small changes regarding vector template	2025-03-31 10:19:16 +02:00
alex	cd3ec0b79d	removes conversion functions in favor of more explicit conversions	2025-03-31 10:19:16 +02:00
alex	0e35a2a8c9	adds complete Zfh support, small rework regarding floating point interface	2025-03-31 10:19:16 +02:00
alex	8220c00a3d	small correction for floating point h	2025-03-31 10:19:16 +02:00
alex	ec5fb1e87e	increases verbosity for file loading errors	2025-03-31 10:19:16 +02:00
alex	453407568c	removes carry_t, moves functionality to own functions	2025-03-31 10:19:16 +02:00
alex	0fe9e6ebc8	corrects error for narrowing fp dispatch	2025-03-31 10:19:16 +02:00
alex	484d9dbe08	removes rounding mode lookup as it is not needes	2025-03-31 10:19:15 +02:00
alex	7a7035f267	adds support for half precision float	2025-03-31 10:19:15 +02:00
alex	d9f1e5d31b	small refactor	2025-03-31 10:19:15 +02:00
alex	7b35f45a48	changes to make correct oversighst for XLEN=64 in Vector functions	2025-03-31 10:19:15 +02:00
alex	ece6f7290f	small bugfixes, adds some half point functionality	2025-03-31 10:19:15 +02:00
alex	2166a6d81e	makes widenning function types more explicit	2025-03-31 10:19:15 +02:00
alex	fe9f2a5455	corrects vectorslide, changes all loop index type	2025-03-31 10:19:15 +02:00
alex	42bf6ee380	corrects errors w.r.t. floating point dispatch	2025-03-31 10:19:15 +02:00
alex	f0b582df6c	corrects ambiguity in frsqrt	2025-03-31 10:19:15 +02:00
alex	6fcb3dbb66	adds missing floating point instructions	2025-03-31 10:19:15 +02:00
alex	c01eb39a76	reworks merge instrs, adds fp comparisons	2025-03-31 10:19:15 +02:00
alex	08280a094f	allows assigning to mask_view elements	2025-03-31 10:19:15 +02:00
alex	ae90adc854	adds most fp functions	2025-03-31 10:19:15 +02:00
alex	cd358198ad	expands floating point functions	2025-03-31 10:19:14 +02:00
alex	8746003d3e	adds floating point reduction instrs, widening are untested	2025-03-31 10:19:14 +02:00
alex	60d2b45a81	adds floating point Permutation Instructions	2025-03-31 10:19:14 +02:00
alex	0264c5d66f	small cleanup	2025-03-31 10:19:14 +02:00
alex	528c2536af	removes unused declarations	2025-03-31 10:19:14 +02:00
alex	19e38ec898	corrects bug	2025-03-31 10:19:14 +02:00
alex	fd11ce18c4	changes order of arguments to reflect assembly	2025-03-31 10:19:14 +02:00
alex	9b7a9fa273	updates indexed load to use vreg_views	2025-03-31 10:19:14 +02:00
alex	e24c1874c4	Changes load_store to use vreg_views aswell	2025-03-31 10:19:14 +02:00
alex	221d2ee38c	adds whole register moves	2025-03-31 10:19:14 +02:00
alex	877cad27ba	adds gather instructions	2025-03-31 10:19:14 +02:00
alex	a26505cb5c	adds more functions, up to slide	2025-03-31 10:19:13 +02:00
alex	c1277b6528	adds mask_mask logical instructions	2025-03-31 10:19:13 +02:00
alex	63889b02e7	adds widening reductions	2025-03-31 10:19:13 +02:00
alex	f049d8cbb3	adds Integer Reduction Instructions	2025-03-31 10:19:13 +02:00
alex	28ac169cfe	adds narrowing fixed point instructions	2025-03-31 10:19:13 +02:00
alex	a6f24db83a	adds vssrl and vssra	2025-03-31 10:19:13 +02:00
alex	e1911bc450	adds vsmul, widens functions parameters for sat_vector operations	2025-03-31 10:19:13 +02:00
alex	75d96bf18d	small cleanup, adds first fixed point instrs	2025-03-31 10:19:13 +02:00
alex	e59458aa0e	adds the missing vector csrs to the architectural state	2025-03-31 10:18:51 +02:00
alex	77807fec01	adds merge and move instructions	2025-03-31 10:18:10 +02:00
alex	6852d1d299	adds Vector Widening Integer Multiply-Add Instructions	2025-03-31 10:18:10 +02:00
alex	ac1322d66b	changes to ternary functions for Multiply-Add Instructions	2025-03-31 10:18:10 +02:00
alex	9ba9d2432c	adds Vector Widening Integer Multiply Instructions	2025-03-31 10:18:10 +02:00
alex	c9b7962cd3	adds Vector Integer Divide Instructions	2025-03-31 10:18:10 +02:00
alex	ab31fd27c9	adds single width integer instructins, also small cleanup	2025-03-31 10:18:10 +02:00
alex	b3f189145f	adds funct3 to vector functions	2025-03-31 10:18:10 +02:00
alex	dd4416ab15	adds min/max instructions	2025-03-31 10:18:10 +02:00
alex	0027946f90	renames mask operations to distinguish from vector integer compare instructions	2025-03-31 10:18:09 +02:00
alex	feaff8c4a5	adds support for narrowing shifts	2025-03-31 10:18:09 +02:00
alex	af3e76cc98	adds integer extension and add/substract with carry vector instructions	2025-03-31 10:18:09 +02:00
alex	b1ceac2c2a	small correction for vector_functions	2025-03-31 10:18:09 +02:00
alex	b5862039e7	changes order of operands to more closely resemble assembly	2025-03-31 10:18:09 +02:00
alex	51f3802394	adds vector_imm instructions to vector_functions, makes size of all involved registers a template parameter	2025-03-31 10:18:09 +02:00
alex	6ce0d97e81	general improvements to vector_functions, adds functions to process arithmetic instructions (working add)	2025-03-31 10:18:09 +02:00
alex	69c8fda5d2	corrects oversight in vector_functions	2025-03-31 10:18:09 +02:00
alex	c1f9328528	corrects vector_functions	2025-03-31 10:18:09 +02:00
alex	2b85748279	adds load_store_index to vector_functions	2025-03-31 10:18:09 +02:00
alex	f7aa51b12e	adds small optimization, clarifies variables in vector_functions	2025-03-31 10:18:09 +02:00
alex	3428745a00	small corrections in vector functions	2025-03-31 10:18:09 +02:00
alex	512b79a3e7	makes elem_count an explicit parameter for the softvector functions rather than calculating it from vtype	2025-03-31 10:18:08 +02:00
alex	7a048f8b93	changes wording of returned index to better reflect what it means, cleans up a bit	2025-03-31 10:18:08 +02:00
alex	6f4daf91ed	adds explicit RFS to assertions	2025-03-31 10:18:08 +02:00
alex	947d353bbf	adds working vector (unit) stride (segmented) loads and stores	2025-03-31 10:18:08 +02:00
alex	b95f518c91	updates templates for interp to make extension specific includes conditonal	2025-03-31 10:18:08 +02:00
alex	4cef0f57c1	updates templates and adds newly generated files	2025-03-31 10:18:05 +02:00
alex	28af695592	adds vector support to m and mu priv wrapper	2025-03-31 10:16:01 +02:00
alex	f6cdd9d07c	adds vector csr to riscv_common	2025-03-31 09:54:26 +02:00
alex	9e390971d4	corrects include guard comment for fp_functions	2025-03-31 09:54:26 +02:00
alex	2bb2e56310	adds dependencies for K ISA (Cryptography)	2025-03-31 09:54:26 +02:00
alex	a0eeae7dd6	corrects template for new arch_if changes	2025-03-30 19:12:22 +02:00
eyck	8f491ef36b	adds superflous exception throwing	2025-03-21 20:28:37 +01:00
eyck	cbe4c2d62f	adds comment to indicate purpose of arch state members	2025-03-19 12:03:12 +01:00
eyck	31c6bb55f4	applies clang format	2025-03-16 14:38:45 +01:00
eyck	63d0162119	adds license header	2025-03-16 13:33:01 +01:00
eyck	3b294d9da0	fixes sc_core_adapter wrt refactored memory hierarchy	2025-03-16 12:29:03 +01:00
eyck	54233b448d	moves mmu related code into mmu unit	2025-03-16 08:50:01 +01:00
eyck	e238369e18	cleansup htif call	2025-03-15 06:54:21 +01:00
eyck	cfc980a069	Merge branch 'feature/privilege_refactor' into develop	2025-03-14 20:00:07 +01:00
eyck	502f3e8df9	fixes htif behavior and instrumentation interface	2025-03-14 19:43:20 +01:00
hongyu	88475bfa55	changes the io_buf	2025-03-14 12:14:20 +01:00
eyck	71260a3ef4	Merge remote-tracking branch 'origin/feature/htif' into develop	2025-03-14 11:32:36 +01:00
eyck	23842742a6	factors clic & pmp into separate units	2025-03-13 12:13:41 +01:00
eyck	a13b7ac6d3	separates functional memory into separate unit	2025-03-12 09:26:51 +01:00
hongyu	aaebeaf023	changes the io_buf	2025-03-11 12:00:31 +01:00
eyck	fb0f6255e9	replaces virtual functions with memory pointers (kind of)	2025-03-11 08:31:25 +01:00
eyck	57d5ea92be	moves common functionality to base class	2025-03-10 16:00:26 +01:00
eyck	383d762abc	applies clang-format and updates SystemC HTIF implementation	2025-03-06 12:10:12 +01:00
eyck	03cbd305c6	replaces literal constant with symbolic definition	2025-02-28 19:34:07 +01:00
eyck	9f5326c110	extends htif for 32bit systems	2025-02-13 13:39:47 +01:00
eyck	f4718c6de3	Merge remote-tracking branch 'origin/feature/htif' into develop	2025-02-13 09:34:31 +01:00
eyck	53de21eef9	adds generator changed output	2025-02-12 20:45:04 +01:00
alex	d443c89c87	removes llvm from dbt-rise-tgc build system as it is handled in dbt-rise-core	2024-12-28 13:10:49 +01:00
alex	9a2df32d57	updates templates	2024-12-28 13:07:07 +01:00
alex	be0f783af8	adds cycle increment to tcc	2024-12-28 13:06:46 +01:00
alex	1089800682	updates vm_impls and core.h to work with new vm_base	2024-12-28 08:24:09 +01:00
alex	d907dc7f54	corrects tohost functionality and minor cleanup	2024-11-22 17:35:12 +01:00
alex	75e81ce236	copies new tohost implemenation from hart_m_p	2024-11-14 16:51:26 +01:00
alex	82a70efdb8	small reorder to make tohost output more readable	2024-11-14 16:51:26 +01:00
alex	978c3db06e	minor improvements to readability	2024-11-14 16:51:26 +01:00
alex	0e88664ff7	adds better tohost writing implementation, allowing the standard riscv-isa-test benchmarks to run	2024-11-14 16:51:26 +01:00