diff --git a/README.md b/README.md
index fe8cef5..9c0d7a8 100644
--- a/README.md
+++ b/README.md
@@ -12,8 +12,8 @@ DBT-RISE-RiscV uses libGIS (https://github.com/vsergeev/libGIS) as well as ELFIO
 
 **What's missing**
 
-* RV64I is only preliminary verified
-* F & D standard extensions to be implemented
+* F & D standard extensions for 32bit to be implemented
+* MACF & D standard extensions for 64bit to be implemented and verified
 
 **Planned features**
 
diff --git a/riscv/gen_input/RV32C.core_desc b/riscv/gen_input/RV32C.core_desc
index 6b453b8..77ca0af 100644
--- a/riscv/gen_input/RV32C.core_desc
+++ b/riscv/gen_input/RV32C.core_desc
@@ -12,6 +12,13 @@ InsructionSet RV32CI {
 				PC[XLEN](is_pc)
 	}
 	instructions{
+        JALR(no_cont){ // overrides the RV32I implementation: here the jump target does not need to be word-aligned, so no alignment check is made
+            encoding: imm[11:0]s | rs1[4:0] | b000 | rd[4:0] | b1100111;
+            args_disass: "x%rd$d, x%rs1$d, 0x%imm$x";
+            if(rd!=0) X[rd] <= PC+4; // link value PC+4 is written only when rd is not 0
+            val ret[XLEN] <= X[rs1]+ imm; // jump target before bit 0 is cleared
+            PC<=ret& ~0x1; // clear the least-significant bit of the target
+        }
 		C.ADDI4SPN { //(RES, imm=0)
 			encoding: b000 | imm[5:4] | imm[9:6] | imm[2:2] | imm[3:3] | rd[2:0] | b00;
 			args_disass: "x%rd$d, 0x%imm$05x";
diff --git a/riscv/gen_input/RV32IBase.core_desc b/riscv/gen_input/RV32IBase.core_desc
index 9b0d16b..70cd43b 100644
--- a/riscv/gen_input/RV32IBase.core_desc
+++ b/riscv/gen_input/RV32IBase.core_desc
@@ -38,9 +38,14 @@ InsructionSet RV32IBase {
 	    JALR(no_cont){
 	    	encoding: imm[11:0]s | rs1[4:0] | b000 | rd[4:0] | b1100111;
 		    args_disass: "x%rd$d, x%rs1$d, 0x%imm$x";
-    		if(rd!=0) X[rd] <= PC+4;
-    		val ret[XLEN] <= X[rs1]+ imm;
-    		PC<=ret& ~0x1;
+            val new_pc[XLEN] <= X[rs1]+ imm; // jump target before bit 0 is cleared
+            val align[XLEN] <= new_pc & 0x2; // bit 1 of the target; non-zero means the target is not 4-byte aligned
+		    if(align != 0){
+		        raise(0, 0) // misaligned target: trap instead of jumping; presumably cause 0 = instruction address misaligned - confirm
+		    } else {
+        		if(rd!=0) X[rd] <= PC+4; // link value PC+4 is written only when rd is not 0
+        		PC<=new_pc & ~0x1; // clear the least-significant bit of the target
+    		}
 	    }
 		BEQ(no_cont){
 		    encoding: imm[12:12]s |imm[10:5]s | rs2[4:0] | rs1[4:0] | b000 | imm[4:1]s | imm[11:11]s | b1100011;
@@ -154,17 +159,29 @@ InsructionSet RV32IBase {
 		SLLI {
 		    encoding: b0000000 | shamt[4:0] | rs1[4:0] | b001 | rd[4:0] | b0010011;
 		    args_disass:"x%rd$d, x%rs1$d, %shamt%";
-		    if(rd != 0) X[rd] <= shll(X[rs1], shamt);
+		    if(shamt > 31){ // NOTE(review): shamt is declared as shamt[4:0] (5 bits) in the encoding, so this guard looks unreachable - confirm
+		        raise(0,0)
+		    } else {
+		        if(rd != 0) X[rd] <= shll(X[rs1], shamt); // X[rs1] shifted left by shamt; written only when rd is not 0
+		    }
 		}
 		SRLI {
 		    encoding: b0000000 | shamt[4:0] | rs1[4:0] | b101 | rd[4:0] | b0010011;
 		    args_disass:"x%rd$d, x%rs1$d, %shamt%";
-		    if(rd != 0) X[rd] <= shrl(X[rs1], shamt);
+            if(shamt > 31){ // NOTE(review): shamt is declared as shamt[4:0] (5 bits) in the encoding, so this guard looks unreachable - confirm
+                raise(0,0)
+            } else {
+		        if(rd != 0) X[rd] <= shrl(X[rs1], shamt); // X[rs1] shifted right by shamt via shrl; written only when rd is not 0
+		    }
 		}
 		SRAI {
 		    encoding: b0100000 | shamt[4:0] | rs1[4:0] | b101 | rd[4:0] | b0010011;
 		    args_disass:"x%rd$d, x%rs1$d, %shamt%";
-		    if(rd != 0) X[rd] <= shra(X[rs1], shamt);
+		    if(shamt > 31){ // NOTE(review): shamt is declared as shamt[4:0] (5 bits) in the encoding, so this guard looks unreachable - confirm
+                raise(0,0)
+            } else {
+		        if(rd != 0) X[rd] <= shra(X[rs1], shamt); // X[rs1] shifted right by shamt via shra; written only when rd is not 0
+		    }
 		}
 		ADD {
 		    encoding: b0000000 | rs2[4:0] | rs1[4:0] | b000 | rd[4:0] | b0110011;
diff --git a/riscv/gen_input/RV64A.core_desc b/riscv/gen_input/RV64A.core_desc
index f7875d9..91ffb3d 100644
--- a/riscv/gen_input/RV64A.core_desc
+++ b/riscv/gen_input/RV64A.core_desc
@@ -1,6 +1,7 @@
 import "RV64IBase.core_desc"
+import "RV32A.core_desc"
 
-InsructionSet RV64A extends RV64IBase{
+InsructionSet RV64A extends RV64IBase {
 	 
 	address_spaces { 
 		RES[8]
diff --git a/riscv/gen_input/RV64IBase.core_desc b/riscv/gen_input/RV64IBase.core_desc
index 68ca175..013b716 100644
--- a/riscv/gen_input/RV64IBase.core_desc
+++ b/riscv/gen_input/RV64IBase.core_desc
@@ -39,7 +39,7 @@ InsructionSet RV64IBase extends RV32IBase {
 			encoding: imm[11:0]s | rs1[4:0] | b000 | rd[4:0] | b0011011;
 			args_disass:"x%rd$d, x%rs1$d, %imm%";
 			if(rd != 0){
-				val res[32] <= X[rs1]{32} + imm{32};
+				val res[32] <= X[rs1]{32} + imm;
 				X[rd] <= sext(res);
 			} 
 		}
diff --git a/riscv/gen_input/minres_rv.core_desc b/riscv/gen_input/minres_rv.core_desc
index a34c6f5..d6e74fc 100644
--- a/riscv/gen_input/minres_rv.core_desc
+++ b/riscv/gen_input/minres_rv.core_desc
@@ -6,7 +6,7 @@ import "RV64IBase.core_desc"
 //import "RV64M.core_desc"
 import "RV64A.core_desc"
 
-Core RV32IMAC provides RV32IBase,RV32M,RV32A, RV32CI  {
+Core RV32IMAC provides RV32IBase, RV32M, RV32A, RV32CI {
     template:"vm_riscv.in.cpp";
     constants {
         XLEN:=32;
@@ -25,8 +25,8 @@ Core RV32IMAC provides RV32IBase,RV32M,RV32A, RV32CI  {
 }
 
 
-Core RV64IA provides RV64IBase,RV64A {
-   template:"vm_riscv.in.cpp";
+Core RV64IA provides RV64IBase, RV64A, RV32A {
+    template:"vm_riscv.in.cpp";
     constants {
         XLEN:=64;
         XLEN2:=128;
@@ -37,7 +37,7 @@ Core RV64IA provides RV64IBase,RV64A {
         fencevmal:=2;
         fencevmau:=3;
         //          XL    ZYXWVUTSRQPONMLKJIHGFEDCBA
-        MISA_VAL:=0b10000000000001000001000100000000;
+        MISA_VAL:=0b10000000000001000000000100000001;
         PGSIZE := 4096; //1 << 12;
         PGMASK := 4095; //PGSIZE-1
     }
diff --git a/riscv/incl/iss/arch/riscv_hart_msu_vp.h b/riscv/incl/iss/arch/riscv_hart_msu_vp.h
index 15a7ecb..4eddbd7 100644
--- a/riscv/incl/iss/arch/riscv_hart_msu_vp.h
+++ b/riscv/incl/iss/arch/riscv_hart_msu_vp.h
@@ -970,12 +970,13 @@ iss::status riscv_hart_msu_vp::write_mem(phys_addr_t paddr, unsigned lengt
                 if (tohost_upper || (tohost_lower && to_host_wr_cnt > 0)) {
                     switch (hostvar >> 48) {
                     case 0:
-                        if (hostvar != 0x1)
+                        if (hostvar != 0x1){
                             LOG(FATAL) << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar
                                        << "), stopping simulation";
-                        else
+                        }else{
                             LOG(INFO) << "tohost value is 0x" << std::hex << hostvar << std::dec << " (" << hostvar
                                       << "), stopping simulation";
+                        }
                         throw(iss::simulation_stopped(hostvar));
                     case 0x0101: {
                         char c = static_cast(hostvar & 0xff);
diff --git a/riscv/incl/iss/arch/rv32imac.h b/riscv/incl/iss/arch/rv32imac.h
index 5dc4bf4..78ea62f 100644
--- a/riscv/incl/iss/arch/rv32imac.h
+++ b/riscv/incl/iss/arch/rv32imac.h
@@ -28,7 +28,7 @@
 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.
 // 
-// Created on: Fri Nov 17 20:34:49 CET 2017
+// Created on: Sun Nov 19 14:05:47 CET 2017
 //             *      rv32imac.h Author: 
 //
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/riscv/incl/iss/arch/rv64ia.h b/riscv/incl/iss/arch/rv64ia.h
index 2bfbd96..b0b2d09 100644
--- a/riscv/incl/iss/arch/rv64ia.h
+++ b/riscv/incl/iss/arch/rv64ia.h
@@ -28,7 +28,7 @@
 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.
 // 
-// Created on: Fri Nov 17 20:34:49 CET 2017
+// Created on: Sun Nov 19 14:05:47 CET 2017
 //             *      rv64ia.h Author: 
 //
 ////////////////////////////////////////////////////////////////////////////////
@@ -48,7 +48,7 @@ struct rv64ia;
 template<>
 struct traits {
 
-    enum constants {XLEN=64,XLEN2=128,XLEN_BIT_MASK=63,PCLEN=64,fence=0,fencei=1,fencevmal=2,fencevmau=3,MISA_VAL=2147750144,PGSIZE=4096,PGMASK=4095};
+    enum constants {XLEN=64,XLEN2=128,XLEN_BIT_MASK=63,PCLEN=64,fence=0,fencei=1,fencevmal=2,fencevmau=3,MISA_VAL=2147746049,PGSIZE=4096,PGMASK=4095};
 
     enum reg_e {
         X0,
diff --git a/riscv/src/internal/vm_riscv.in.cpp b/riscv/src/internal/vm_riscv.in.cpp
index 2d37da0..c87e818 100644
--- a/riscv/src/internal/vm_riscv.in.cpp
+++ b/riscv/src/internal/vm_riscv.in.cpp
@@ -222,16 +222,24 @@ private:
      ****************************************************************************/
     std::tuple illegal_intruction(virt_addr_t &pc, code_word_t instr,
                                                                           llvm::BasicBlock *bb) {
-        // this->gen_sync(iss::PRE_SYNC);
-        this->builder->CreateStore(this->builder->CreateLoad(get_reg_ptr(traits::NEXT_PC), true),
-                                   get_reg_ptr(traits::PC), true);
-        this->builder->CreateStore(
-            this->builder->CreateAdd(this->builder->CreateLoad(get_reg_ptr(traits::ICOUNT), true),
-                                     this->gen_const(64U, 1)),
-            get_reg_ptr(traits::ICOUNT), true);
-        if (this->debugging_enabled()) this->gen_sync(iss::PRE_SYNC);
+        bb->setName("illegal_instruction");
+
+        this->gen_sync(iss::PRE_SYNC);
+        if(this->disass_enabled){
+            /* generate console output when executing the command */
+            /* generate console output when executing the command */
+            boost::format ins_fmter("DB x%1$d");
+            ins_fmter % (uint64_t)instr;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
         pc = pc + ((instr & 3) == 3 ? 4 : 2);
-        this->gen_raise_trap(0, 2);     // illegal instruction trap
+
+        this->gen_raise_trap(0, 2);
         this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
         this->gen_trap_check(this->leave_blk);
         return std::make_tuple(iss::vm::BRANCH, nullptr);
diff --git a/riscv/src/internal/vm_rv64ia.cpp b/riscv/src/internal/vm_rv64ia.cpp
index fbd5aa8..a609ff7 100644
--- a/riscv/src/internal/vm_rv64ia.cpp
+++ b/riscv/src/internal/vm_rv64ia.cpp
@@ -215,7 +215,7 @@ private:
     };
 
     /* start generated code */
-    const InstructionDesriptor instr_descr[75] = {
+    const InstructionDesriptor instr_descr[86] = {
         /* entries are: valid value, valid mask, function ptr */
         /* instruction LWU */
         {32, 0b00000000000000000110000000000011, 0b00000000000000000111000001111111, &this_class::__lwu},
@@ -223,12 +223,6 @@ private:
         {32, 0b00000000000000000011000000000011, 0b00000000000000000111000001111111, &this_class::__ld},
         /* instruction SD */
         {32, 0b00000000000000000011000000100011, 0b00000000000000000111000001111111, &this_class::__sd},
-        /* instruction SLLI */
-        {32, 0b00000000000000000001000000010011, 0b11111100000000000111000001111111, &this_class::__slli},
-        /* instruction SRLI */
-        {32, 0b00000000000000000101000000010011, 0b11111100000000000111000001111111, &this_class::__srli},
-        /* instruction SRAI */
-        {32, 0b01000000000000000101000000010011, 0b11111100000000000111000001111111, &this_class::__srai},
         /* instruction ADDIW */
         {32, 0b00000000000000000000000000011011, 0b00000000000000000111000001111111, &this_class::__addiw},
         /* instruction SLLIW */
@@ -295,6 +289,12 @@ private:
         {32, 0b00000000000000000110000000010011, 0b00000000000000000111000001111111, &this_class::__ori},
         /* instruction ANDI */
         {32, 0b00000000000000000111000000010011, 0b00000000000000000111000001111111, &this_class::__andi},
+        /* instruction SLLI */
+        {32, 0b00000000000000000001000000010011, 0b11111110000000000111000001111111, &this_class::__slli},
+        /* instruction SRLI */
+        {32, 0b00000000000000000101000000010011, 0b11111110000000000111000001111111, &this_class::__srli},
+        /* instruction SRAI */
+        {32, 0b01000000000000000101000000010011, 0b11111110000000000111000001111111, &this_class::__srai},
         /* instruction ADD */
         {32, 0b00000000000000000000000000110011, 0b11111110000000000111000001111111, &this_class::__add},
         /* instruction SUB */
@@ -367,6 +367,28 @@ private:
         {32, 0b11000000000000000011000000101111, 0b11111000000000000111000001111111, &this_class::__amominu_d},
         /* instruction AMOMAXU.D */
         {32, 0b11100000000000000011000000101111, 0b11111000000000000111000001111111, &this_class::__amomaxu_d},
+        /* instruction LR.W */
+        {32, 0b00010000000000000010000000101111, 0b11111001111100000111000001111111, &this_class::__lr_w},
+        /* instruction SC.W */
+        {32, 0b00011000000000000010000000101111, 0b11111000000000000111000001111111, &this_class::__sc_w},
+        /* instruction AMOSWAP.W */
+        {32, 0b00001000000000000010000000101111, 0b11111000000000000111000001111111, &this_class::__amoswap_w},
+        /* instruction AMOADD.W */
+        {32, 0b00000000000000000010000000101111, 0b11111000000000000111000001111111, &this_class::__amoadd_w},
+        /* instruction AMOXOR.W */
+        {32, 0b00100000000000000010000000101111, 0b11111000000000000111000001111111, &this_class::__amoxor_w},
+        /* instruction AMOAND.W */
+        {32, 0b01100000000000000010000000101111, 0b11111000000000000111000001111111, &this_class::__amoand_w},
+        /* instruction AMOOR.W */
+        {32, 0b01000000000000000010000000101111, 0b11111000000000000111000001111111, &this_class::__amoor_w},
+        /* instruction AMOMIN.W */
+        {32, 0b10000000000000000010000000101111, 0b11111000000000000111000001111111, &this_class::__amomin_w},
+        /* instruction AMOMAX.W */
+        {32, 0b10100000000000000010000000101111, 0b11111000000000000111000001111111, &this_class::__amomax_w},
+        /* instruction AMOMINU.W */
+        {32, 0b11000000000000000010000000101111, 0b11111000000000000111000001111111, &this_class::__amominu_w},
+        /* instruction AMOMAXU.W */
+        {32, 0b11100000000000000010000000101111, 0b11111000000000000111000001111111, &this_class::__amomaxu_w},
     };
     // instruction LWU
     std::tuple __lwu(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
@@ -483,111 +505,6 @@ private:
         return std::make_tuple(vm::CONT, bb);
     }
     
-    // instruction SLLI
-    std::tuple __slli(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
-        bb->setName("SLLI");
-    
-        this->gen_sync(iss::PRE_SYNC);
-    
-        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
-        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
-        uint8_t fld_shamt_val = 0 | (bit_sub<20,6>(instr));
-        if(this->disass_enabled){
-            /* generate console output when executing the command */
-            boost::format ins_fmter("SLLI x%1$d, x%2$d, %3%");
-            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_shamt_val;
-            std::vector args {
-                this->core_ptr,
-                this->gen_const(64, pc.val),
-                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
-            };
-            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
-        }
-        pc=pc+4;
-    
-        if(fld_rd_val != 0){
-            Value* X_rd_val = this->builder->CreateShl(
-                this->gen_reg_load(fld_rs1_val, 0),
-                this->gen_const(64U, fld_shamt_val));
-            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
-        }
-        this->gen_set_pc(pc, traits::NEXT_PC);
-        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
-        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
-        this->gen_trap_check(bb);
-        return std::make_tuple(vm::CONT, bb);
-    }
-    
-    // instruction SRLI
-    std::tuple __srli(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
-        bb->setName("SRLI");
-    
-        this->gen_sync(iss::PRE_SYNC);
-    
-        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
-        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
-        uint8_t fld_shamt_val = 0 | (bit_sub<20,6>(instr));
-        if(this->disass_enabled){
-            /* generate console output when executing the command */
-            boost::format ins_fmter("SRLI x%1$d, x%2$d, %3%");
-            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_shamt_val;
-            std::vector args {
-                this->core_ptr,
-                this->gen_const(64, pc.val),
-                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
-            };
-            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
-        }
-        pc=pc+4;
-    
-        if(fld_rd_val != 0){
-            Value* X_rd_val = this->builder->CreateLShr(
-                this->gen_reg_load(fld_rs1_val, 0),
-                this->gen_const(64U, fld_shamt_val));
-            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
-        }
-        this->gen_set_pc(pc, traits::NEXT_PC);
-        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
-        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
-        this->gen_trap_check(bb);
-        return std::make_tuple(vm::CONT, bb);
-    }
-    
-    // instruction SRAI
-    std::tuple __srai(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
-        bb->setName("SRAI");
-    
-        this->gen_sync(iss::PRE_SYNC);
-    
-        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
-        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
-        uint8_t fld_shamt_val = 0 | (bit_sub<20,6>(instr));
-        if(this->disass_enabled){
-            /* generate console output when executing the command */
-            boost::format ins_fmter("SRAI x%1$d, x%2$d, %3%");
-            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_shamt_val;
-            std::vector args {
-                this->core_ptr,
-                this->gen_const(64, pc.val),
-                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
-            };
-            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
-        }
-        pc=pc+4;
-    
-        if(fld_rd_val != 0){
-            Value* X_rd_val = this->builder->CreateAShr(
-                this->gen_reg_load(fld_rs1_val, 0),
-                this->gen_const(64U, fld_shamt_val));
-            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
-        }
-        this->gen_set_pc(pc, traits::NEXT_PC);
-        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
-        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
-        this->gen_trap_check(bb);
-        return std::make_tuple(vm::CONT, bb);
-    }
-    
     // instruction ADDIW
     std::tuple __addiw(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
         bb->setName("ADDIW");
@@ -1113,19 +1030,43 @@ private:
         }
         pc=pc+4;
     
-        if(fld_rd_val != 0){
-            Value* X_rd_val = this->builder->CreateAdd(
-                this->gen_reg_load(traits::PC, 0),
-                this->gen_const(64U, 4));
-            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
-        }
-        Value* ret_val = this->builder->CreateAdd(
+        Value* new_pc_val = this->builder->CreateAdd(
             this->gen_reg_load(fld_rs1_val, 0),
             this->gen_const(64U, fld_imm_val));
-        Value* PC_val = this->builder->CreateAnd(
-            ret_val,
-            this->builder->CreateNot(this->gen_const(64U, 1)));
-        this->builder->CreateStore(PC_val, get_reg_ptr(traits::NEXT_PC), false);
+        Value* align_val = this->builder->CreateAnd(
+            new_pc_val,
+            this->gen_const(64U, 2));
+        llvm::BasicBlock* bbnext = llvm::BasicBlock::Create(this->mod->getContext(), "endif", this->func, this->leave_blk);
+        llvm::BasicBlock* bb_then = llvm::BasicBlock::Create(this->mod->getContext(), "thenbr", this->func, bbnext);
+        llvm::BasicBlock* bb_else = llvm::BasicBlock::Create(this->mod->getContext(), "elsebr", this->func, bbnext);
+        // this->builder->SetInsertPoint(bb);
+        this->gen_cond_branch(this->builder->CreateICmp(
+            ICmpInst::ICMP_NE,
+            align_val,
+            this->gen_const(64U, 0)),
+            bb_then,
+            bb_else);
+        this->builder->SetInsertPoint(bb_then);
+        {
+            this->gen_raise_trap(0, 0);
+        }
+        this->builder->CreateBr(bbnext);
+        this->builder->SetInsertPoint(bb_else);
+        {
+            if(fld_rd_val != 0){
+                Value* X_rd_val = this->builder->CreateAdd(
+                    this->gen_reg_load(traits::PC, 1),
+                    this->gen_const(64U, 4));
+                this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+            }
+            Value* PC_val = this->builder->CreateAnd(
+                new_pc_val,
+                this->builder->CreateNot(this->gen_const(64U, 1)));
+            this->builder->CreateStore(PC_val, get_reg_ptr(traits::NEXT_PC), false);
+        }
+        this->builder->CreateBr(bbnext);
+        bb=bbnext;
+        this->builder->SetInsertPoint(bb);
         this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
         this->gen_trap_check(this->leave_blk);
         return std::make_tuple(iss::vm::BRANCH, nullptr);
@@ -1914,6 +1855,111 @@ private:
         return std::make_tuple(vm::CONT, bb);
     }
     
+    // instruction SLLI: emits IR that stores X[rs1] << shamt into X[rd] (CreateShl with an immediate shift amount)
+    std::tuple __slli(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("SLLI");
+    
+        this->gen_sync(iss::PRE_SYNC);
+    
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr)); // rd field: destination register index
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr)); // rs1 field: source register index
+        uint8_t fld_shamt_val = 0 | (bit_sub<20,5>(instr)); // shamt field; NOTE(review): only 5 bits although the shift below uses 64-bit constants - confirm for RV64
+        if(this->disass_enabled){
+            /* emit a call to print_disass with the core pointer, the pc and the disassembly text built here */
+            boost::format ins_fmter("SLLI x%1$d, x%2$d, %3%");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_shamt_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4; // advance the translation-time pc by 4
+    
+        if(fld_rd_val != 0){ // nothing is emitted for destination register 0
+            Value* X_rd_val = this->builder->CreateShl(
+                this->gen_reg_load(fld_rs1_val, 0),
+                this->gen_const(64U, fld_shamt_val));
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        this->gen_set_pc(pc, traits::NEXT_PC); // NEXT_PC receives the advanced pc
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hands back CONT together with the freshly created block
+    }
+    
+    // instruction SRLI: emits IR that stores the logical right shift of X[rs1] by shamt into X[rd] (CreateLShr)
+    std::tuple __srli(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("SRLI");
+    
+        this->gen_sync(iss::PRE_SYNC);
+    
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr)); // rd field: destination register index
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr)); // rs1 field: source register index
+        uint8_t fld_shamt_val = 0 | (bit_sub<20,5>(instr)); // shamt field; NOTE(review): only 5 bits although the shift below uses 64-bit constants - confirm for RV64
+        if(this->disass_enabled){
+            /* emit a call to print_disass with the core pointer, the pc and the disassembly text built here */
+            boost::format ins_fmter("SRLI x%1$d, x%2$d, %3%");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_shamt_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4; // advance the translation-time pc by 4
+    
+        if(fld_rd_val != 0){ // nothing is emitted for destination register 0
+            Value* X_rd_val = this->builder->CreateLShr(
+                this->gen_reg_load(fld_rs1_val, 0),
+                this->gen_const(64U, fld_shamt_val));
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        this->gen_set_pc(pc, traits::NEXT_PC); // NEXT_PC receives the advanced pc
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hands back CONT together with the freshly created block
+    }
+    
+    // instruction SRAI: emits IR that stores the arithmetic right shift of X[rs1] by shamt into X[rd] (CreateAShr)
+    std::tuple __srai(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("SRAI");
+    
+        this->gen_sync(iss::PRE_SYNC);
+    
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr)); // rd field: destination register index
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr)); // rs1 field: source register index
+        uint8_t fld_shamt_val = 0 | (bit_sub<20,5>(instr)); // shamt field; NOTE(review): only 5 bits although the shift below uses 64-bit constants - confirm for RV64
+        if(this->disass_enabled){
+            /* emit a call to print_disass with the core pointer, the pc and the disassembly text built here */
+            boost::format ins_fmter("SRAI x%1$d, x%2$d, %3%");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_shamt_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4; // advance the translation-time pc by 4
+    
+        if(fld_rd_val != 0){ // nothing is emitted for destination register 0
+            Value* X_rd_val = this->builder->CreateAShr(
+                this->gen_reg_load(fld_rs1_val, 0),
+                this->gen_const(64U, fld_shamt_val));
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        this->gen_set_pc(pc, traits::NEXT_PC); // NEXT_PC receives the advanced pc
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hands back CONT together with the freshly created block
+    }
+    
     // instruction ADD
     std::tuple __add(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
         bb->setName("ADD");
@@ -3374,22 +3420,600 @@ private:
         return std::make_tuple(vm::CONT, bb);
     }
     
+    // instruction LR.W: emit IR that reads 4 bytes from MEM at X[rs1] into X[rd] and writes a -1 marker into the RES space at the same address
+    std::tuple __lr_w(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("LR.W");
+        // pc: translation-time address of this instruction; instr: raw instruction word; bb: current basic block (named above)
+        this->gen_sync(iss::PRE_SYNC);
+        // operand fields are decoded from instr while generating code; rl/aq are extracted but not referenced again in this function
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
+        uint8_t fld_rl_val = 0 | (bit_sub<25,1>(instr));
+        uint8_t fld_aq_val = 0 | (bit_sub<26,1>(instr));
+        if(this->disass_enabled){
+            /* generate console output when executing the command: emits a call to print_disass(core_ptr, pc, text) */
+            boost::format ins_fmter("LR.W x%1$d, x%2$d");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4;
+        // NOTE(review): both the memory read and the RES write sit inside the rd != 0 guard, so neither is emitted for rd == 0 -- confirm intended
+        if(fld_rd_val != 0){
+            Value* offs_val = this->gen_reg_load(fld_rs1_val, 0); // address taken from register rs1
+            Value* X_rd_val = this->gen_ext(
+                this->gen_read_mem(traits::MEM, offs_val, 32/8), // 4-byte read
+                64,
+                true); // presumably true selects sign extension -- TODO confirm
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+            Value* RES_offs_val = this->gen_ext(
+                this->builder->CreateNeg(this->gen_const(8U, 1)), // negated 8-bit 1, i.e. -1, used as the marker value
+                32,
+                true);
+            this->gen_write_mem(
+                traits::RES,
+                offs_val, // same address as the MEM read above
+                this->builder->CreateZExtOrTrunc(RES_offs_val,this->get_type(32)));
+        }
+        this->gen_set_pc(pc, traits::NEXT_PC);
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hand the fresh block back together with vm::CONT
+    }
+    
+    // instruction SC.W: emit IR that reads the RES entry at X[rs1]; when it is non-zero the low 32 bits of X[rs2] are written to MEM; X[rd] is then set from the same test
+    std::tuple __sc_w(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("SC.W");
+        // pc: translation-time address of this instruction; instr: raw instruction word; bb: current basic block (named above)
+        this->gen_sync(iss::PRE_SYNC);
+        // operand fields are decoded from instr while generating code; rl/aq are extracted but not referenced again in this function
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
+        uint8_t fld_rs2_val = 0 | (bit_sub<20,5>(instr));
+        uint8_t fld_rl_val = 0 | (bit_sub<25,1>(instr));
+        uint8_t fld_aq_val = 0 | (bit_sub<26,1>(instr));
+        if(this->disass_enabled){
+            /* generate console output when executing the command: emits a call to print_disass(core_ptr, pc, text) */
+            boost::format ins_fmter("SC.W x%1$d, x%2$d, x%3$d");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_rs2_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4;
+        // note: this function only reads traits::RES; no write to the RES space is emitted anywhere below
+        Value* offs_val = this->gen_reg_load(fld_rs1_val, 0); // address taken from register rs1
+        Value* res1_val = this->gen_read_mem(traits::RES, offs_val, 32/8); // 4-byte read from the RES space
+        llvm::BasicBlock* bbnext = llvm::BasicBlock::Create(this->mod->getContext(), "endif", this->func, this->leave_blk);
+        llvm::BasicBlock* bb_then = llvm::BasicBlock::Create(this->mod->getContext(), "thenbr", this->func, bbnext);
+        // this->builder->SetInsertPoint(bb);
+        this->gen_cond_branch(this->builder->CreateICmp(
+            ICmpInst::ICMP_NE,
+            res1_val,
+            this->gen_const(32U, 0)), // RES entry != 0 -> take thenbr, otherwise go straight to endif
+            bb_then,
+            bbnext);
+        this->builder->SetInsertPoint(bb_then);
+        {
+            Value* MEM_offs_val = this->gen_reg_load(fld_rs2_val, 1); // NOTE(review): second argument is 1 here but 0 in the gen_reg_load above -- its meaning is not visible in this file section
+            this->gen_write_mem(
+                traits::MEM,
+                offs_val,
+                this->builder->CreateZExtOrTrunc(MEM_offs_val,this->get_type(32))); // only the low 32 bits are stored
+        }
+        this->builder->CreateBr(bbnext); // thenbr always continues in endif
+        bb=bbnext;
+        this->builder->SetInsertPoint(bb);
+        if(fld_rd_val != 0){
+            Value* X_rd_val = this->gen_choose(
+                this->builder->CreateICmp(
+                    ICmpInst::ICMP_NE,
+                    res1_val,
+                    this->gen_const(64U, 0)), // NOTE(review): 64-bit zero here vs 32-bit zero in the branch test above; if the 4-byte read yields a 32-bit value the operand widths differ -- confirm
+                this->gen_const(64U, 0), // presumably the result when the RES entry was non-zero -- TODO confirm gen_choose argument order
+                this->gen_const(64U, 1),
+                64);
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        this->gen_set_pc(pc, traits::NEXT_PC);
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hand the fresh block back together with vm::CONT
+    }
+    
+    // instruction AMOSWAP.W: emit IR that (only when rd != 0) reads 4 bytes from MEM at X[rs1] into X[rd], then stores the low 32 bits of X[rs2] to that address
+    std::tuple __amoswap_w(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("AMOSWAP.W");
+        // pc: translation-time address of this instruction; instr: raw instruction word; bb: current basic block (named above)
+        this->gen_sync(iss::PRE_SYNC);
+        // operand fields are decoded from instr while generating code; rl/aq only appear in the disassembly text below
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
+        uint8_t fld_rs2_val = 0 | (bit_sub<20,5>(instr));
+        uint8_t fld_rl_val = 0 | (bit_sub<25,1>(instr));
+        uint8_t fld_aq_val = 0 | (bit_sub<26,1>(instr));
+        if(this->disass_enabled){
+            /* generate console output when executing the command: emits a call to print_disass(core_ptr, pc, text) */
+            boost::format ins_fmter("AMOSWAP.W x%1$d, x%2$d, x%3$d (aqu=%4$d,rel=%5$d)");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_rs2_val % (uint64_t)fld_aq_val % (uint64_t)fld_rl_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4;
+        // the memory read below is only emitted when rd != 0; the memory write is emitted unconditionally
+        Value* offs_val = this->gen_reg_load(fld_rs1_val, 0); // address taken from register rs1
+        if(fld_rd_val != 0){
+            Value* X_rd_val = this->gen_ext(
+                this->gen_read_mem(traits::MEM, offs_val, 32/8), // 4-byte read
+                64,
+                true); // presumably true selects sign extension -- TODO confirm
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        Value* MEM_offs_val = this->gen_reg_load(fld_rs2_val, 0); // NOTE(review): emitted after the X[rd] store; with rd == rs2 this load may observe the value just stored -- confirm
+        this->gen_write_mem(
+            traits::MEM,
+            offs_val,
+            this->builder->CreateZExtOrTrunc(MEM_offs_val,this->get_type(32))); // only the low 32 bits are stored
+        this->gen_set_pc(pc, traits::NEXT_PC);
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hand the fresh block back together with vm::CONT
+    }
+    
+    // instruction AMOADD.W: emit IR that reads 4 bytes from MEM at X[rs1], copies the extended value to X[rd] (if rd != 0) and writes back the low 32 bits of (value + X[rs2])
+    std::tuple __amoadd_w(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("AMOADD.W");
+        // pc: translation-time address of this instruction; instr: raw instruction word; bb: current basic block (named above)
+        this->gen_sync(iss::PRE_SYNC);
+        // operand fields are decoded from instr while generating code; rl/aq only appear in the disassembly text below
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
+        uint8_t fld_rs2_val = 0 | (bit_sub<20,5>(instr));
+        uint8_t fld_rl_val = 0 | (bit_sub<25,1>(instr));
+        uint8_t fld_aq_val = 0 | (bit_sub<26,1>(instr));
+        if(this->disass_enabled){
+            /* generate console output when executing the command: emits a call to print_disass(core_ptr, pc, text) */
+            boost::format ins_fmter("AMOADD.W x%1$d, x%2$d, x%3$d (aqu=%4$d,rel=%5$d)");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_rs2_val % (uint64_t)fld_aq_val % (uint64_t)fld_rl_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4;
+        // read-modify-write sequence: the memory read and write are emitted regardless of rd, only the register store is guarded
+        Value* offs_val = this->gen_reg_load(fld_rs1_val, 0); // address taken from register rs1
+        Value* res1_val = this->gen_ext(
+            this->gen_read_mem(traits::MEM, offs_val, 32/8), // 4-byte read
+            64,
+            true); // presumably true selects sign extension -- TODO confirm
+        if(fld_rd_val != 0){
+            Value* X_rd_val = res1_val;
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        Value* res2_val = this->builder->CreateAdd(
+            res1_val,
+            this->gen_reg_load(fld_rs2_val, 0)); // NOTE(review): emitted after the X[rd] store; with rd == rs2 this load may observe the value just stored -- confirm
+        Value* MEM_offs_val = res2_val;
+        this->gen_write_mem(
+            traits::MEM,
+            offs_val,
+            this->builder->CreateZExtOrTrunc(MEM_offs_val,this->get_type(32))); // only the low 32 bits are stored
+        this->gen_set_pc(pc, traits::NEXT_PC);
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hand the fresh block back together with vm::CONT
+    }
+    
+    // instruction AMOXOR.W: emit IR that reads 4 bytes from MEM at X[rs1], copies the extended value to X[rd] (if rd != 0) and writes back the low 32 bits of (value ^ X[rs2])
+    std::tuple __amoxor_w(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("AMOXOR.W");
+        // pc: translation-time address of this instruction; instr: raw instruction word; bb: current basic block (named above)
+        this->gen_sync(iss::PRE_SYNC);
+        // operand fields are decoded from instr while generating code; rl/aq only appear in the disassembly text below
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
+        uint8_t fld_rs2_val = 0 | (bit_sub<20,5>(instr));
+        uint8_t fld_rl_val = 0 | (bit_sub<25,1>(instr));
+        uint8_t fld_aq_val = 0 | (bit_sub<26,1>(instr));
+        if(this->disass_enabled){
+            /* generate console output when executing the command: emits a call to print_disass(core_ptr, pc, text) */
+            boost::format ins_fmter("AMOXOR.W x%1$d, x%2$d, x%3$d (aqu=%4$d,rel=%5$d)");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_rs2_val % (uint64_t)fld_aq_val % (uint64_t)fld_rl_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4;
+        // read-modify-write sequence: the memory read and write are emitted regardless of rd, only the register store is guarded
+        Value* offs_val = this->gen_reg_load(fld_rs1_val, 0); // address taken from register rs1
+        Value* res1_val = this->gen_ext(
+            this->gen_read_mem(traits::MEM, offs_val, 32/8), // 4-byte read
+            64,
+            true); // presumably true selects sign extension -- TODO confirm
+        if(fld_rd_val != 0){
+            Value* X_rd_val = res1_val;
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        Value* res2_val = this->builder->CreateXor(
+            res1_val,
+            this->gen_reg_load(fld_rs2_val, 0)); // NOTE(review): emitted after the X[rd] store; with rd == rs2 this load may observe the value just stored -- confirm
+        Value* MEM_offs_val = res2_val;
+        this->gen_write_mem(
+            traits::MEM,
+            offs_val,
+            this->builder->CreateZExtOrTrunc(MEM_offs_val,this->get_type(32))); // only the low 32 bits are stored
+        this->gen_set_pc(pc, traits::NEXT_PC);
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hand the fresh block back together with vm::CONT
+    }
+    
+    // instruction AMOAND.W: emit IR that reads 4 bytes from MEM at X[rs1], copies the extended value to X[rd] (if rd != 0) and writes back the low 32 bits of (value & X[rs2])
+    std::tuple __amoand_w(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("AMOAND.W");
+        // pc: translation-time address of this instruction; instr: raw instruction word; bb: current basic block (named above)
+        this->gen_sync(iss::PRE_SYNC);
+        // operand fields are decoded from instr while generating code; rl/aq only appear in the disassembly text below
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
+        uint8_t fld_rs2_val = 0 | (bit_sub<20,5>(instr));
+        uint8_t fld_rl_val = 0 | (bit_sub<25,1>(instr));
+        uint8_t fld_aq_val = 0 | (bit_sub<26,1>(instr));
+        if(this->disass_enabled){
+            /* generate console output when executing the command: emits a call to print_disass(core_ptr, pc, text) */
+            boost::format ins_fmter("AMOAND.W x%1$d, x%2$d, x%3$d (aqu=%4$d,rel=%5$d)");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_rs2_val % (uint64_t)fld_aq_val % (uint64_t)fld_rl_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4;
+        // read-modify-write sequence: the memory read and write are emitted regardless of rd, only the register store is guarded
+        Value* offs_val = this->gen_reg_load(fld_rs1_val, 0); // address taken from register rs1
+        Value* res1_val = this->gen_ext(
+            this->gen_read_mem(traits::MEM, offs_val, 32/8), // 4-byte read
+            64,
+            true); // presumably true selects sign extension -- TODO confirm
+        if(fld_rd_val != 0){
+            Value* X_rd_val = res1_val;
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        Value* res2_val = this->builder->CreateAnd(
+            res1_val,
+            this->gen_reg_load(fld_rs2_val, 0)); // NOTE(review): emitted after the X[rd] store; with rd == rs2 this load may observe the value just stored -- confirm
+        Value* MEM_offs_val = res2_val;
+        this->gen_write_mem(
+            traits::MEM,
+            offs_val,
+            this->builder->CreateZExtOrTrunc(MEM_offs_val,this->get_type(32))); // only the low 32 bits are stored
+        this->gen_set_pc(pc, traits::NEXT_PC);
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hand the fresh block back together with vm::CONT
+    }
+    
+    // instruction AMOOR.W: emit IR that reads 4 bytes from MEM at X[rs1], copies the extended value to X[rd] (if rd != 0) and writes back the low 32 bits of (value | X[rs2])
+    std::tuple __amoor_w(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("AMOOR.W");
+        // pc: translation-time address of this instruction; instr: raw instruction word; bb: current basic block (named above)
+        this->gen_sync(iss::PRE_SYNC);
+        // operand fields are decoded from instr while generating code; rl/aq only appear in the disassembly text below
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
+        uint8_t fld_rs2_val = 0 | (bit_sub<20,5>(instr));
+        uint8_t fld_rl_val = 0 | (bit_sub<25,1>(instr));
+        uint8_t fld_aq_val = 0 | (bit_sub<26,1>(instr));
+        if(this->disass_enabled){
+            /* generate console output when executing the command: emits a call to print_disass(core_ptr, pc, text) */
+            boost::format ins_fmter("AMOOR.W x%1$d, x%2$d, x%3$d (aqu=%4$d,rel=%5$d)");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_rs2_val % (uint64_t)fld_aq_val % (uint64_t)fld_rl_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4;
+        // read-modify-write sequence: the memory read and write are emitted regardless of rd, only the register store is guarded
+        Value* offs_val = this->gen_reg_load(fld_rs1_val, 0); // address taken from register rs1
+        Value* res1_val = this->gen_ext(
+            this->gen_read_mem(traits::MEM, offs_val, 32/8), // 4-byte read
+            64,
+            true); // presumably true selects sign extension -- TODO confirm
+        if(fld_rd_val != 0){
+            Value* X_rd_val = res1_val;
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        Value* res2_val = this->builder->CreateOr(
+            res1_val,
+            this->gen_reg_load(fld_rs2_val, 0)); // NOTE(review): emitted after the X[rd] store; with rd == rs2 this load may observe the value just stored -- confirm
+        Value* MEM_offs_val = res2_val;
+        this->gen_write_mem(
+            traits::MEM,
+            offs_val,
+            this->builder->CreateZExtOrTrunc(MEM_offs_val,this->get_type(32))); // only the low 32 bits are stored
+        this->gen_set_pc(pc, traits::NEXT_PC);
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hand the fresh block back together with vm::CONT
+    }
+    
+    // instruction AMOMIN.W: emit IR that reads 4 bytes from MEM at X[rs1], copies the extended value to X[rd] (if rd != 0) and writes back the low 32 bits of a value chosen by a signed compare with X[rs2]
+    std::tuple __amomin_w(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("AMOMIN.W");
+        // pc: translation-time address of this instruction; instr: raw instruction word; bb: current basic block (named above)
+        this->gen_sync(iss::PRE_SYNC);
+        // operand fields are decoded from instr while generating code; rl/aq only appear in the disassembly text below
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
+        uint8_t fld_rs2_val = 0 | (bit_sub<20,5>(instr));
+        uint8_t fld_rl_val = 0 | (bit_sub<25,1>(instr));
+        uint8_t fld_aq_val = 0 | (bit_sub<26,1>(instr));
+        if(this->disass_enabled){
+            /* generate console output when executing the command: emits a call to print_disass(core_ptr, pc, text) */
+            boost::format ins_fmter("AMOMIN.W x%1$d, x%2$d, x%3$d (aqu=%4$d,rel=%5$d)");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_rs2_val % (uint64_t)fld_aq_val % (uint64_t)fld_rl_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4;
+        // read-modify-write sequence: the memory read and write are emitted regardless of rd, only the register store is guarded
+        Value* offs_val = this->gen_reg_load(fld_rs1_val, 0); // address taken from register rs1
+        Value* res1_val = this->gen_ext(
+            this->gen_read_mem(traits::MEM, offs_val, 32/8), // 4-byte read
+            64,
+            true); // presumably true selects sign extension -- TODO confirm
+        if(fld_rd_val != 0){
+            Value* X_rd_val = res1_val;
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        Value* res2_val = this->gen_choose(
+            this->builder->CreateICmp(
+                ICmpInst::ICMP_SGT, // signed test: memory value > X[rs2]
+                this->gen_ext(
+                    res1_val,
+                    64, true),
+                this->gen_ext(
+                    this->gen_reg_load(fld_rs2_val, 0), // NOTE(review): loaded after the X[rd] store; with rd == rs2 this may observe the value just stored -- confirm
+                    64, true)),
+            this->gen_reg_load(fld_rs2_val, 0), // presumably the value chosen when the test holds (i.e. the smaller one) -- TODO confirm gen_choose argument order
+            res1_val,
+            64);
+        Value* MEM_offs_val = res2_val;
+        this->gen_write_mem(
+            traits::MEM,
+            offs_val,
+            this->builder->CreateZExtOrTrunc(MEM_offs_val,this->get_type(32))); // only the low 32 bits are stored
+        this->gen_set_pc(pc, traits::NEXT_PC);
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hand the fresh block back together with vm::CONT
+    }
+    
+    // instruction AMOMAX.W: emit IR that reads 4 bytes from MEM at X[rs1], copies the extended value to X[rd] (if rd != 0) and writes back the low 32 bits of a value chosen by a signed compare with X[rs2]
+    std::tuple __amomax_w(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("AMOMAX.W");
+        // pc: translation-time address of this instruction; instr: raw instruction word; bb: current basic block (named above)
+        this->gen_sync(iss::PRE_SYNC);
+        // operand fields are decoded from instr while generating code; rl/aq only appear in the disassembly text below
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
+        uint8_t fld_rs2_val = 0 | (bit_sub<20,5>(instr));
+        uint8_t fld_rl_val = 0 | (bit_sub<25,1>(instr));
+        uint8_t fld_aq_val = 0 | (bit_sub<26,1>(instr));
+        if(this->disass_enabled){
+            /* generate console output when executing the command: emits a call to print_disass(core_ptr, pc, text) */
+            boost::format ins_fmter("AMOMAX.W x%1$d, x%2$d, x%3$d (aqu=%4$d,rel=%5$d)");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_rs2_val % (uint64_t)fld_aq_val % (uint64_t)fld_rl_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4;
+        // read-modify-write sequence: the memory read and write are emitted regardless of rd, only the register store is guarded
+        Value* offs_val = this->gen_reg_load(fld_rs1_val, 0); // address taken from register rs1
+        Value* res1_val = this->gen_ext(
+            this->gen_read_mem(traits::MEM, offs_val, 32/8), // 4-byte read
+            64,
+            true); // presumably true selects sign extension -- TODO confirm
+        if(fld_rd_val != 0){
+            Value* X_rd_val = res1_val;
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        Value* res2_val = this->gen_choose(
+            this->builder->CreateICmp(
+                ICmpInst::ICMP_SLT, // signed test: memory value < X[rs2]
+                this->gen_ext(
+                    res1_val,
+                    64, true),
+                this->gen_ext(
+                    this->gen_reg_load(fld_rs2_val, 0), // NOTE(review): loaded after the X[rd] store; with rd == rs2 this may observe the value just stored -- confirm
+                    64, true)),
+            this->gen_reg_load(fld_rs2_val, 0), // presumably the value chosen when the test holds (i.e. the larger one) -- TODO confirm gen_choose argument order
+            res1_val,
+            64);
+        Value* MEM_offs_val = res2_val;
+        this->gen_write_mem(
+            traits::MEM,
+            offs_val,
+            this->builder->CreateZExtOrTrunc(MEM_offs_val,this->get_type(32))); // only the low 32 bits are stored
+        this->gen_set_pc(pc, traits::NEXT_PC);
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hand the fresh block back together with vm::CONT
+    }
+    
+    // instruction AMOMINU.W: emit IR that reads 4 bytes from MEM at X[rs1], copies the extended value to X[rd] (if rd != 0) and writes back the low 32 bits of a value chosen by an unsigned compare with X[rs2]
+    std::tuple __amominu_w(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("AMOMINU.W");
+        // pc: translation-time address of this instruction; instr: raw instruction word; bb: current basic block (named above)
+        this->gen_sync(iss::PRE_SYNC);
+        // operand fields are decoded from instr while generating code; rl/aq only appear in the disassembly text below
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
+        uint8_t fld_rs2_val = 0 | (bit_sub<20,5>(instr));
+        uint8_t fld_rl_val = 0 | (bit_sub<25,1>(instr));
+        uint8_t fld_aq_val = 0 | (bit_sub<26,1>(instr));
+        if(this->disass_enabled){
+            /* generate console output when executing the command: emits a call to print_disass(core_ptr, pc, text) */
+            boost::format ins_fmter("AMOMINU.W x%1$d, x%2$d, x%3$d (aqu=%4$d,rel=%5$d)");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_rs2_val % (uint64_t)fld_aq_val % (uint64_t)fld_rl_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4;
+        // read-modify-write sequence: the memory read and write are emitted regardless of rd, only the register store is guarded
+        Value* offs_val = this->gen_reg_load(fld_rs1_val, 0); // address taken from register rs1
+        Value* res1_val = this->gen_ext(
+            this->gen_read_mem(traits::MEM, offs_val, 32/8), // 4-byte read
+            64,
+            false); // NOTE(review): false here (presumably zero extension) and this same value is what X[rd] receives below -- confirm against the ISA spec
+        if(fld_rd_val != 0){
+            Value* X_rd_val = res1_val;
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        Value* res2_val = this->gen_choose(
+            this->builder->CreateICmp(
+                ICmpInst::ICMP_UGT, // unsigned test: memory value > X[rs2]
+                res1_val,
+                this->gen_reg_load(fld_rs2_val, 0)), // NOTE(review): loaded after the X[rd] store; with rd == rs2 this may observe the value just stored -- confirm
+            this->gen_reg_load(fld_rs2_val, 0), // presumably the value chosen when the test holds (i.e. the smaller one) -- TODO confirm gen_choose argument order
+            res1_val,
+            64);
+        Value* MEM_offs_val = res2_val;
+        this->gen_write_mem(
+            traits::MEM,
+            offs_val,
+            this->builder->CreateZExtOrTrunc(MEM_offs_val,this->get_type(32))); // only the low 32 bits are stored
+        this->gen_set_pc(pc, traits::NEXT_PC);
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hand the fresh block back together with vm::CONT
+    }
+    
+    // instruction AMOMAXU.W: emit IR that reads 4 bytes from MEM at X[rs1], copies the extended value to X[rd] (if rd != 0) and writes back the low 32 bits of a value chosen by an unsigned compare with X[rs2]
+    std::tuple __amomaxu_w(virt_addr_t& pc, code_word_t instr, llvm::BasicBlock* bb){
+        bb->setName("AMOMAXU.W");
+        // pc: translation-time address of this instruction; instr: raw instruction word; bb: current basic block (named above)
+        this->gen_sync(iss::PRE_SYNC);
+        // operand fields are decoded from instr while generating code; rl/aq only appear in the disassembly text below
+        uint8_t fld_rd_val = 0 | (bit_sub<7,5>(instr));
+        uint8_t fld_rs1_val = 0 | (bit_sub<15,5>(instr));
+        uint8_t fld_rs2_val = 0 | (bit_sub<20,5>(instr));
+        uint8_t fld_rl_val = 0 | (bit_sub<25,1>(instr));
+        uint8_t fld_aq_val = 0 | (bit_sub<26,1>(instr));
+        if(this->disass_enabled){
+            /* generate console output when executing the command: emits a call to print_disass(core_ptr, pc, text) */
+            boost::format ins_fmter("AMOMAXU.W x%1$d, x%2$d, x%3$d (aqu=%4$d,rel=%5$d)");
+            ins_fmter % (uint64_t)fld_rd_val % (uint64_t)fld_rs1_val % (uint64_t)fld_rs2_val % (uint64_t)fld_aq_val % (uint64_t)fld_rl_val;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
+        pc=pc+4;
+        // read-modify-write sequence: the memory read and write are emitted regardless of rd, only the register store is guarded
+        Value* offs_val = this->gen_reg_load(fld_rs1_val, 0); // address taken from register rs1
+        Value* res1_val = this->gen_ext(
+            this->gen_read_mem(traits::MEM, offs_val, 32/8), // 4-byte read
+            64,
+            false); // NOTE(review): false here (presumably zero extension) and this same value is what X[rd] receives below -- confirm against the ISA spec
+        if(fld_rd_val != 0){
+            Value* X_rd_val = res1_val;
+            this->builder->CreateStore(X_rd_val, get_reg_ptr(fld_rd_val), false);
+        }
+        Value* res2_val = this->gen_choose(
+            this->builder->CreateICmp(
+                ICmpInst::ICMP_ULT, // unsigned test: memory value < X[rs2]
+                this->gen_ext(
+                    res1_val,
+                    64, false),
+                this->gen_ext(
+                    this->gen_reg_load(fld_rs2_val, 0), // NOTE(review): loaded after the X[rd] store; with rd == rs2 this may observe the value just stored -- confirm
+                    64, false)),
+            this->gen_reg_load(fld_rs2_val, 0), // presumably the value chosen when the test holds (i.e. the larger one) -- TODO confirm gen_choose argument order
+            res1_val,
+            64);
+        Value* MEM_offs_val = res2_val;
+        this->gen_write_mem(
+            traits::MEM,
+            offs_val,
+            this->builder->CreateZExtOrTrunc(MEM_offs_val,this->get_type(32))); // only the low 32 bits are stored
+        this->gen_set_pc(pc, traits::NEXT_PC);
+        this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
+        bb = llvm::BasicBlock::Create(this->mod->getContext(), "entry", this->func, this->leave_blk); /* create next BasicBlock in chain */
+        this->gen_trap_check(bb);
+        return std::make_tuple(vm::CONT, bb); // hand the fresh block back together with vm::CONT
+    }
+    
 /* end generated code  */
     /****************************************************************************
      * end opcode definitions
      ****************************************************************************/
     std::tuple illegal_intruction(virt_addr_t &pc, code_word_t instr,
                                                                           llvm::BasicBlock *bb) {
-        // this->gen_sync(iss::PRE_SYNC);
-        this->builder->CreateStore(this->builder->CreateLoad(get_reg_ptr(traits::NEXT_PC), true),
-                                   get_reg_ptr(traits::PC), true);
-        this->builder->CreateStore(
-            this->builder->CreateAdd(this->builder->CreateLoad(get_reg_ptr(traits::ICOUNT), true),
-                                     this->gen_const(64U, 1)),
-            get_reg_ptr(traits::ICOUNT), true);
-        if (this->debugging_enabled()) this->gen_sync(iss::PRE_SYNC);
+        bb->setName("illegal_instruction");
+
+        this->gen_sync(iss::PRE_SYNC);
+        if(this->disass_enabled){
+            /* generate console output when executing the command */
+            /* generate console output when executing the command */
+            boost::format ins_fmter("DB 0x%1$x");
+            ins_fmter % (uint64_t)instr;
+            std::vector args {
+                this->core_ptr,
+                this->gen_const(64, pc.val),
+                this->builder->CreateGlobalStringPtr(ins_fmter.str()),
+            };
+            this->builder->CreateCall(this->mod->getFunction("print_disass"), args);
+        }
         pc = pc + ((instr & 3) == 3 ? 4 : 2);
-        this->gen_raise_trap(0, 2);     // illegal instruction trap
+
+        this->gen_raise_trap(0, 2);
         this->gen_sync(iss::POST_SYNC); /* call post-sync if needed */
         this->gen_trap_check(this->leave_blk);
         return std::make_tuple(iss::vm::BRANCH, nullptr);
diff --git a/riscv/src/main.cpp b/riscv/src/main.cpp
index c21cc03..4b49090 100644
--- a/riscv/src/main.cpp
+++ b/riscv/src/main.cpp
@@ -54,23 +54,24 @@ int main(int argc, char *argv[]) {
     // clang-format off
     desc.add_options()
         ("help,h", "Print help message")
-        ("verbose,v", po::value()->implicit_value(0), "Sets logging verbosity")
-        ("log-file", po::value(), "Sets default log file.")
+        ("loglevel,l", po::value()->implicit_value(2), "Sets logging verbosity")
+        ("logfile,f", po::value(), "Sets default log file.")
         ("disass,d", po::value()->implicit_value(""), "Enables disassembly")
-        ("elf,l", po::value>(), "ELF file(s) to load")
+        ("elf", po::value>(), "ELF file(s) to load")
         ("gdb-port,g", po::value()->default_value(0), "enable gdb server and specify port to use")
         ("input,i", po::value(), "the elf file to load (instead of hex files)")
         ("dump-ir", "dump the intermediate representation")
         ("cycles,c", po::value()->default_value(-1), "number of cycles to run")
         ("systemc,s", "Run as SystemC simulation")
-        ("time", po::value(), "SystemC siimulation time in ms")
+        ("time", po::value(), "SystemC simulation time in ms")
         ("reset,r", po::value(), "reset address")
         ("trace", po::value(), "enable tracing, or cmbintation of 1=signals and 2=TX text, 4=TX compressed text, 6=TX in SQLite")
         ("mem,m", po::value(), "the memory input file")
-        ("rv64", "run RV64");
+        ("isa", po::value()->default_value("rv32imac"), "isa to use for simulation");
     // clang-format on
+    auto parsed = po::command_line_parser(argc, argv).options(desc).allow_unregistered().run();
     try {
-        po::store(po::parse_command_line(argc, argv, desc), clim); // can throw
+        po::store(parsed, clim); // can throw
         // --help option
         if (clim.count("help")) {
             std::cout << "DBT-RISE-RiscV simulator for RISC-V" << std::endl << desc << std::endl;
@@ -83,14 +84,16 @@ int main(int argc, char *argv[]) {
         std::cerr << desc << std::endl;
         return 1;
     }
-    if (clim.count("verbose")) {
-        auto l = logging::as_log_level(clim["verbose"].as());
+    std::vector args = collect_unrecognized(parsed.options, po::include_positional);
+
+    if (clim.count("loglevel")) {
+        auto l = logging::as_log_level(clim["loglevel"].as());
         LOGGER(DEFAULT)::reporting_level() = l;
         LOGGER(connection)::reporting_level() = l;
     }
-    if (clim.count("log-file")) {
+    if (clim.count("logfile")) {
         // configure the connection logger
-        auto f = fopen(clim["log-file"].as().c_str(), "w");
+        auto f = fopen(clim["logfile"].as().c_str(), "w");
         LOG_OUTPUT(DEFAULT)::stream() = f;
         LOG_OUTPUT(connection)::stream() = f;
     }
@@ -101,19 +104,21 @@ int main(int argc, char *argv[]) {
         bool dump = clim.count("dump-ir");
         // instantiate the simulator
         std::unique_ptr vm{nullptr};
-        if (clim.count("rv64") == 1) {
-            auto cpu = new iss::arch::riscv_hart_msu_vp();
-            vm = iss::create(cpu, clim["gdb-port"].as(), dump);
+        if (clim["isa"].as().substr(0, 4)=="rv64") {
+            iss::arch::rv64ia* cpu = new iss::arch::riscv_hart_msu_vp();
+            vm = iss::create(cpu, clim["gdb-port"].as(), dump);
+        } else if (clim["isa"].as().substr(0, 4)=="rv32") {
+            iss::arch::rv32imac* cpu = new iss::arch::riscv_hart_msu_vp();
+            vm = iss::create(cpu, clim["gdb-port"].as(), dump);
         } else {
-            auto cpu = new iss::arch::riscv_hart_msu_vp();
-            vm = iss::create(cpu, clim["gdb-port"].as(), dump);
+            LOG(ERROR) << "Illegal argument value for '--isa': " << clim["isa"].as() << std::endl;
+            return 127;
         }
-        if (clim.count("elf")) {
+        if (clim.count("elf"))
             for (std::string input : clim["elf"].as>()) vm->get_arch()->load_file(input);
-        } else if (clim.count("mem")) {
+        if (clim.count("mem"))
             vm->get_arch()->load_file(clim["mem"].as(), iss::arch::traits::MEM);
-        }
-
+        for (std::string input : args) vm->get_arch()->load_file(input);// treat remaining arguments as elf files
         if (clim.count("disass")) {
             vm->setDisassEnabled(true);
             LOGGER(disass)::reporting_level() = logging::INFO;