From 221d2ee38c991a301133900d0d957d2c2d1757ab Mon Sep 17 00:00:00 2001
From: Eyck-Alexander Jentzsch <alex@minres.com>
Date: Fri, 21 Feb 2025 18:22:49 +0100
Subject: [PATCH] adds vector compress and whole register moves

---
 gen_input/templates/interp/CORENAME.cpp.gtl | 17 ++++++++++++++
 src/vm/vector_functions.h                   |  3 +++
 src/vm/vector_functions.hpp                 | 26 +++++++++++++++++++++
 3 files changed, 46 insertions(+)
diff --git a/gen_input/templates/interp/CORENAME.cpp.gtl b/gen_input/templates/interp/CORENAME.cpp.gtl
index 211b97d..f03c627 100644
--- a/gen_input/templates/interp/CORENAME.cpp.gtl
+++ b/gen_input/templates/interp/CORENAME.cpp.gtl
@@ -664,6 +664,23 @@ if(vector != null) {%>
                 throw new std::runtime_error("Unsupported sew bit value");
         }
     }
+    void vector_compress(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        // Forward to the softvector::vector_compress instantiation whose element type
+        // matches sew_val: 0b000, 0b001, 0b010, 0b011 select uint8_t, uint16_t, uint32_t, uint64_t.
+        if(sew_val == 0b000)
+            return softvector::vector_compress<${vlen}, uint8_t>(V, vl, vstart, vtype, vd, vs2, vs1);
+        if(sew_val == 0b001)
+            return softvector::vector_compress<${vlen}, uint16_t>(V, vl, vstart, vtype, vd, vs2, vs1);
+        if(sew_val == 0b010)
+            return softvector::vector_compress<${vlen}, uint32_t>(V, vl, vstart, vtype, vd, vs2, vs1);
+        if(sew_val == 0b011)
+            return softvector::vector_compress<${vlen}, uint64_t>(V, vl, vstart, vtype, vd, vs2, vs1);
+        // Every other value has no instantiation and is rejected.
+        throw new std::runtime_error("Unsupported sew bit value");
+    }
+    void vector_whole_move(uint8_t* V, uint8_t vd, uint8_t vs2, uint8_t count){
+        softvector::vector_whole_move<${vlen}>(V, vd, vs2, count); // thin forwarder, instantiated for the configured vlen
+    }
 <%}%>
     uint64_t fetch_count{0};
     uint64_t tval{0};
diff --git a/src/vm/vector_functions.h b/src/vm/vector_functions.h
index bd2668e..25bb8cb 100644
--- a/src/vm/vector_functions.h
+++ b/src/vm/vector_functions.h
@@ -119,6 +119,9 @@ template <unsigned VLEN, typename dest_elem_t, typename scr_elem_t = dest_elem_t
 void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1);
 template <unsigned VLEN, typename scr_elem_t>
 void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
+template <unsigned VLEN, typename scr_elem_t> // scr_elem_t: element type used for both the vs2 and vd views
+void vector_compress(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, unsigned vd, unsigned vs2, unsigned vs1);
+template <unsigned VLEN> void vector_whole_move(uint8_t* V, unsigned vd, unsigned vs2, unsigned count); // copies VLEN / 8 * count bytes
 } // namespace softvector
 #include "vm/vector_functions.hpp"
 #endif /* _VM_VECTOR_FUNCTIONS_H_ */
diff --git a/src/vm/vector_functions.hpp b/src/vm/vector_functions.hpp
index 68c7f26..8d525df 100644
--- a/src/vm/vector_functions.hpp
+++ b/src/vm/vector_functions.hpp
@@ -34,6 +34,7 @@
 #pragma once
 #include "vm/vector_functions.h"
 #include <cstdint>
+#include <cstring>
 #include <functional>
 #include <limits>
 #include <stdexcept>
@@ -1107,4 +1108,29 @@ void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype,
     }
     return;
 }
+template <unsigned VLEN, typename scr_elem_t>
+void vector_compress(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, unsigned vd, unsigned vs2, unsigned vs1) {
+    // Packs the elements of vs2 whose bit in the vs1 mask is set into consecutive elements of vd, beginning at element 0.
+    uint64_t vlmax = VLEN * vtype.lmul() / vtype.sew();
+    vmask_view select = read_vmask<VLEN>(V, vlmax, vs1);
+    auto source = get_vreg<VLEN, scr_elem_t>(V, vs2, vlmax);
+    auto dest = get_vreg<VLEN, scr_elem_t>(V, vd, vlmax);
+    unsigned packed = 0;
+    for(unsigned elem = vstart; elem < std::min(vlmax, vl); ++elem) {
+        if(!select[elem])
+            continue;
+        dest[packed] = source[elem];
+        ++packed;
+    }
+    // Elements from packed up to vlmax are rewritten with their own value: both arms of the ternary are identical.
+    for(unsigned tail = packed; tail < vlmax; ++tail)
+        dest[tail] = vtype.vta() ? dest[tail] : dest[tail];
+}
+// Copies VLEN / 8 * count bytes from the start of register vs2 to the start of register vd; the views only supply the pointers.
+template <unsigned VLEN> void vector_whole_move(uint8_t* V, unsigned vd, unsigned vs2, unsigned count) {
+    auto vd_view = get_vreg<VLEN, uint8_t>(V, vd, 1);
+    auto vs2_view = get_vreg<VLEN, uint8_t>(V, vs2, 1);
+    std::memmove(vd_view.start, vs2_view.start, VLEN / 8 * count); // memmove: vd may equal vs2, memcpy forbids overlap
+}
+
 } // namespace softvector
\ No newline at end of file