From c1f93285283b2522f6f3cf5f6cbc1c818515969a Mon Sep 17 00:00:00 2001 From: Eyck-Alexander Jentzsch Date: Sun, 9 Feb 2025 17:49:56 +0100 Subject: [PATCH] corrects vector_functions --- src/vm/vector_functions.cpp | 30 +++++++++++++++++------------- src/vm/vector_functions.h | 3 ++- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/vm/vector_functions.cpp b/src/vm/vector_functions.cpp index 7f5a9d3..5ca248d 100644 --- a/src/vm/vector_functions.cpp +++ b/src/vm/vector_functions.cpp @@ -92,15 +92,18 @@ vreg_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t reg } uint64_t vector_load_store(void* core, std::function load_store_fn, uint8_t* V, uint16_t VLEN, uint8_t addressed_register, uint64_t base_addr, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, - uint8_t elem_size_byte, uint64_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int64_t stride) { + uint8_t elem_size_byte, uint64_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int64_t stride, + bool use_stride) { // eew = elem_size_byte * 8 assert(pow(2, EMUL_pow) * segment_size <= 8); assert(segment_size > 0); - assert((elem_count & (elem_count - 1)) == 0); // check that elem_count is power of 2 + // assert((elem_count & (elem_count - 1)) == 0); // check that elem_count is power of 2, this check does not hold for vlm.v and vsm.v assert(elem_count <= VLEN * RFS / 8); unsigned emul_stride = EMUL_pow <= 0 ? 1 : pow(2, EMUL_pow); assert(emul_stride * segment_size <= 8); assert(!(addressed_register % emul_stride)); + if(!use_stride) + stride = elem_size_byte * segment_size; vreg_view mask_view = read_vmask(V, VLEN, elem_count, 0); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) @@ -113,7 +116,7 @@ uint64_t vector_load_store(void* core, std::function(*reinterpret_cast(V)); + return *reinterpret_cast(V); case 16: - return static_cast(*reinterpret_cast(V)); + return *reinterpret_cast(V); case 32: - return static_cast(*reinterpret_cast(V)); + return *reinterpret_cast(V); case 64: - return static_cast(*reinterpret_cast(V)); + return *reinterpret_cast(V); default: throw new std::invalid_argument("Invalid arg in read_n_bits"); } @@ -165,10 +169,9 @@ uint64_t vector_load_store_index(void* core, std::function 0); assert((elem_count & (elem_count - 1)) == 0); // check that elem_count is power of 2 assert(elem_count <= VLEN * RFS / 8); - unsigned data_emul_stride = vtype.lmul() < 0 ? 0 : vtype.lmul(); + unsigned data_emul_stride = vtype.lmul() < 1 ? 1 : vtype.lmul(); assert(data_emul_stride * segment_size <= 8); unsigned data_elem_size_byte = vtype.sew() / 8; - assert(!(addressed_register % data_emul_stride)); vreg_view mask_view = read_vmask(V, VLEN, elem_count, 0); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) @@ -180,7 +183,7 @@ uint64_t vector_load_store_index(void* core, std::function::max() : std::numeric_limits::max(); unsigned index_offset = offset_val & mask; @@ -207,7 +210,8 @@ uint64_t vector_load_store_index(void* core, std::function load_store_fn, uint8_t* V, uint16_t VLEN, uint8_t addressed_register, uint64_t base_addr, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, - uint8_t elem_size_byte, uint64_t elem_count, int8_t EMUL_pow, uint8_t segment_size = 1, int64_t stride = 1); + uint8_t elem_size_byte, uint64_t elem_count, int8_t EMUL_pow, uint8_t segment_size = 1, int64_t stride = 0, + bool use_stride = false); uint64_t vector_load_store_index(void* core, std::function load_store_fn, uint8_t* V, uint16_t VLEN, uint8_t XLEN, uint8_t addressed_register, uint8_t index_register, uint64_t base_addr, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uint8_t elem_size_byte, uint64_t elem_count,