From f7aa51b12ea111129b589c2872d1921977bf25b9 Mon Sep 17 00:00:00 2001 From: Eyck-Alexander Jentzsch Date: Wed, 5 Feb 2025 17:04:16 +0100 Subject: [PATCH] adds small optimization, clarifies variables in vector_functions --- src/vm/vector_functions.cpp | 43 +++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/vm/vector_functions.cpp b/src/vm/vector_functions.cpp index 556f0ec..fbf166d 100644 --- a/src/vm/vector_functions.cpp +++ b/src/vm/vector_functions.cpp @@ -41,6 +41,7 @@ #include #include #include +#include namespace softvector { unsigned RFS = 32; @@ -85,40 +86,44 @@ double vtype_t::lmul() { return pow(2, signed_vlmul); } -vreg_view read_vmask(uint8_t* V, uint8_t VLEN, uint16_t elem_count, uint8_t reg_idx) { +vreg_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t reg_idx) { uint8_t* mask_start = V + VLEN / 8 * reg_idx; return {mask_start, elem_count / 8u}; // this can return size==0 as elem_count can be as low as 1 } -uint64_t vector_load_store(void* core, std::function load_store_fn, uint8_t* V, uint8_t VLEN, - uint8_t vd, uint64_t base_addr, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uint8_t elem_byte_size, - uint64_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int64_t stride) { +uint64_t vector_load_store(void* core, std::function load_store_fn, uint8_t* V, uint16_t VLEN, + uint8_t addressed_register, uint64_t base_addr, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, + uint8_t elem_size_byte, uint64_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int64_t stride) { + // eew = elem_size_byte * 8 assert(pow(2, EMUL_pow) * segment_size <= 8); assert(segment_size > 0); assert((elem_count & (elem_count - 1)) == 0); // check that elem_count is power of 2 assert(elem_count <= VLEN * RFS / 8); - unsigned eew = elem_byte_size * 8; unsigned emul_stride = EMUL_pow <= 0 ? 1 : pow(2, EMUL_pow); assert(emul_stride * segment_size <= 8); - assert(!(vd % emul_stride)); + assert(!(addressed_register % emul_stride)); vreg_view mask_view = read_vmask(V, VLEN, elem_count, 0); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { unsigned trap_idx = idx; - // vm decides active body element uint8_t current_mask_byte = mask_view.get(idx / 8); bool mask_active = vm ? 1 : current_mask_byte & (1 << idx % 8); - for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) { - // base + selected vd + current_elem + current_segment - uint8_t* dest_elem = V + (vd * VLEN / 8) + (eew / 8 * idx) + (VLEN / 8 * s_idx * emul_stride); - assert(dest_elem <= V + VLEN * RFS / 8); - if(mask_active) { - uint64_t addr = base_addr + (eew / 8) * (idx * segment_size + s_idx) * stride; - if(!load_store_fn(core, addr, eew / 8, dest_elem)) + if(mask_active) { + for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) { + // base + selected register + current_elem + current_segment + uint8_t* addressed_elem = V + (addressed_register * VLEN / 8) + (elem_size_byte * idx) + (VLEN / 8 * s_idx * emul_stride); + assert(addressed_elem <= V + VLEN * RFS / 8); + uint64_t addr = base_addr + (elem_size_byte) * (idx * segment_size + s_idx) * stride; + if(!load_store_fn(core, addr, elem_size_byte, addressed_elem)) return trap_idx; - } else { + } + } else { + for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) { + // base + selected register + current_elem + current_segment + uint8_t* addressed_elem = V + (addressed_register * VLEN / 8) + (elem_size_byte * idx) + (VLEN / 8 * s_idx * emul_stride); + assert(addressed_elem <= V + VLEN * RFS / 8); // this only updates the first 8 bits, so eew > 8 would not work correctly - *dest_elem = vtype.vma() ? *dest_elem : *dest_elem; + *addressed_elem = vtype.vma() ? *addressed_elem : *addressed_elem; } } } @@ -127,10 +132,10 @@ uint64_t vector_load_store(void* core, std::function 8 would not work correctly - *dest_elem = vtype.vta() ? *dest_elem : *dest_elem; + *addressed_elem = vtype.vta() ? *addressed_elem : *addressed_elem; } } return 0;