From e24c1874c4fe459ac257e9b33fd5ba664b5b2630 Mon Sep 17 00:00:00 2001
From: Eyck-Alexander Jentzsch <eyck@minres.com>
Date: Sat, 22 Feb 2025 16:51:53 +0100
Subject: [PATCH] Changes load_store to use vreg_views as well

---
Review notes (text between this "---" marker and the diffstat is ignored by
`git am`):

* Reconstructed from a whitespace-mangled copy: line breaks and every
  <angle-bracketed> span had been stripped.  Template argument lists, the
  std::function signature, traits<ARCH>::VLEN/XLEN, get_vreg<VLEN, eew_t>,
  reinterpret_cast<uint8_t*> and the author e-mail were restored from the
  surrounding code and repository conventions -- re-check them against the
  tree and apply with `git am -C1` (or regenerate) if context does not match.
* Defects fixed in the added code while reconstructing:
  - `throw new std::runtime_error(...)` -> `throw std::runtime_error(...)`
    (x4).  Throwing a pointer means no `catch(const std::exception&)`
    handler matches and the heap allocation leaks; C++ throws by value.
  - `signed stride_offset = stride * idx;` -> `int64_t`.  The int64_t
    stride times the element index was silently truncated to 32 bit.
  - Commit subject typo ("aswell" -> "as well").

 gen_input/templates/interp/CORENAME.cpp.gtl | 60 ++++++++++++++++++---
 src/vm/vector_functions.cpp                 | 53 ------------------
 src/vm/vector_functions.h                   |  4 ++
 src/vm/vector_functions.hpp                 | 30 +++++++++++
 4 files changed, 86 insertions(+), 61 deletions(-)

diff --git a/gen_input/templates/interp/CORENAME.cpp.gtl b/gen_input/templates/interp/CORENAME.cpp.gtl
index f03c627..a6628eb 100644
--- a/gen_input/templates/interp/CORENAME.cpp.gtl
+++ b/gen_input/templates/interp/CORENAME.cpp.gtl
@@ -170,17 +170,61 @@ if(vector != null) {%>
 
     inline void lower(){ this->core.reg.trap_state = 0; }
 
-    uint64_t vlseg(uint8_t* V, uint8_t vd, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size){
-        return softvector::vector_load_store(this->get_arch(), softvector::softvec_read, V, traits<ARCH>::VLEN, vd, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size);
+    uint64_t vlseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size){
+        switch(width_val){
+        case 0b000:
+            return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+        case 0b101:
+            return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+        case 0b110:
+            return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+        case 0b111:
+            return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+        default: // all other width encodings are reserved for vector loads
+            throw std::runtime_error("Unsupported width bit value");
+        }
     }
-    uint64_t vsseg(uint8_t* V, uint8_t vs3, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size){
-        return softvector::vector_load_store(this->get_arch(), softvector::softvec_write, V, traits<ARCH>::VLEN, vs3, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size);
+    uint64_t vsseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size){
+        switch(width_val){
+        case 0b000:
+            return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+        case 0b101:
+            return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+        case 0b110:
+            return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+        case 0b111:
+            return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
+        default: // all other width encodings are reserved for vector stores
+            throw std::runtime_error("Unsupported width bit value");
+        }
     }
-    uint64_t vlsseg(uint8_t* V, uint8_t vd, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int${xlen}_t stride){
-        return softvector::vector_load_store(this->get_arch(), softvector::softvec_read, V, traits<ARCH>::VLEN, vd, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size, stride);
+    uint64_t vlsseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size, int64_t stride){
+        switch(width_val){
+        case 0b000:
+            return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+        case 0b101:
+            return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+        case 0b110:
+            return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+        case 0b111:
+            return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+        default: // all other width encodings are reserved for vector loads
+            throw std::runtime_error("Unsupported width bit value");
+        }
     }
-    uint64_t vssseg(uint8_t* V, uint8_t vs3, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int${xlen}_t stride){
-        return softvector::vector_load_store(this->get_arch(), softvector::softvec_write, V, traits<ARCH>::VLEN, vs3, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size, stride);
+    uint64_t vssseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size, int64_t stride){
+        switch(width_val){
+        case 0b000:
+            return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+        case 0b101:
+            return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+        case 0b110:
+            return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+        case 0b111:
+            return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
+        default: // all other width encodings are reserved for vector stores
+            throw std::runtime_error("Unsupported width bit value");
+        }
     }
     uint64_t vlxseg(uint8_t* V, uint8_t vd, uint8_t vs2, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, uint8_t segment_size, bool ordered){
         return softvector::vector_load_store_index(this->get_arch(), softvector::softvec_read, V, traits<ARCH>::VLEN, traits<ARCH>::XLEN, vd, vs2, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, segment_size, ordered);
diff --git a/src/vm/vector_functions.cpp b/src/vm/vector_functions.cpp
index dbf2436..d4f48b6 100644
--- a/src/vm/vector_functions.cpp
+++ b/src/vm/vector_functions.cpp
@@ -84,59 +84,6 @@ vmask_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t re
     assert(mask_start + elem_count / 8 <= V + VLEN * RFS / 8);
     return {mask_start, elem_count};
 }
-uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint16_t VLEN,
-                           uint8_t addressed_register, uint64_t base_addr, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm,
-                           uint8_t elem_size_byte, uint64_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int64_t stride,
-                           bool use_stride) {
-    // eew = elem_size_byte * 8
-    assert(pow(2, EMUL_pow) * segment_size <= 8);
-    assert(segment_size > 0);
-    // assert((elem_count & (elem_count - 1)) == 0); // check that elem_count is power of 2, this check does not hold for vlm.v and vsm.v
-    assert(elem_count <= VLEN * RFS / 8);
-    unsigned emul_stride = EMUL_pow <= 0 ? 1 : pow(2, EMUL_pow);
-    assert(emul_stride * segment_size <= 8);
-    assert(!(addressed_register % emul_stride));
-    if(!use_stride)
-        stride = elem_size_byte * segment_size;
-    vmask_view mask_reg = read_vmask(V, VLEN, elem_count);
-    // elements w/ index smaller than vstart are in the prestart and get skipped
-    // body is from vstart to min(elem_count, vl)
-    for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
-        unsigned trap_idx = idx;
-        bool mask_active = vm ? 1 : mask_reg[idx];
-        if(mask_active) {
-            for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) {
-                // base + selected register + current_elem + current_segment
-                uint8_t* addressed_elem = V + (addressed_register * VLEN / 8) + (elem_size_byte * idx) + (VLEN / 8 * s_idx * emul_stride);
-                assert(addressed_elem <= V + VLEN * RFS / 8);
-                uint64_t addr = base_addr + stride * idx + s_idx * elem_size_byte;
-                if(!load_store_fn(core, addr, elem_size_byte, addressed_elem))
-                    return trap_idx;
-            }
-        } else {
-            for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) {
-                // base + selected register + current_elem + current_segment
-                uint8_t* addressed_elem = V + (addressed_register * VLEN / 8) + (elem_size_byte * idx) + (VLEN / 8 * s_idx * emul_stride);
-                assert(addressed_elem <= V + VLEN * RFS / 8);
-                // this only updates the first 8 bits, so eew > 8 would not work correctly
-                *addressed_elem = vtype.vma() ? *addressed_elem : *addressed_elem;
-            }
-        }
-    }
-    // elements w/ index larger than elem_count are in the tail (fractional LMUL)
-    // elements w/ index larger than vl are in the tail
-    unsigned maximum_elems = VLEN * vtype.lmul() / (elem_size_byte * 8);
-    for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
-        for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) {
-            // base + selected register + current_elem + current_segment
-            uint8_t* addressed_elem = V + (addressed_register * VLEN / 8) + (elem_size_byte * idx) + (VLEN / 8 * s_idx * emul_stride);
-            assert(addressed_elem <= V + VLEN * RFS / 8);
-            // this only updates the first 8 bits, so eew > 8 would not work correctly
-            *addressed_elem = vtype.vta() ? *addressed_elem : *addressed_elem;
-        }
-    }
-    return 0;
-}
 uint64_t read_n_bits(uint8_t* V, unsigned n) {
     switch(n) {
     case 8:
diff --git a/src/vm/vector_functions.h b/src/vm/vector_functions.h
index 25bb8cb..94bd5a9 100644
--- a/src/vm/vector_functions.h
+++ b/src/vm/vector_functions.h
@@ -72,6 +72,10 @@ uint64_t vector_load_store_index(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn,
                                  uint8_t* V, uint16_t VLEN, uint8_t XLEN, uint8_t vd, uint8_t vs2, uint64_t rs1_val, uint64_t vl,
                                  uint64_t vstart, vtype_t vtype, bool vm, uint8_t elem_size_byte, uint64_t elem_count,
                                  uint8_t segment_size, bool ordered);
+template <unsigned VLEN, typename eew_t>
+uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint64_t vl,
+                           uint64_t vstart, vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1, uint8_t segment_size, int64_t stride = 0,
+                           bool use_stride = false);
 template <typename dest_elem_t, typename src_elem_t = dest_elem_t>
 void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
                       unsigned vs2, unsigned vs1, carry_t carry = carry_t::NO_CARRY, bool merge = false);
diff --git a/src/vm/vector_functions.hpp b/src/vm/vector_functions.hpp
index 8d525df..04b7a30 100644
--- a/src/vm/vector_functions.hpp
+++ b/src/vm/vector_functions.hpp
@@ -95,6 +95,36 @@ template <> struct twice<uint64_t> { using type = __uint128_t; };
 
 #endif
 template <class T> using twice_t = typename twice<T>::type; // for convenience
+template <unsigned VLEN, typename eew_t>
+uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint64_t vl,
+                           uint64_t vstart, vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1, uint8_t segment_size, int64_t stride,
+                           bool use_stride) {
+    unsigned vlmax = VLEN * vtype.lmul() / vtype.sew();
+    auto emul_stride = std::max<unsigned>(vlmax, VLEN / (sizeof(eew_t) * 8)); // element distance between segment fields
+    auto vd_view = get_vreg<VLEN, eew_t>(V, vd, emul_stride * segment_size);
+    vmask_view mask_reg = read_vmask(V, VLEN, vlmax);
+    for(size_t idx = vstart; idx < vl; idx++) { // body elements are [vstart, vl)
+        bool mask_active = vm ? 1 : mask_reg[idx];
+        if(mask_active) {
+            int64_t stride_offset = stride * static_cast<int64_t>(idx); // int64_t: `signed` truncated the product to 32 bit
+            auto seg_offset = use_stride ? 0 : segment_size * sizeof(eew_t) * idx; // unit-stride: segments laid out back to back
+            for(size_t s_idx = 0; s_idx < segment_size; s_idx++) {
+                eew_t* addressed_elem = &vd_view[idx + emul_stride * s_idx];
+                uint64_t addr = rs1 + stride_offset + seg_offset + s_idx * sizeof(eew_t);
+                if(!load_store_fn(core, addr, sizeof(eew_t), reinterpret_cast<uint8_t*>(addressed_elem)))
+                    return idx; // trap: report the faulting element index
+            }
+        } else {
+            for(size_t s_idx = 0; s_idx < segment_size; s_idx++) {
+                // masked-off elements are left undisturbed (mask-agnostic), vtype.vma() deliberately ignored
+            }
+        }
+    }
+    for(size_t idx = vl; idx < vlmax; idx++) { // tail elements left undisturbed (tail-agnostic), vtype.vta() ignored
+    }
+    return 0;
+}
+
 template <typename elem_t>
 std::function<elem_t(elem_t, elem_t)> get_funct(unsigned funct6, unsigned funct3) {
     if(funct3 == OPIVV || funct3 == OPIVX || funct3 == OPIVI)