From 877cad27ba6675ded92b4a98b6a909775fc94720 Mon Sep 17 00:00:00 2001
From: Eyck-Alexander Jentzsch
Date: Fri, 21 Feb 2025 17:29:57 +0100
Subject: [PATCH] adds gather instructions

---
 gen_input/templates/interp/CORENAME.cpp.gtl | 48 +++++++++++++++++++++
 src/vm/vector_functions.h                   |  9 +++-
 src/vm/vector_functions.hpp                 | 39 +++++++++++++++++
 3 files changed, 95 insertions(+), 1 deletion(-)

diff --git a/gen_input/templates/interp/CORENAME.cpp.gtl b/gen_input/templates/interp/CORENAME.cpp.gtl
index 0241fcc..211b97d 100644
--- a/gen_input/templates/interp/CORENAME.cpp.gtl
+++ b/gen_input/templates/interp/CORENAME.cpp.gtl
@@ -622,6 +622,54 @@ if(vector != null) {%>
             throw new std::runtime_error("Unsupported sew bit value");
         }
     }
+    // Forwards to softvector::vector_vector_gather with the element type chosen by sew_val
+    // (0b000..0b011 -> uint8_t..uint64_t); any other sew_val throws std::runtime_error by value.
+    void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){
+        case 0b000:
+            return softvector::vector_vector_gather<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+        case 0b001:
+            return softvector::vector_vector_gather<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+        case 0b010:
+            return softvector::vector_vector_gather<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+        case 0b011:
+            return softvector::vector_vector_gather<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Forwards to softvector::vector_vector_gather with the first type argument chosen by sew_val
+    // (0b000..0b011 -> uint8_t..uint64_t) and the second fixed to uint16_t; other values throw by value.
+    void vector_vector_gatherei16(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){
+        case 0b000:
+            return softvector::vector_vector_gather<${vlen}, uint8_t, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+        case 0b001:
+            return softvector::vector_vector_gather<${vlen}, uint16_t, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+        case 0b010:
+            return softvector::vector_vector_gather<${vlen}, uint32_t, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+        case 0b011:
+            return softvector::vector_vector_gather<${vlen}, uint64_t, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    // Forwards to softvector::vector_imm_gather with the element type chosen by sew_val
+    // (0b000..0b011 -> uint8_t..uint64_t); any other sew_val throws std::runtime_error by value.
+    void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint64_t imm, uint8_t sew_val){
+        switch(sew_val){
+        case 0b000:
+            return softvector::vector_imm_gather<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b001:
+            return softvector::vector_imm_gather<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b010:
+            return softvector::vector_imm_gather<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b011:
+            return softvector::vector_imm_gather<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
 <%}%>
     uint64_t fetch_count{0};
     uint64_t tval{0};
diff --git a/src/vm/vector_functions.h b/src/vm/vector_functions.h
index 1186e72..bd2668e 100644
--- a/src/vm/vector_functions.h
+++ b/src/vm/vector_functions.h
@@ -115,7 +115,14 @@ template
 void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
 template
 void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
-
+// NOTE(review): the template parameter lists below were missing from the patch text and are
+// reconstructed from the explicit arguments used at the call sites - confirm names and kinds.
+// For active elements: vd[i] = vs2[vs1[i]], or 0 when vs1[i] >= vlmax; index_elem_t is the element type of vs1.
+template <unsigned VLEN, typename dest_elem_t, typename index_elem_t = dest_elem_t>
+void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1);
+// For active elements: vd[i] = vs2[imm], or 0 when imm >= vlmax.
+template <unsigned VLEN, typename elem_t>
+void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
 } // namespace softvector
 #include "vm/vector_functions.hpp"
 #endif /* _VM_VECTOR_FUNCTIONS_H_ */
diff --git a/src/vm/vector_functions.hpp b/src/vm/vector_functions.hpp
index 5b07aac..68c7f26 100644
--- a/src/vm/vector_functions.hpp
+++ b/src/vm/vector_functions.hpp
@@ -1068,4 +1068,43 @@ void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype,
         vd_view[0] = vtype.vma() ? vd_view[0] : vd_view[0];
     }
 }
+// Active body elements get vd[i] = (vs1[i] >= vlmax) ? 0 : vs2[vs1[i]]; masked-off and tail elements are rewritten with their own value.
+// NOTE(review): template list and explicit template arguments were missing from the patch text and are reconstructed - confirm.
+template <unsigned VLEN, typename dest_elem_t, typename index_elem_t>
+void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1) {
+    uint64_t vlmax = VLEN * vtype.lmul() / vtype.sew();
+    vmask_view mask_reg = read_vmask<VLEN>(V, vlmax);
+    auto vs1_view = get_vreg<VLEN, index_elem_t>(V, vs1, vlmax);
+    auto vs2_view = get_vreg<VLEN, dest_elem_t>(V, vs2, vlmax);
+    auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, vlmax);
+    for(unsigned idx = vstart; idx < std::min(vlmax, vl); idx++) {
+        if(vm || mask_reg[idx]) {
+            vd_view[idx] = (vs1_view[idx] >= vlmax) ? 0 : vs2_view[vs1_view[idx]];
+        } else {
+            vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
+        }
+    }
+    for(unsigned idx = vl; idx < vlmax; idx++) {
+        vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
+    }
+}
+// Active body elements get vd[i] = (imm >= vlmax) ? 0 : vs2[imm]; masked-off and tail elements are rewritten with their own value.
+// NOTE(review): template list and explicit template arguments were missing from the patch text and are reconstructed - confirm.
+template <unsigned VLEN, typename elem_t>
+void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm) {
+    uint64_t vlmax = VLEN * vtype.lmul() / vtype.sew();
+    vmask_view mask_reg = read_vmask<VLEN>(V, vlmax);
+    auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, vlmax);
+    auto vd_view = get_vreg<VLEN, elem_t>(V, vd, vlmax);
+    for(unsigned idx = vstart; idx < std::min(vlmax, vl); idx++) {
+        if(vm || mask_reg[idx]) {
+            vd_view[idx] = (imm >= vlmax) ? 0 : vs2_view[imm];
+        } else {
+            vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
+        }
+    }
+    for(unsigned idx = vl; idx < vlmax; idx++) {
+        vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
+    }
+}
 } // namespace softvector
\ No newline at end of file