From 51f380239460fcbda0369224e5d574ced0f4a946 Mon Sep 17 00:00:00 2001 From: Eyck-Alexander Jentzsch Date: Wed, 12 Feb 2025 20:19:25 +0100 Subject: [PATCH] adds vector_imm instructions to vector_functions, makes size of all involved registers a template parameter --- gen_input/templates/interp/CORENAME.cpp.gtl | 64 ++++++++++++++++ src/vm/vector_functions.h | 5 +- src/vm/vector_functions.hpp | 83 +++++++++++---------- 3 files changed, 110 insertions(+), 42 deletions(-) diff --git a/gen_input/templates/interp/CORENAME.cpp.gtl b/gen_input/templates/interp/CORENAME.cpp.gtl index fbd528f..71048e9 100644 --- a/gen_input/templates/interp/CORENAME.cpp.gtl +++ b/gen_input/templates/interp/CORENAME.cpp.gtl @@ -224,6 +224,70 @@ if(vector != null) {%> throw new std::runtime_error("Unsupported sew bit value"); } } + void vector_vector_wv(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){ + switch(sew_val){ + case 0b000: + softvector::vector_vector_op<${vlen}, uint16_t, uint8_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd); + break; + case 0b001: + softvector::vector_vector_op<${vlen}, uint32_t, uint16_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd); + break; + case 0b010: + softvector::vector_vector_op<${vlen}, uint64_t, uint32_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd); + break; + case 0b011: // would widen to 128 bits + default: + throw new std::runtime_error("Unsupported sew bit value"); + } + } + void vector_imm_wv(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){ + switch(sew_val){ + case 0b000: + softvector::vector_imm_op<${vlen}, uint16_t, uint8_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd); + break; + case 0b001: + softvector::vector_imm_op<${vlen}, uint32_t, uint16_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd); + break; + case 0b010: + softvector::vector_imm_op<${vlen}, uint64_t, uint32_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd); + break; + case 0b011: // would widen to 128 bits + default: + throw new std::runtime_error("Unsupported sew bit value"); + } + } + void vector_vector_ww(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){ + switch(sew_val){ + case 0b000: + softvector::vector_vector_op<${vlen}, uint16_t, uint16_t, uint8_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd); + break; + case 0b001: + softvector::vector_vector_op<${vlen}, uint32_t, uint32_t, uint16_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd); + break; + case 0b010: + softvector::vector_vector_op<${vlen}, uint64_t, uint64_t, uint32_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd); + break; + case 0b011: // would widen to 128 bits + default: + throw new std::runtime_error("Unsupported sew bit value"); + } + } + void vector_imm_ww(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){ + switch(sew_val){ + case 0b000: + softvector::vector_imm_op<${vlen}, uint16_t, uint16_t, uint8_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd); + break; + case 0b001: + softvector::vector_imm_op<${vlen}, uint32_t, uint32_t, uint16_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd); + break; + case 0b010: + softvector::vector_imm_op<${vlen}, uint64_t, uint64_t, uint32_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd); + break; + case 0b011: // would widen to 128 bits + default: + throw new std::runtime_error("Unsupported sew bit value"); + } + } <%}%> uint64_t fetch_count{0}; uint64_t tval{0}; diff --git a/src/vm/vector_functions.h b/src/vm/vector_functions.h index a66c8d5..d7cf2f8 100644 --- a/src/vm/vector_functions.h +++ b/src/vm/vector_functions.h @@ -71,9 +71,12 @@ uint64_t vector_load_store_index(void* core, std::function +template void vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vs2, unsigned vs1, unsigned vd); +template +void vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vs2, + typename std::make_signed::type imm, unsigned vd); } // namespace softvector #include "vm/vector_functions.hpp" #endif /* _VM_VECTOR_FUNCTIONS_H_ */ diff --git a/src/vm/vector_functions.hpp b/src/vm/vector_functions.hpp index bde361c..18c5152 100644 --- a/src/vm/vector_functions.hpp +++ b/src/vm/vector_functions.hpp @@ -64,49 +64,50 @@ template vmask_view read_vmask(uint8_t* V, uint16_t elem_count, assert(mask_start + elem_count / 8 <= V + VLEN * RFS / 8); return {mask_start, elem_count}; } -template std::function get_funct(unsigned funct) { +template +std::function get_funct(unsigned funct) { switch(funct) { - case 0b000000: { - // VADD - return [](elem_t vs2, elem_t vs1) { return vs2 + vs1; }; - } - case 0b000010: { - // VSUB - } - case 0b000100: { - // VMINU - } - case 0b000101: { - // VMIN - } - case 0b000110: { - // VMAXU - } - case 0b000111: { - // VMAX - } - case 0b001001: { - // VAND - } - case 0b001010: { - // VOR - } - case 0b001011: { - // VXOR - } + case 0b000000: // VADD + case 0b110000: // VWADDU + case 0b110100: // VWADDU.W + return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; }; + case 0b000010: // VSUB + case 0b110010: // VWSUBU + case 0b110110: // VWSUBU.W + return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 - vs1; }; + case 0b000011: // VRSUB + return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 - vs2; }; + case 0b001001: // VAND + return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 & vs2; }; + case 0b001010: // VOR + return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 | vs2; }; + case 0b001011: // VXOR + return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 ^ vs2; }; + case 0b110001: // VWADD + case 0b110101: // VWADD.W + return [](src2_elem_t vs2, src1_elem_t vs1) { + return static_cast>(static_cast>(vs2) + + static_cast>(vs1)); + }; + case 0b110011: // VWSUB + case 0b110111: // VWSUB.W + return [](src2_elem_t vs2, src1_elem_t vs1) { + return static_cast>(static_cast>(vs2) - + static_cast>(vs1)); + }; default: throw new std::runtime_error("Uknown funct in get_funct"); } } -template +template void vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vs2, unsigned vs1, unsigned vd) { uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew(); vmask_view mask_reg = read_vmask(V, elem_count); - auto vs1_view = get_vreg(V, vs1, elem_count); - auto vs2_view = get_vreg(V, vs2, elem_count); - auto vd_view = get_vreg(V, vd, elem_count); - auto fn = get_funct(funct6); + auto vs1_view = get_vreg(V, vs1, elem_count); + auto vs2_view = get_vreg(V, vs2, elem_count); + auto vd_view = get_vreg(V, vd, elem_count); + auto fn = get_funct(funct6); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { @@ -119,20 +120,20 @@ void vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, } // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail - unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(elem_t) * 8); + unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8); for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; } return; } -template +template void vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vs2, - typename std::make_signed::type imm, unsigned vd) { + typename std::make_signed::type imm, unsigned vd) { uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew(); vmask_view mask_reg = read_vmask(V, elem_count); - auto vs2_view = get_vreg(V, vs2, elem_count); - auto vd_view = get_vreg(V, vd, elem_count); - auto fn = get_funct(funct6); + auto vs2_view = get_vreg(V, vs2, elem_count); + auto vd_view = get_vreg(V, vd, elem_count); + auto fn = get_funct(funct6); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { @@ -145,7 +146,7 @@ void vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vt } // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail - unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(elem_t) * 8); + unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8); for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; }