From 63889b02e7cccf4293f4a93cf71b516965cd01dc Mon Sep 17 00:00:00 2001 From: Eyck-Alexander Jentzsch Date: Wed, 19 Feb 2025 15:02:30 +0100 Subject: [PATCH] adds widening reductions --- gen_input/templates/interp/CORENAME.cpp.gtl | 13 +++++++++++ src/vm/vector_functions.hpp | 26 ++++++++++++++------- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/gen_input/templates/interp/CORENAME.cpp.gtl b/gen_input/templates/interp/CORENAME.cpp.gtl index 50d663f..4b3aed8 100644 --- a/gen_input/templates/interp/CORENAME.cpp.gtl +++ b/gen_input/templates/interp/CORENAME.cpp.gtl @@ -481,6 +481,19 @@ if(vector != null) {%> throw new std::runtime_error("Unsupported sew bit value"); } } + void vector_red_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){ + switch(sew_val){ + case 0b000: + return softvector::vector_red_op<${vlen}, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1); + case 0b001: + return softvector::vector_red_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1); + case 0b010: + return softvector::vector_red_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1); + case 0b011: // would require 128 bits vs2 value + default: + throw new std::runtime_error("Unsupported sew bit value"); + } + } <%}%> uint64_t fetch_count{0}; uint64_t tval{0}; diff --git a/src/vm/vector_functions.hpp b/src/vm/vector_functions.hpp index 668d3bf..c3ece59 100644 --- a/src/vm/vector_functions.hpp +++ b/src/vm/vector_functions.hpp @@ -798,8 +798,14 @@ template std::function get_red_funct(unsigned funct6, unsigned funct3) { if(funct3 == OPIVV || funct3 == OPIVX || funct3 == OPIVI) switch(funct6) { - // case 0b110000: // VWREDSUMU - // case 0b110001: // VWREDSUM + case 0b110000: // VWREDSUMU + return [](dest_elem_t& running_total, src_elem_t vs2) { return running_total += static_cast(vs2); }; + case 0b110001: // VWREDSUM + return [](dest_elem_t& running_total, src_elem_t vs2) { + // cast the signed vs2 elem to unsigned to enable wraparound on overflow + return running_total += static_cast( + static_cast>(static_cast>(vs2))); + }; default: throw new std::runtime_error("Unknown funct6 in get_funct"); } @@ -814,18 +820,20 @@ std::function get_red_funct(unsigned funct6, uns case 0b000011: // VREDXOR return [](dest_elem_t& running_total, src_elem_t vs2) { running_total ^= vs2; }; case 0b000100: // VREDMINU - return [](dest_elem_t& running_total, src_elem_t vs2) { running_total = std::min(running_total, vs2); }; + return + [](dest_elem_t& running_total, src_elem_t vs2) { running_total = std::min(running_total, static_cast(vs2)); }; case 0b000101: // VREDMIN return [](dest_elem_t& running_total, src_elem_t vs2) { - running_total = - std::min(static_cast>(running_total), static_cast>(vs2)); + running_total = std::min(static_cast>(running_total), + static_cast>(static_cast>(vs2))); }; case 0b000110: // VREDMAXU - return [](dest_elem_t& running_total, src_elem_t vs2) { running_total = std::max(running_total, vs2); }; + return + [](dest_elem_t& running_total, src_elem_t vs2) { running_total = std::max(running_total, static_cast(vs2)); }; case 0b000111: // VREDMAX return [](dest_elem_t& running_total, src_elem_t vs2) { - running_total = - std::max(static_cast>(running_total), static_cast>(vs2)); + running_total = std::max(static_cast>(running_total), + static_cast>(static_cast>(vs2))); }; default: throw new std::runtime_error("Unknown funct6 in get_funct"); @@ -840,7 +848,7 @@ void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui return; uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew(); vmask_view mask_reg = read_vmask(V, elem_count); - auto vs1_elem = get_vreg(V, vs1, elem_count)[0]; + auto vs1_elem = get_vreg(V, vs1, elem_count)[0]; auto vs2_view = get_vreg(V, vs2, elem_count); auto vd_view = get_vreg(V, vd, elem_count); auto fn = get_red_funct(funct6, funct3);