From a26505cb5cf4f096d8e8ddf0b1a0776ac3c838ad Mon Sep 17 00:00:00 2001
From: Eyck-Alexander Jentzsch
Date: Fri, 21 Feb 2025 14:59:33 +0100
Subject: [PATCH] adds more functions, up to slide

---
 gen_input/templates/interp/CORENAME.cpp.gtl | 125 +++++++++++++
 src/vm/vector_functions.h                   |  14 ++
 src/vm/vector_functions.hpp                 | 194 ++++++++++++++++++--
 3 files changed, 317 insertions(+), 16 deletions(-)

diff --git a/gen_input/templates/interp/CORENAME.cpp.gtl b/gen_input/templates/interp/CORENAME.cpp.gtl
index 890888a..0241fcc 100644
--- a/gen_input/templates/interp/CORENAME.cpp.gtl
+++ b/gen_input/templates/interp/CORENAME.cpp.gtl
@@ -497,6 +497,131 @@ if(vector != null) {%>
     void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, unsigned vd, unsigned vs2, unsigned vs1){
         return softvector::mask_mask_op<${vlen}>(V, funct6, funct3, vl, vstart, vd, vs2, vs1);
     }
+    uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2){
+        return softvector::vcpop<${vlen}>(V, vl, vstart, vm, vs2);
+    }
+    int64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2){
+        return softvector::vfirst<${vlen}>(V, vl, vstart, vm, vs2);
+    }
+    void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl, uint64_t vstart, bool vm, unsigned vd, unsigned vs2){
+        return softvector::mask_set_op<${vlen}>(V, enc, vl, vstart, vm, vd, vs2);
+    }
+    void viota(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t sew_val){
+        switch(sew_val){
+        case 0b000:
+            return softvector::viota<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2);
+        case 0b001:
+            return softvector::viota<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2);
+        case 0b010:
+            return softvector::viota<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2);
+        case 0b011:
+            return softvector::viota<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2);
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    void vid(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t sew_val){
+        switch(sew_val){
+        case 0b000:
+            return softvector::vid<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd);
+        case 0b001:
+            return softvector::vid<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd);
+        case 0b010:
+            return softvector::vid<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd);
+        case 0b011:
+            return softvector::vid<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd);
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    void scalar_to_vector(uint8_t* V, softvector::vtype_t vtype, unsigned vd, uint64_t val, uint8_t sew_val){
+        switch(sew_val){
+        case 0b000:
+            softvector::scalar_move<${vlen}, uint8_t>(V, vtype, vd, val, true);
+            break;
+        case 0b001:
+            softvector::scalar_move<${vlen}, uint16_t>(V, vtype, vd, val, true);
+            break;
+        case 0b010:
+            softvector::scalar_move<${vlen}, uint32_t>(V, vtype, vd, val, true);
+            break;
+        case 0b011:
+            softvector::scalar_move<${vlen}, uint64_t>(V, vtype, vd, val, true);
+            break;
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    uint64_t scalar_from_vector(uint8_t* V, softvector::vtype_t vtype, unsigned vd, uint8_t sew_val){
+        switch(sew_val){
+        case 0b000:
+            return softvector::scalar_move<${vlen}, uint8_t>(V, vtype, vd, 0, false);
+        case 0b001:
+            return softvector::scalar_move<${vlen}, uint16_t>(V, vtype, vd, 0, false);
+        case 0b010:
+            return softvector::scalar_move<${vlen}, uint32_t>(V, vtype, vd, 0, false);
+        case 0b011:
+            return softvector::scalar_move<${vlen}, uint64_t>(V, vtype, vd, 0, false);
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
+        switch(sew_val){
+        case 0b000:
+            return softvector::vector_slide<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b001:
+            return softvector::vector_slide<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b010:
+            return softvector::vector_slide<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b011:
+            return softvector::vector_slide<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
+        switch(sew_val){
+        case 0b000:
+            return softvector::vector_slide<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, -static_cast<int64_t>(imm));
+        case 0b001:
+            return softvector::vector_slide<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, -static_cast<int64_t>(imm));
+        case 0b010:
+            return softvector::vector_slide<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, -static_cast<int64_t>(imm));
+        case 0b011:
+            return softvector::vector_slide<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, -static_cast<int64_t>(imm));
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val) {
+        switch(sew_val){
+        case 0b000:
+            return softvector::vector_slide1up<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b001:
+            return softvector::vector_slide1up<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b010:
+            return softvector::vector_slide1up<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b011:
+            return softvector::vector_slide1up<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
+    void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val) {
+        switch(sew_val){
+        case 0b000:
+            return softvector::vector_slide1down<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b001:
+            return softvector::vector_slide1down<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b010:
+            return softvector::vector_slide1down<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        case 0b011:
+            return softvector::vector_slide1down<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
 <%}%>
     uint64_t fetch_count{0};
     uint64_t tval{0};
diff --git a/src/vm/vector_functions.h b/src/vm/vector_functions.h
index 19b8cc7..1186e72 100644
--- a/src/vm/vector_functions.h
+++ b/src/vm/vector_functions.h
@@ -102,6 +102,20 @@ void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
 void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
                    unsigned vs2, unsigned vs1);
 template <unsigned VLEN> void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, unsigned vd, unsigned vs2, unsigned vs1);
+template <unsigned VLEN> uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2);
+template <unsigned VLEN> int64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2);
+template <unsigned VLEN> void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl, uint64_t vstart, bool vm, unsigned vd, unsigned vs2);
+template <unsigned VLEN, typename src_elem_t>
+void viota(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2);
+template <unsigned VLEN, typename src_elem_t> void vid(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd);
+template <unsigned VLEN, typename elem_t> uint64_t scalar_move(uint8_t* V, vtype_t vtype, unsigned vd, uint64_t val, bool to_vector);
+template <unsigned VLEN, typename src_elem_t>
+void vector_slide(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
+template <unsigned VLEN, typename src_elem_t>
+void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
+template <unsigned VLEN, typename src_elem_t>
+void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
+
 } // namespace softvector
 #include "vm/vector_functions.hpp"
 #endif /* _VM_VECTOR_FUNCTIONS_H_ */
diff --git a/src/vm/vector_functions.hpp b/src/vm/vector_functions.hpp
index cba2bc9..5b07aac 100644
--- a/src/vm/vector_functions.hpp
+++ b/src/vm/vector_functions.hpp
@@ -57,7 +57,7 @@ template <typename elem_t> struct vreg_view {
         return *(reinterpret_cast<elem_t*>(start) + idx);
     }
 };
-
+// TODO: change the order of parameters so that it is in sync with read_vmask
 template <unsigned VLEN, typename elem_t> vreg_view<elem_t> get_vreg(uint8_t* V, uint8_t reg_idx, uint16_t elem_count) {
     assert(V + elem_count * sizeof(elem_t) <= V + VLEN * RFS / 8);
     return {V + VLEN / 8 * reg_idx, elem_count};
@@ -122,9 +122,6 @@ std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsi
         return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 | vs2; };
     case 0b001011: // VXOR
         return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 ^ vs2; };
-    // case 0b001100: // VRGATHER
-    // case 0b001110: // VRGATHEREI16
-    // case 0b001111: // VLSLIDEDOWN
     case 0b010000: // VADC
         return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
     case 0b010010: // VSBC
@@ -134,7 +131,6 @@ std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsi
         };
     case 0b100101: // VSLL
         return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 << (vs1 & shift_mask<src2_elem_t>()); };
-    // case 0b100111: // VMVR
     case 0b101000: // VSRL
         return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 >> (vs1 & shift_mask<src2_elem_t>()); };
     case 0b101001: // VSRA
@@ -152,9 +148,6 @@ std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsi
         }
     } else if(funct3 == OPMVV || funct3 == OPMVX)
         switch(funct6) {
-        // case 0b001110: // VSLID1EUP
-        // case 0b001111: // VSLIDE1DOWN
-        // case 0b010111: // VCOMPRESS
         case 0b100000: // VDIVU
             return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) -> dest_elem_t {
                 if(vs1 == 0)
@@ -493,13 +486,13 @@ void mask_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t v
 template <typename dest_elem_t, typename src2_elem_t>
 std::function<dest_elem_t(src2_elem_t)> get_unary_fn(unsigned unary_op) {
     switch(unary_op) {
-    case 0b00111: // vsext.vf2
-    case 0b00101: // vsext.vf4
-    case 0b00011: // vsext.vf8
+    case 0b00111: // VSEXT.VF2
+    case 0b00101: // VSEXT.VF4
+    case 0b00011: // VSEXT.VF8
         return [](src2_elem_t vs2) { return static_cast<std::make_signed_t<src2_elem_t>>(vs2); };
-    case 0b00110: // vzext.vf2
-    case 0b00100: // vzext.vf4
-    case 0b00010: // vzext.vf8
+    case 0b00110: // VZEXT.VF2
+    case 0b00100: // VZEXT.VF4
+    case 0b00010: // VZEXT.VF8
         return [](src2_elem_t vs2) { return vs2; };
     default:
         throw new std::runtime_error("Unknown funct in get_unary_fn");
@@ -818,7 +811,7 @@ std::function<void(dest_elem_t&, src_elem_t)> get_red_funct(unsigned funct6, uns
         return [](dest_elem_t& running_total, src_elem_t vs2) { return running_total += static_cast<dest_elem_t>(vs2); };
     case 0b110001: // VWREDSUM
         return [](dest_elem_t& running_total, src_elem_t vs2) {
-            // cast the signed vs2 elem to unsigned to enable wraparound on overflow
+            // cast the signed vs2 elem to unsigned to enable wrap around on overflow
             return running_total += static_cast<dest_elem_t>(
                 static_cast<std::make_unsigned_t<dest_elem_t>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)));
         };
@@ -889,7 +882,7 @@ void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uin
     auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
     auto vd_view = read_vmask<VLEN>(V, elem_count, vd);
     auto fn = get_mask_funct(funct6, funct3); // could be bool, but would break the make_signed_t in get_mask_funct
-    for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) {
+    for(unsigned idx = vstart; idx < vl; idx++) {
         unsigned new_bit_value = fn(vs2_view[idx], vs1_view[idx]);
         uint8_t* cur_mask_byte_addr = vd_view.start + idx / 8;
         unsigned cur_bit = idx % 8;
@@ -906,4 +899,173 @@ void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uin
     }
     return;
 }
+template <unsigned VLEN> uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2) {
+    uint64_t elem_count = VLEN;
+    auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
+    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
+    uint64_t running_total = 0;
+    for(unsigned idx = vstart; idx < vl; idx++) {
+        bool mask_active = vm ? 1 : mask_reg[idx];
+        if(mask_active && vs2_view[idx])
+            running_total += 1;
+    }
+    return running_total;
+}
+template <unsigned VLEN> int64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2) {
+    uint64_t elem_count = VLEN;
+    auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
+    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
+    for(unsigned idx = vstart; idx < vl; idx++) {
+        bool mask_active = vm ? 1 : mask_reg[idx];
+        if(mask_active && vs2_view[idx])
+            return idx;
+    }
+    return -1;
+}
+inline std::function<unsigned(bool&, bool)> get_mask_set_funct(unsigned enc) {
+    switch(enc) {
+    case 0b00001: // VMSBF
+        return [](bool& marker, bool vs2) {
+            if(marker)
+                return 0;
+            if(vs2) {
+                marker = true;
+                return 0;
+            } else
+                return 1;
+        };
+    case 0b00010: // VMSOF
+        return [](bool& marker, bool vs2) {
+            if(marker)
+                return 0;
+            if(vs2) {
+                marker = true;
+                return 1;
+            } else
+                return 0;
+        };
+    case 0b00011: // VMSIF
+        return [](bool& marker, bool vs2) {
+            if(marker)
+                return 0;
+            if(vs2) {
+                marker = true;
+                return 1;
+            } else
+                return 1;
+        };
+    case 0b10001: // VID
+    default:
+        throw std::runtime_error("Unknown enc in get_mask_set_funct");
+    }
+}
+template <unsigned VLEN> void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl, uint64_t vstart, bool vm, unsigned vd, unsigned vs2) {
+    uint64_t elem_count = VLEN;
+    auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
+    auto vd_view = read_vmask<VLEN>(V, elem_count, vd);
+    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
+    auto fn = get_mask_set_funct(enc);
+    bool marker = false;
+    for(unsigned idx = vstart; idx < vl; idx++) {
+        bool mask_active = vm ? 1 : mask_reg[idx];
+        if(mask_active) {
+            unsigned new_bit_value = fn(marker, vs2_view[idx]);
+            uint8_t* cur_mask_byte_addr = vd_view.start + idx / 8;
+            unsigned cur_bit = idx % 8;
+            *cur_mask_byte_addr = *cur_mask_byte_addr & ~(1U << cur_bit) | static_cast<unsigned>(new_bit_value) << cur_bit;
+        }
+    }
+    // the tail is all elements of the destination register beyond the first one
+    for(unsigned idx = vl; idx < VLEN; idx++) {
+        // always tail agnostic
+        // this is a nop, placeholder for vta behavior
+        unsigned new_bit_value = vd_view[idx];
+        uint8_t* cur_mask_byte_addr = vd_view.start + idx / 8;
+        unsigned cur_bit = idx % 8;
+        *cur_mask_byte_addr = *cur_mask_byte_addr & ~(1U << cur_bit) | static_cast<unsigned>(new_bit_value) << cur_bit;
+    }
+}
+template <unsigned VLEN, typename src_elem_t>
+void viota(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2) {
+    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
+    auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
+    auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
+    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
+    unsigned current = 0;
+    for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) {
+        bool mask_active = vm ? 1 : mask_reg[idx];
+        if(mask_active) {
+            vd_view[idx] = current;
+            if(vs2_view[idx])
+                current += 1;
+        }
+    }
+    return;
+}
+template <unsigned VLEN, typename src_elem_t> void vid(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd) {
+    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
+    auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
+    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
+    for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) {
+        bool mask_active = vm ? 1 : mask_reg[idx];
+        if(mask_active) {
+            vd_view[idx] = idx;
+        }
+    }
+    return;
+}
+template <unsigned VLEN, typename elem_t> uint64_t scalar_move(uint8_t* V, vtype_t vtype, unsigned vd, uint64_t val, bool to_vector) {
+    auto vd_view = get_vreg<VLEN, elem_t>(V, vd, 1);
+    if(to_vector) {
+        vd_view[0] = val;
+        for(unsigned idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) {
+            vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
+        }
+    }
+    return static_cast<uint64_t>(static_cast<std::make_signed_t<elem_t>>(vd_view[0]));
+}
+template <unsigned VLEN, typename src_elem_t>
+void vector_slide(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
+    uint64_t elem_count = VLEN * vtype.lmul() / (sizeof(src_elem_t) * 8);
+    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
+    auto vs2_view = get_vreg<VLEN, src_elem_t>(V, vs2, elem_count);
+    auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
+    for(unsigned idx = std::max<int64_t>(vstart, imm); idx < vl; idx++) {
+        bool mask_active = vm ? 1 : mask_reg[idx];
+        src_elem_t src_elem = 0;
+        if(imm >= 0 || (idx - imm < elem_count))
+            src_elem = vs2_view[idx - imm];
+        if(mask_active) {
+            vd_view[idx] = src_elem;
+        } else {
+            vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
+        }
+    }
+    for(unsigned idx = vl; idx < elem_count; idx++) {
+        vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
+    }
+    return;
+}
+template <unsigned VLEN, typename src_elem_t>
+void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
+    vector_slide<VLEN, src_elem_t>(V, vl, vstart, vtype, vm, vd, vs2, 1);
+    vmask_view mask_reg = read_vmask<VLEN>(V, 1);
+    auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, 1);
+    if(vm || mask_reg[0])
+        vd_view[0] = imm;
+    else
+        vd_view[0] = vtype.vma() ? vd_view[0] : vd_view[0];
+}
+template <unsigned VLEN, typename src_elem_t>
+void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
+    vector_slide<VLEN, src_elem_t>(V, vl, vstart, vtype, vm, vd, vs2, -1);
+    if(vl > 0) {
+        vmask_view mask_reg = read_vmask<VLEN>(V, vl);
+        auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, vl);
+        if(vm || mask_reg[vl - 1])
+            vd_view[vl - 1] = imm;
+        else
+            vd_view[vl - 1] = vtype.vma() ? vd_view[vl - 1] : vd_view[vl - 1];
+    }
+}
 } // namespace softvector
\ No newline at end of file