From 453407568c20dabe9578606faaae0fccf465ecef Mon Sep 17 00:00:00 2001 From: Eyck-Alexander Jentzsch Date: Sun, 9 Mar 2025 15:07:53 +0100 Subject: [PATCH] removes carry_t, moves functionality to own functions --- gen_input/templates/interp/CORENAME.cpp.gtl | 51 +++++++++----- src/vm/vector_functions.h | 11 ++- src/vm/vector_functions.hpp | 75 ++++++++++++--------- 3 files changed, 89 insertions(+), 48 deletions(-) diff --git a/gen_input/templates/interp/CORENAME.cpp.gtl b/gen_input/templates/interp/CORENAME.cpp.gtl index 614264c..1294601 100644 --- a/gen_input/templates/interp/CORENAME.cpp.gtl +++ b/gen_input/templates/interp/CORENAME.cpp.gtl @@ -248,30 +248,30 @@ if(vector != null) {%> uint64_t vsxseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vs3, uint64_t rs1_val, uint8_t vs2, uint8_t segment_size, uint8_t index_byte_size, uint8_t data_byte_size, bool ordered){ return functionTable[map_index_size[index_byte_size]][data_byte_size](this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vs3, rs1_val, vs2, segment_size); } - void vector_vector_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val, int8_t carry = 0){ + void vector_vector_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){ switch(sew_val){ case 0b000: - return softvector::vector_vector_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast(carry)); + return softvector::vector_vector_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1); case 0b001: - return softvector::vector_vector_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast(carry)); + return softvector::vector_vector_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1); case 0b010: - return softvector::vector_vector_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast(carry)); + return softvector::vector_vector_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1); case 0b011: - return softvector::vector_vector_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, static_cast(carry)); + return softvector::vector_vector_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1); default: throw new std::runtime_error("Unsupported sew bit value"); } } - void vector_imm_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val, int8_t carry = 0){ + void vector_imm_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){ switch(sew_val){ case 0b000: - return softvector::vector_imm_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, static_cast(carry)); + return softvector::vector_imm_op<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm); case 0b001: - return softvector::vector_imm_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, static_cast(carry)); + return softvector::vector_imm_op<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm); case 0b010: - return softvector::vector_imm_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, static_cast(carry)); + return softvector::vector_imm_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm); case 0b011: - return softvector::vector_imm_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm, static_cast(carry)); + return softvector::vector_imm_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, imm); default: throw new std::runtime_error("Unsupported sew bit value"); } @@ -357,11 +357,32 @@ if(vector != null) {%> throw new std::runtime_error("Unsupported target_sew_pow"); } } - void vector_vector_m(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val, int8_t carry){ - vector_vector_op(V, funct6, funct3, vl, vstart, vtype, 0, vd, vs2, vs1, sew_val, carry); - } - void vector_imm_m(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val, int8_t carry){ - vector_imm_op(V, funct6, funct3, vl, vstart, vtype, 0, vd, vs2, imm, sew_val, carry); + void vector_vector_carry(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val, int8_t carry){ + switch(sew_val){ + case 0b000: + return softvector::vector_vector_carry<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, vs1, carry); + case 0b001: + return softvector::vector_vector_carry<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, vs1, carry); + case 0b010: + return softvector::vector_vector_carry<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, vs1, carry); + case 0b011: + return softvector::vector_vector_carry<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, vs1, carry); + default: + throw new std::runtime_error("Unsupported sew bit value"); + } } + void vector_imm_carry(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val, int8_t carry){ + switch(sew_val){ + case 0b000: + return softvector::vector_imm_carry<${vlen}, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, imm, carry); + case 0b001: + return softvector::vector_imm_carry<${vlen}, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, imm, carry); + case 0b010: + return softvector::vector_imm_carry<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, imm, carry); + case 0b011: + return softvector::vector_imm_carry<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vd, vs2, imm, carry); + default: + throw new std::runtime_error("Unsupported sew bit value"); + } } void carry_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1, uint8_t sew_val){ switch(sew_val){ diff --git a/src/vm/vector_functions.h b/src/vm/vector_functions.h index 8af3ef7..0c34e2c 100644 --- a/src/vm/vector_functions.h +++ b/src/vm/vector_functions.h @@ -68,7 +68,6 @@ struct vmask_view { size_t elem_count; mask_bit_reference operator[](size_t) const; }; -enum class carry_t { NO_CARRY = 0, ADD_CARRY = 1, SUB_CARRY = 2 }; vmask_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t reg_idx = 0); template vmask_view read_vmask(uint8_t* V, uint16_t elem_count, uint8_t reg_idx = 0); @@ -84,10 +83,16 @@ uint64_t vector_load_store_index(void* core, std::function void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, - unsigned vs2, unsigned vs1, carry_t carry = carry_t::NO_CARRY); + unsigned vs2, unsigned vs1); template void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, - unsigned vs2, typename std::make_signed::type imm, carry_t carry = carry_t::NO_CARRY); + unsigned vs2, typename std::make_signed::type imm); +template +void vector_vector_carry(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, unsigned vd, + unsigned vs2, unsigned vs1, signed carry); +template +void vector_imm_carry(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, unsigned vd, unsigned vs2, + typename std::make_signed::type imm, signed carry); template void vector_vector_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1); template diff --git a/src/vm/vector_functions.hpp b/src/vm/vector_functions.hpp index 6e59dc0..9beec73 100644 --- a/src/vm/vector_functions.hpp +++ b/src/vm/vector_functions.hpp @@ -336,54 +336,69 @@ std::function get_funct(unsi } template void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, - unsigned vs2, unsigned vs1, carry_t carry) { + unsigned vs2, unsigned vs1) { uint64_t vlmax = VLEN * vtype.lmul() / vtype.sew(); vmask_view mask_reg = read_vmask(V, vlmax); auto vs1_view = get_vreg(V, vs1, vlmax); auto vs2_view = get_vreg(V, vs2, vlmax); auto vd_view = get_vreg(V, vd, vlmax); auto fn = get_funct(funct6, funct3); - if(carry == carry_t::NO_CARRY) - for(size_t idx = vstart; idx < vl; idx++) { - bool mask_active = vm ? 1 : mask_reg[idx]; - if(mask_active) - vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]); - else if(vtype.vma()) - vd_view[idx] = agnostic_behavior(vd_view[idx]); - } - else if(carry == carry_t::SUB_CARRY) - for(size_t idx = vstart; idx < vl; idx++) - vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]) - mask_reg[idx]; - else - for(size_t idx = vstart; idx < vl; idx++) - vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]) + mask_reg[idx]; + for(size_t idx = vstart; idx < vl; idx++) { + bool mask_active = vm ? 1 : mask_reg[idx]; + if(mask_active) + vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]); + else if(vtype.vma()) + vd_view[idx] = agnostic_behavior(vd_view[idx]); + } if(vtype.vta()) for(size_t idx = vl; idx < vlmax; idx++) vd_view[idx] = agnostic_behavior(vd_view[idx]); } template void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, - unsigned vs2, typename std::make_signed::type imm, carry_t carry) { + unsigned vs2, typename std::make_signed::type imm) { uint64_t vlmax = VLEN * vtype.lmul() / vtype.sew(); vmask_view mask_reg = read_vmask(V, vlmax); auto vs2_view = get_vreg(V, vs2, vlmax); auto vd_view = get_vreg(V, vd, vlmax); auto fn = get_funct(funct6, funct3); - if(carry == carry_t::NO_CARRY) - for(size_t idx = vstart; idx < vl; idx++) { - bool mask_active = vm ? 1 : mask_reg[idx]; - if(mask_active) { - vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm); - } else { - vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx]; - } + for(size_t idx = vstart; idx < vl; idx++) { + bool mask_active = vm ? 1 : mask_reg[idx]; + if(mask_active) { + vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm); + } else { + vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx]; } - else if(carry == carry_t::SUB_CARRY) - for(size_t idx = vstart; idx < vl; idx++) - vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm) - mask_reg[idx]; - else - for(size_t idx = vstart; idx < vl; idx++) - vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm) + mask_reg[idx]; + } + if(vtype.vta()) + for(size_t idx = vl; idx < vlmax; idx++) + vd_view[idx] = agnostic_behavior(vd_view[idx]); +} +template +void vector_vector_carry(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, unsigned vd, + unsigned vs2, unsigned vs1, signed carry) { + uint64_t vlmax = VLEN * vtype.lmul() / vtype.sew(); + vmask_view mask_reg = read_vmask(V, vlmax); + auto vs1_view = get_vreg(V, vs1, vlmax); + auto vs2_view = get_vreg(V, vs2, vlmax); + auto vd_view = get_vreg(V, vd, vlmax); + auto fn = get_funct(funct6, funct3); + for(size_t idx = vstart; idx < vl; idx++) + vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]) + carry * mask_reg[idx]; + if(vtype.vta()) + for(size_t idx = vl; idx < vlmax; idx++) + vd_view[idx] = agnostic_behavior(vd_view[idx]); +} +template +void vector_imm_carry(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, unsigned vd, unsigned vs2, + typename std::make_signed::type imm, signed carry) { + uint64_t vlmax = VLEN * vtype.lmul() / vtype.sew(); + vmask_view mask_reg = read_vmask(V, vlmax); + auto vs2_view = get_vreg(V, vs2, vlmax); + auto vd_view = get_vreg(V, vd, vlmax); + auto fn = get_funct(funct6, funct3); + for(size_t idx = vstart; idx < vl; idx++) + vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm) + carry * mask_reg[idx]; if(vtype.vta()) for(size_t idx = vl; idx < vlmax; idx++) vd_view[idx] = agnostic_behavior(vd_view[idx]);