diff --git a/gen_input/templates/interp/CORENAME.cpp.gtl b/gen_input/templates/interp/CORENAME.cpp.gtl index 0b3ed96..2cb0e3c 100644 --- a/gen_input/templates/interp/CORENAME.cpp.gtl +++ b/gen_input/templates/interp/CORENAME.cpp.gtl @@ -629,13 +629,13 @@ if(vector != null) {%> void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) { switch(sew_val){ case 0b000: - return softvector::vector_slide<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); + return softvector::vector_slideup<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); case 0b001: - return softvector::vector_slide<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); + return softvector::vector_slideup<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); case 0b010: - return softvector::vector_slide<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); + return softvector::vector_slideup<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); case 0b011: - return softvector::vector_slide<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); + return softvector::vector_slideup<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); default: throw new std::runtime_error("Unsupported sew bit value"); } @@ -643,13 +643,13 @@ if(vector != null) {%> void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) { switch(sew_val){ case 0b000: - return softvector::vector_slide<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm); + return softvector::vector_slidedown<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); case 0b001: - return softvector::vector_slide<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm); + return softvector::vector_slidedown<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); case 0b010: - return 
softvector::vector_slide<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm); + return softvector::vector_slidedown<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); case 0b011: - return softvector::vector_slide<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm); + return softvector::vector_slidedown<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm); default: throw new std::runtime_error("Unsupported sew bit value"); } diff --git a/src/vm/vector_functions.h b/src/vm/vector_functions.h index b8d0201..1a51262 100644 --- a/src/vm/vector_functions.h +++ b/src/vm/vector_functions.h @@ -124,11 +124,13 @@ void viota(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uns template void vid(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd); template uint64_t scalar_move(uint8_t* V, vtype_t vtype, unsigned vd, uint64_t val, bool to_vector); template -void vector_slide(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm); +void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm); template -void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm); +void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm); template -void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm); +void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm); +template +void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm); template void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, 
unsigned vs1); template diff --git a/src/vm/vector_functions.hpp b/src/vm/vector_functions.hpp index 609dc62..ac47eca 100644 --- a/src/vm/vector_functions.hpp +++ b/src/vm/vector_functions.hpp @@ -349,7 +349,7 @@ void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) if(carry == carry_t::NO_CARRY) { - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]); @@ -358,18 +358,18 @@ void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, } } } else if(carry == carry_t::SUB_CARRY) { - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]) - mask_reg[idx]; } } else { - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]) + mask_reg[idx]; } } // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8); - for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { vd_view[idx] = vtype.vta() ? 
vd_view[idx] : vd_view[idx]; } return; @@ -385,7 +385,7 @@ void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) if(carry == carry_t::NO_CARRY) { - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm); @@ -394,18 +394,18 @@ void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui } } } else if(carry == carry_t::SUB_CARRY) { - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm) - mask_reg[idx]; } } else { - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm) + mask_reg[idx]; } } // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8); - for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; } return; @@ -417,7 +417,7 @@ void vector_vector_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype auto vs1_view = get_vreg(V, vs1, elem_count); auto vs2_view = get_vreg(V, vs2, elem_count); auto vd_view = get_vreg(V, vd, elem_count); - for(unsigned idx = vstart; idx < vl; idx++) { + for(size_t idx = vstart; idx < vl; idx++) { bool mask_active = vm ? 
1 : mask_reg[idx]; if(mask_active) vd_view[idx] = vs1_view[idx]; @@ -431,7 +431,7 @@ void vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, b vmask_view mask_reg = read_vmask(V, elem_count); auto vs2_view = get_vreg(V, vs2, elem_count); auto vd_view = get_vreg(V, vd, elem_count); - for(unsigned idx = vstart; idx < vl; idx++) { + for(size_t idx = vstart; idx < vl; idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) vd_view[idx] = imm; @@ -503,7 +503,7 @@ void mask_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_ auto fn = get_mask_funct(funct6, funct3); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_mask_view[idx] = fn(vs2_view[idx], vs1_view[idx]); @@ -513,7 +513,7 @@ void mask_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_ } // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail - for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < VLEN; idx++) { vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx]; } return; @@ -528,7 +528,7 @@ void mask_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t v auto fn = get_mask_funct(funct6, funct3); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 
1 : mask_reg[idx]; if(mask_active) { vd_mask_view[idx] = fn(vs2_view[idx], imm); @@ -538,7 +538,7 @@ void mask_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t v } // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail - for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < VLEN; idx++) { vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx]; } return; @@ -567,7 +567,7 @@ void vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart auto fn = get_unary_fn(unary_op); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_view[idx] = fn(vs2_view[idx]); @@ -578,7 +578,7 @@ void vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8); - for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; } return; @@ -608,13 +608,13 @@ void carry_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vs auto fn = get_carry_funct(funct); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { elem_t carry = vm ? 
0 : mask_reg[idx]; vd_mask_view[idx] = fn(vs2_view[idx], vs1_view[idx], carry); } // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail - for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < VLEN; idx++) { // always tail agnostic } return; @@ -629,13 +629,13 @@ void carry_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstar auto fn = get_carry_funct(funct); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { elem_t carry = vm ? 0 : mask_reg[idx]; vd_mask_view[idx] = fn(vs2_view[idx], imm, carry); } // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail - for(unsigned idx = std::min(elem_count, vl); idx < VLEN; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < VLEN; idx++) { // always tail agnostic } return; @@ -814,7 +814,7 @@ bool sat_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t auto fn = get_sat_funct(funct6, funct3); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 
1 : mask_reg[idx]; if(mask_active) { saturated |= fn(vxrm, vtype, vd_view[idx], vs2_view[idx], vs1_view[idx]); @@ -825,7 +825,7 @@ bool sat_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8); - for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; } return saturated; @@ -841,7 +841,7 @@ bool sat_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl auto fn = get_sat_funct(funct6, funct3); // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { saturated |= fn(vxrm, vtype, vd_view[idx], vs2_view[idx], imm); @@ -852,7 +852,7 @@ bool sat_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8); - for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { vd_view[idx] = vtype.vta() ? 
vd_view[idx] : vd_view[idx]; } return saturated; @@ -916,7 +916,7 @@ void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui auto vd_view = get_vreg(V, vd, elem_count); auto fn = get_red_funct(funct6, funct3); dest_elem_t& running_total = {vs1_elem}; - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { fn(running_total, vs2_view[idx]); @@ -924,7 +924,7 @@ void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui } vd_view[0] = running_total; // the tail is all elements of the destination register beyond the first one - for(unsigned idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) { + for(size_t idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; } return; @@ -1228,7 +1228,7 @@ void fp_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t uint8_t accrued_flags = 0; // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_view[idx] = fn(rm, accrued_flags, vd_view[idx], vs2_view[idx], vs1_view[idx]); @@ -1240,7 +1240,7 @@ void fp_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8); - for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { vd_view[idx] = vtype.vta() ? 
vd_view[idx] : vd_view[idx]; } return; @@ -1256,7 +1256,7 @@ void fp_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint8_t accrued_flags = 0; // elements w/ index smaller than vstart are in the prestart and get skipped // body is from vstart to min(elem_count, vl) - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_view[idx] = fn(rm, accrued_flags, vd_view[idx], vs2_view[idx], imm); @@ -1268,7 +1268,7 @@ void fp_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, // elements w/ index larger than elem_count are in the tail (fractional LMUL) // elements w/ index larger than vl are in the tail unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8); - for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; } return; @@ -1324,7 +1324,7 @@ void fp_vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, auto fn = get_fp_red_funct(funct6, funct3); dest_elem_t& running_total = {vs1_elem}; uint8_t accrued_flags = 0; - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { fn(rm, accrued_flags, running_total, vs2_view[idx]); @@ -1333,7 +1333,7 @@ void fp_vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, vd_view[0] = running_total; softfloat_exceptionFlags = accrued_flags; // the tail is all elements of the destination register beyond the first one - for(unsigned idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) { + for(size_t idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) { vd_view[idx] = vtype.vta() ? 
vd_view[idx] : vd_view[idx]; } return; @@ -1517,7 +1517,7 @@ void fp_vector_unary_op(uint8_t* V, unsigned encoding_space, unsigned unary_op, auto vd_view = get_vreg(V, vd, elem_count); auto fn = get_fp_unary_fn(encoding_space, unary_op); uint8_t accrued_flags = 0; - for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) { + for(size_t idx = vstart; idx < std::min(elem_count, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_view[idx] = fn(rm, accrued_flags, vs2_view[idx]); @@ -1527,7 +1527,7 @@ void fp_vector_unary_op(uint8_t* V, unsigned encoding_space, unsigned unary_op, } softfloat_exceptionFlags = accrued_flags; unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(dest_elem_t) * 8); - for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { + for(size_t idx = std::min(elem_count, vl); idx < maximum_elems; idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; } return; @@ -1593,7 +1593,7 @@ void mask_fp_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vmask_view vd_mask_view = read_vmask(V, VLEN, vd); auto fn = get_fp_mask_funct(funct6); uint8_t accrued_flags = 0; - for(unsigned idx = vstart; idx < vl; idx++) { + for(size_t idx = vstart; idx < vl; idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_mask_view[idx] = fn(rm, accrued_flags, vs2_view[idx], vs1_view[idx]); @@ -1602,7 +1602,7 @@ void mask_fp_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t } } softfloat_exceptionFlags = accrued_flags; - for(unsigned idx = vl; idx < VLEN; idx++) { + for(size_t idx = vl; idx < VLEN; idx++) { vd_mask_view[idx] = vtype.vta() ? 
vd_mask_view[idx] : vd_mask_view[idx]; } return; @@ -1616,7 +1616,7 @@ void mask_fp_vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vs vmask_view vd_mask_view = read_vmask(V, VLEN, vd); auto fn = get_fp_mask_funct(funct6); uint8_t accrued_flags = 0; - for(unsigned idx = vstart; idx < vl; idx++) { + for(size_t idx = vstart; idx < vl; idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_mask_view[idx] = fn(rm, accrued_flags, vs2_view[idx], imm); @@ -1625,7 +1625,7 @@ void mask_fp_vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vs } } softfloat_exceptionFlags = accrued_flags; - for(unsigned idx = vl; idx < VLEN; idx++) { + for(size_t idx = vl; idx < VLEN; idx++) { vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx]; } return; @@ -1637,11 +1637,11 @@ void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uin auto vs2_view = read_vmask(V, elem_count, vs2); auto vd_view = read_vmask(V, elem_count, vd); auto fn = get_mask_funct(funct6, funct3); // could be bool, but would break the make_signed_t in get_mask_funct - for(unsigned idx = vstart; idx < vl; idx++) { + for(size_t idx = vstart; idx < vl; idx++) { vd_view[idx] = fn(vs2_view[idx], vs1_view[idx]); } // the tail is all elements of the destination register beyond the first one - for(unsigned idx = 1; idx < VLEN; idx++) { + for(size_t idx = 1; idx < VLEN; idx++) { // always tail agnostic // this is a nop, placeholder for vta behavior vd_view[idx] = vd_view[idx]; @@ -1653,7 +1653,7 @@ template uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart auto vs2_view = read_vmask(V, elem_count, vs2); vmask_view mask_reg = read_vmask(V, elem_count); unsigned running_total = 0; - for(unsigned idx = vstart; idx < vl; idx++) { + for(size_t idx = vstart; idx < vl; idx++) { bool mask_active = vm ? 
1 : mask_reg[idx]; if(mask_active && vs2_view[idx]) running_total += 1; @@ -1664,7 +1664,7 @@ template uint64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstar uint64_t elem_count = VLEN; auto vs2_view = read_vmask(V, elem_count, vs2); vmask_view mask_reg = read_vmask(V, elem_count); - for(unsigned idx = vstart; idx < vl; idx++) { + for(size_t idx = vstart; idx < vl; idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active && vs2_view[idx]) return idx; @@ -1714,14 +1714,14 @@ template void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl, vmask_view mask_reg = read_vmask(V, elem_count); auto fn = get_mask_set_funct(enc); bool marker = false; - for(unsigned idx = vstart; idx < vl; idx++) { + for(size_t idx = vstart; idx < vl; idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_view[idx] = fn(marker, vs2_view[idx]); } } // the tail is all elements of the destination register beyond the first one - for(unsigned idx = vl; idx < VLEN; idx++) { + for(size_t idx = vl; idx < VLEN; idx++) { // always tail agnostic // this is a nop, placeholder for vta behavior vd_view[idx] = vd_view[idx]; @@ -1734,7 +1734,7 @@ void viota(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uns auto vd_view = get_vreg(V, vd, elem_count); vmask_view mask_reg = read_vmask(V, elem_count); unsigned current = 0; - for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) { + for(size_t idx = vstart; idx < std::min(vl, elem_count); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_view[idx] = current; @@ -1748,7 +1748,7 @@ template void vid(uint8_t* V, uint64_t vl, uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew(); auto vd_view = get_vreg(V, vd, elem_count); vmask_view mask_reg = read_vmask(V, elem_count); - for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) { + for(size_t idx = vstart; idx < std::min(vl, elem_count); idx++) { bool mask_active = vm ? 
1 : mask_reg[idx]; if(mask_active) { vd_view[idx] = idx; @@ -1761,37 +1761,53 @@ template uint64_t scalar_move(uint8_t* V, v auto vd_view = get_vreg(V, vd, vlmax); if(to_vector) { vd_view[0] = val; - for(unsigned idx = 1; idx < vlmax; idx++) { + for(size_t idx = 1; idx < vlmax; idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; } } return static_cast(static_cast>(vd_view[0])); } template -void vector_slide(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) { +void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm) { uint64_t elem_count = VLEN * vtype.lmul() / (sizeof(src_elem_t) * 8); vmask_view mask_reg = read_vmask(V, elem_count); auto vs2_view = get_vreg(V, vs2, elem_count); auto vd_view = get_vreg(V, vd, elem_count); - for(unsigned idx = std::max(vstart, imm); idx < vl; idx++) { + for(size_t idx = std::max(vstart, imm); idx < vl; idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; - src_elem_t src_elem = 0; - if(imm >= 0 || (idx - imm < elem_count)) - src_elem = vs2_view[idx - imm]; if(mask_active) { - vd_view[idx] = src_elem; + vd_view[idx] = idx - imm < elem_count ? vs2_view[idx - imm] : 0; } else { vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx]; } } - for(unsigned idx = vl; idx < elem_count; idx++) { + for(size_t idx = vl; idx < elem_count; idx++) { vd_view[idx] = vtype.vta() ? 
vd_view[idx] : vd_view[idx]; } return; } template -void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) { - vector_slide(V, vl, vstart, vtype, vm, vd, vs2, 1); +void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm) { + uint64_t elem_count = VLEN * vtype.lmul() / (sizeof(src_elem_t) * 8); + vmask_view mask_reg = read_vmask(V, elem_count); + auto vs2_view = get_vreg(V, vs2, elem_count); + auto vd_view = get_vreg(V, vd, elem_count); + for(size_t idx = vstart; idx < vl; idx++) { + bool mask_active = vm ? 1 : mask_reg[idx]; + if(mask_active) { + vd_view[idx] = std::numeric_limits<uint64_t>::max() - idx > imm && idx + imm < elem_count ? vs2_view[idx + imm] : 0; + } else { + vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx]; + } + } + for(size_t idx = vl; idx < elem_count; idx++) { + vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; + } + return; +} +template +void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm) { + vector_slideup(V, vl, vstart, vtype, vm, vd, vs2, 1); vmask_view mask_reg = read_vmask(V, 1); auto vd_view = get_vreg(V, vd, 1); if(vm || mask_reg[0]) @@ -1800,8 +1816,8 @@ void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bo vd_view[0] = vtype.vma() ? 
vd_view[0] : vd_view[0]; } template -void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) { - vector_slide(V, vl, vstart, vtype, vm, vd, vs2, -1); +void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm) { + vector_slidedown(V, vl, vstart, vtype, vm, vd, vs2, 1); if(vl > 0) { vmask_view mask_reg = read_vmask(V, vl); auto vd_view = get_vreg(V, vd, vl); @@ -1818,7 +1834,7 @@ void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtyp auto vs1_view = get_vreg(V, vs1, vlmax); auto vs2_view = get_vreg(V, vs2, vlmax); auto vd_view = get_vreg(V, vd, vlmax); - for(unsigned idx = vstart; idx < std::min(vlmax, vl); idx++) { + for(size_t idx = vstart; idx < std::min(vlmax, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_view[idx] = (vs1_view[idx] >= vlmax) ? 0 : vs2_view[vs1_view[idx]]; @@ -1826,7 +1842,7 @@ void vector_vector_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtyp vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx]; } } - for(unsigned idx = vl; idx < vlmax; idx++) { + for(size_t idx = vl; idx < vlmax; idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; } return; @@ -1837,7 +1853,7 @@ void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, vmask_view mask_reg = read_vmask(V, vlmax); auto vs2_view = get_vreg(V, vs2, vlmax); auto vd_view = get_vreg(V, vd, vlmax); - for(unsigned idx = vstart; idx < std::min(vlmax, vl); idx++) { + for(size_t idx = vstart; idx < std::min(vlmax, vl); idx++) { bool mask_active = vm ? 1 : mask_reg[idx]; if(mask_active) { vd_view[idx] = (imm >= vlmax) ? 0 : vs2_view[imm]; @@ -1845,7 +1861,7 @@ void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, vd_view[idx] = vtype.vma() ? 
vd_view[idx] : vd_view[idx]; } } - for(unsigned idx = vl; idx < vlmax; idx++) { + for(size_t idx = vl; idx < vlmax; idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; } return; @@ -1857,13 +1873,13 @@ void vector_compress(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, un auto vs2_view = get_vreg(V, vs2, vlmax); auto vd_view = get_vreg(V, vd, vlmax); unsigned current_pos = 0; - for(unsigned idx = vstart; idx < std::min(vlmax, vl); idx++) { + for(size_t idx = vstart; idx < std::min(vlmax, vl); idx++) { if(mask_reg[idx]) { vd_view[current_pos] = vs2_view[idx]; current_pos += 1; } } - for(unsigned idx = current_pos; idx < vlmax; idx++) { + for(size_t idx = current_pos; idx < vlmax; idx++) { vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; } return;