reworks merge instrs, adds fp comparisons

This commit is contained in:
Eyck-Alexander Jentzsch 2025-03-04 12:19:18 +01:00
parent 08280a094f
commit c01eb39a76
3 changed files with 190 additions and 54 deletions

View File

@ -279,11 +279,11 @@ if(vector != null) {%>
void vector_vector_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::vector_vector_op<${vlen}, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b001:
return softvector::vector_vector_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b010:
return softvector::vector_vector_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b011: // would widen to 128 bits
default:
throw new std::runtime_error("Unsupported sew bit value");
@ -422,11 +422,11 @@ if(vector != null) {%>
void vector_vector_vw(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::vector_vector_op<${vlen}, uint8_t, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint8_t, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b001:
return softvector::vector_vector_op<${vlen}, uint16_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint16_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b010:
return softvector::vector_vector_op<${vlen}, uint32_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
return softvector::vector_vector_op<${vlen}, uint32_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b011: // would require 128 bits vs2 value
default:
throw new std::runtime_error("Unsupported sew bit value");
@ -448,13 +448,13 @@ if(vector != null) {%>
void vector_vector_merge(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::vector_vector_op<${vlen}, uint8_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, vs1, softvector::carry_t::NO_CARRY, true);
return softvector::vector_vector_merge<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b001:
return softvector::vector_vector_op<${vlen}, uint16_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, vs1, softvector::carry_t::NO_CARRY, true);
return softvector::vector_vector_merge<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b010:
return softvector::vector_vector_op<${vlen}, uint32_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, vs1, softvector::carry_t::NO_CARRY, true);
return softvector::vector_vector_merge<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
case 0b011:
return softvector::vector_vector_op<${vlen}, uint64_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, vs1, softvector::carry_t::NO_CARRY, true);
return softvector::vector_vector_merge<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
default:
throw new std::runtime_error("Unsupported sew bit value");
}
@ -462,13 +462,13 @@ if(vector != null) {%>
void vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
switch(sew_val){
case 0b000:
return softvector::vector_imm_op<${vlen}, uint8_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, imm, softvector::carry_t::NO_CARRY, true);
return softvector::vector_imm_merge<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b001:
return softvector::vector_imm_op<${vlen}, uint16_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, imm, softvector::carry_t::NO_CARRY, true);
return softvector::vector_imm_merge<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b010:
return softvector::vector_imm_op<${vlen}, uint32_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, imm, softvector::carry_t::NO_CARRY, true);
return softvector::vector_imm_merge<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
case 0b011:
return softvector::vector_imm_op<${vlen}, uint64_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, imm, softvector::carry_t::NO_CARRY, true);
return softvector::vector_imm_merge<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
default:
throw new std::runtime_error("Unsupported sew bit value");
}
@ -871,6 +871,37 @@ if(vector != null) {%>
throw new std::runtime_error("Unsupported sew bit value");
}
}
// Routes a masked floating-point vector-vector mask operation to the softvector
// implementation instantiated for the element width encoded in sew_val.
// Only 0b010 (uint32_t elements) and 0b011 (uint64_t elements) are forwarded;
// every other encoding throws.
void mask_fp_vector_vector_op(uint8_t* V, uint8_t funct6, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t rm, uint8_t sew_val){
    switch(sew_val){
    case 0b010:
        softvector::mask_fp_vector_vector_op<${vlen}, uint32_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
        return;
    case 0b011:
        softvector::mask_fp_vector_vector_op<${vlen}, uint64_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
        return;
    default: // 0b000, 0b001 and all remaining encodings have no instantiation here
        throw new std::runtime_error("Unsupported sew bit value");
    }
}
// Routes a masked floating-point vector-immediate mask operation to the softvector
// implementation instantiated for the element width encoded in sew_val.
// Only 0b010 (uint32_t elements) and 0b011 (uint64_t elements) are forwarded;
// every other encoding throws. imm is handed on as-is and converted to the
// element type by the callee's parameter.
void mask_fp_vector_imm_op(uint8_t* V, uint8_t funct6, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t rm, uint8_t sew_val){
    switch(sew_val){
    case 0b010:
        softvector::mask_fp_vector_imm_op<${vlen}, uint32_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm, rm);
        return;
    case 0b011:
        softvector::mask_fp_vector_imm_op<${vlen}, uint64_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm, rm);
        return;
    default: // 0b000, 0b001 and all remaining encodings have no instantiation here
        throw new std::runtime_error("Unsupported sew bit value");
    }
}
// Floating-point entry point for the immediate merge: it performs no work of its
// own and forwards every argument unchanged to vector_imm_merge, which selects
// the element width from sew_val.
void fp_vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
vector_imm_merge(V, vl, vstart, vtype, vm, vd, vs2, imm, sew_val);
}
<%}%>
uint64_t fetch_count{0};
uint64_t tval{0};

View File

@ -84,10 +84,14 @@ uint64_t vector_load_store_index(void* core, std::function<bool(void*, uint64_t,
uint8_t segment_size);
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = src2_elem_t>
void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, unsigned vs1, carry_t carry = carry_t::NO_CARRY, bool merge = false);
unsigned vs2, unsigned vs1, carry_t carry = carry_t::NO_CARRY);
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = src2_elem_t>
void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, typename std::make_signed<src1_elem_t>::type imm, carry_t carry = carry_t::NO_CARRY, bool merge = false);
unsigned vs2, typename std::make_signed<src1_elem_t>::type imm, carry_t carry = carry_t::NO_CARRY);
// Merge of two vector registers with elements of type scr_elem_t.
// For every index in [vstart, vl): vd[idx] = vs1[idx] when vm is set or the mask
// bit for idx is set, otherwise vd[idx] = vs2[idx].
template <unsigned VLEN, typename scr_elem_t>
void vector_vector_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1);
// Merge of a vector register with a scalar/immediate value, elements of type scr_elem_t.
// For every index in [vstart, vl): vd[idx] = imm (converted to scr_elem_t) when vm is
// set or the mask bit for idx is set, otherwise vd[idx] = vs2[idx].
template <unsigned VLEN, typename scr_elem_t>
void vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t>
void vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2);
template <unsigned VLEN, typename elem_t>
@ -144,6 +148,12 @@ void fp_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t = dest_elem_t>
void fp_vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
uint8_t rm);
// Floating-point comparison of two vector registers producing a mask in vd.
// funct6 selects the comparison; elem_t is the unsigned integer type holding the
// raw bits of one floating-point element. Bits of vd in [vstart, vl) are written
// for active elements (vm set, or mask bit set).
template <unsigned VLEN, typename elem_t>
void mask_fp_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
unsigned vs1, uint8_t rm);
// Floating-point comparison of a vector register against a single value, producing
// a mask in vd. funct6 selects the comparison; imm carries the raw bits of the
// scalar operand in the element type elem_t. Bits of vd in [vstart, vl) are written
// for active elements (vm set, or mask bit set).
template <unsigned VLEN, typename elem_t>
void mask_fp_vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
elem_t imm, uint8_t rm);
} // namespace softvector
#include "vm/vector_functions.hpp"
#endif /* _VM_VECTOR_FUNCTIONS_H_ */

View File

@ -337,16 +337,9 @@ std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsi
else
throw new std::runtime_error("Unknown funct3 in get_funct");
}
// Builds the per-element selection function used by the merge path.
//   vm == true  : the returned callable ignores the mask bit and always yields vs1 (VMV).
//   vm == false : the returned callable yields vs1 when the mask bit is set, vs2 otherwise (VMERGE).
// The first argument of the returned callable is the mask bit of the element,
// followed by the vs2 element and the vs1 element.
template <typename dest_elem_t> std::function<dest_elem_t(bool, dest_elem_t, dest_elem_t)> get_merge_funct(bool vm) {
    using merge_fn_t = std::function<dest_elem_t(bool, dest_elem_t, dest_elem_t)>;
    if(!vm) {
        // VMERGE
        return merge_fn_t([](bool mask_bit, dest_elem_t from_vs2, dest_elem_t from_vs1) { return mask_bit ? from_vs1 : from_vs2; });
    }
    // VMV
    return merge_fn_t([](bool, dest_elem_t, dest_elem_t from_vs1) { return from_vs1; });
}
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t, typename src1_elem_t>
void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, unsigned vs1, carry_t carry, bool merge) {
unsigned vs2, unsigned vs1, carry_t carry) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto vs1_view = get_vreg<VLEN, src1_elem_t>(V, vs1, elem_count);
@ -355,12 +348,7 @@ void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
auto fn = get_funct<dest_elem_t, src2_elem_t, src1_elem_t>(funct6, funct3);
// elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(elem_count, vl)
if(merge) {
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
auto merge_fn = get_merge_funct<dest_elem_t>(vm);
vd_view[idx] = merge_fn(mask_reg[idx], vs2_view[idx], vs1_view[idx]);
}
} else if(carry == carry_t::NO_CARRY) {
if(carry == carry_t::NO_CARRY) {
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
bool mask_active = vm ? 1 : mask_reg[idx];
if(mask_active) {
@ -388,7 +376,7 @@ void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
}
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t, typename src1_elem_t>
void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, typename std::make_signed<src1_elem_t>::type imm, carry_t carry, bool merge) {
unsigned vs2, typename std::make_signed<src1_elem_t>::type imm, carry_t carry) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto vs2_view = get_vreg<VLEN, src2_elem_t>(V, vs2, elem_count);
@ -396,15 +384,7 @@ void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
auto fn = get_funct<dest_elem_t, src2_elem_t, src1_elem_t>(funct6, funct3);
// elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(elem_count, vl)
if(merge) {
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
auto cur_mask = mask_reg[idx];
auto vd_val = vd_view[idx];
auto vs2_val = vs2_view[idx];
auto merge_fn = get_merge_funct<dest_elem_t>(vm);
vd_view[idx] = merge_fn(mask_reg[idx], vs2_view[idx], imm);
}
} else if(carry == carry_t::NO_CARRY) {
if(carry == carry_t::NO_CARRY) {
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
bool mask_active = vm ? 1 : mask_reg[idx];
if(mask_active) {
@ -430,6 +410,35 @@ void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
}
return;
}
/**
 * Element-wise merge of two vector registers.
 *
 * For every index in [vstart, vl) the destination element is taken from vs1
 * when the element is active (vm set, or its mask bit set) and from vs2
 * otherwise. Elements below vstart and at or above vl are not touched.
 *
 * @tparam VLEN       vector register length, forwarded to read_vmask / get_vreg
 * @tparam scr_elem_t element type used for vs1, vs2 and vd
 * @param V      base pointer of the vector register file
 * @param vl     exclusive upper bound of the processed element indices
 * @param vstart first element index that is processed
 * @param vtype  supplies lmul() and sew() used to size the register views
 * @param vm     when true every element counts as active and the mask is ignored
 * @param vd     destination register number
 * @param vs2    source register selected for inactive elements
 * @param vs1    source register selected for active elements
 */
template <unsigned VLEN, typename scr_elem_t>
void vector_vector_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1) {
    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    auto vs1_view = get_vreg<VLEN, scr_elem_t>(V, vs1, elem_count);
    auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, elem_count);
    auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, elem_count);
    // The index has the same width as vstart/vl: a narrower index would truncate
    // vstart and could never reach a vl above UINT_MAX.
    for(uint64_t idx = vstart; idx < vl; idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active)
            vd_view[idx] = vs1_view[idx];
        else
            vd_view[idx] = vs2_view[idx];
    }
}
/**
 * Element-wise merge of a vector register with a single scalar/immediate value.
 *
 * For every index in [vstart, vl) the destination element is set to imm
 * (implicitly converted to scr_elem_t) when the element is active (vm set, or
 * its mask bit set) and copied from vs2 otherwise. Elements below vstart and at
 * or above vl are not touched.
 *
 * @tparam VLEN       vector register length, forwarded to read_vmask / get_vreg
 * @tparam scr_elem_t element type used for vs2 and vd
 * @param V      base pointer of the vector register file
 * @param vl     exclusive upper bound of the processed element indices
 * @param vstart first element index that is processed
 * @param vtype  supplies lmul() and sew() used to size the register views
 * @param vm     when true every element counts as active and the mask is ignored
 * @param vd     destination register number
 * @param vs2    source register selected for inactive elements
 * @param imm    value written to active elements
 */
template <unsigned VLEN, typename scr_elem_t>
void vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm) {
    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, elem_count);
    auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, elem_count);
    // The index has the same width as vstart/vl: a narrower index would truncate
    // vstart and could never reach a vl above UINT_MAX.
    for(uint64_t idx = vstart; idx < vl; idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active)
            vd_view[idx] = imm;
        else
            vd_view[idx] = vs2_view[idx];
    }
}
template <typename elem_t> std::function<bool(elem_t, elem_t)> get_mask_funct(unsigned funct6, unsigned funct3) {
if(funct3 == OPIVV || funct3 == OPIVX || funct3 == OPIVI)
switch(funct6) {
@ -1055,15 +1064,11 @@ std::function<dest_elem_t(uint8_t, uint_fast8_t&, dest_elem_t, src2_elem_t, src1
};
case 0b000100: // VFMIN
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
dest_elem_t val = fp_min<dest_elem_t>(vs2, vs1);
accrued_flags |= softfloat_exceptionFlags;
return val;
return fp_min<dest_elem_t>(vs2, vs1);
};
case 0b000110: // VFMAX
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
dest_elem_t val = fp_max<dest_elem_t>(vs2, vs1);
accrued_flags |= softfloat_exceptionFlags;
return val;
return fp_max<dest_elem_t>(vs2, vs1);
};
case 0b100000: // VFDIV
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
@ -1210,12 +1215,6 @@ std::function<dest_elem_t(uint8_t, uint_fast8_t&, dest_elem_t, src2_elem_t, src1
return vs2 ^ (vs1 & sign_mask);
};
case 0b010111: // VFMERGE/VFMV
case 0b011000: // VMFEQ
case 0b011001: // VMFLE
case 0b011011: // VMFLT
case 0b011100: // VMFNE
case 0b011101: // VMFGT
case 0b011111: // VMFGE
default:
throw new std::runtime_error("Unknown funct6 in get_fp_funct");
@ -1297,12 +1296,10 @@ std::function<void(uint8_t, uint_fast8_t&, dest_elem_t&, src_elem_t)> get_fp_red
case 0b000101: // VFREDMIN
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
running_total = fp_min<dest_elem_t>(running_total, vs2);
accrued_flags |= softfloat_exceptionFlags;
};
case 0b000111: // VFREDMAX
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
running_total = fp_max<dest_elem_t>(running_total, vs2);
accrued_flags |= softfloat_exceptionFlags;
};
case 0b110001: // VFWREDUSUM
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
@ -1414,6 +1411,104 @@ void fp_vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vst
}
return;
}
// Width-dispatched floating-point comparison helpers operating on raw element bits.
// Each primary template is only declared; the uint32_t specialization forwards to
// fcmp_s and the uint64_t specialization to fcmp_d. The third argument is the
// comparison selector understood by fcmp_s / fcmp_d, which are defined elsewhere;
// judging by the helper names 0 = equal, 1 = less-or-equal, 2 = less-than
// (NOTE(review): confirm against the fcmp_s / fcmp_d implementation).

// fp_eq: selector 0.
template <typename elem_size_t> bool fp_eq(elem_size_t, elem_size_t);
template <> inline bool fp_eq<uint32_t>(uint32_t v2, uint32_t v1) { return fcmp_s(v2, v1, 0); }
template <> inline bool fp_eq<uint64_t>(uint64_t v2, uint64_t v1) { return fcmp_d(v2, v1, 0); }
// fp_le: selector 1, operands passed in the order (v2, v1).
template <typename elem_size_t> bool fp_le(elem_size_t, elem_size_t);
template <> inline bool fp_le<uint32_t>(uint32_t v2, uint32_t v1) { return fcmp_s(v2, v1, 1); }
template <> inline bool fp_le<uint64_t>(uint64_t v2, uint64_t v1) { return fcmp_d(v2, v1, 1); }
// fp_lt: selector 2, operands passed in the order (v2, v1).
template <typename elem_size_t> bool fp_lt(elem_size_t, elem_size_t);
template <> inline bool fp_lt<uint32_t>(uint32_t v2, uint32_t v1) { return fcmp_s(v2, v1, 2); }
template <> inline bool fp_lt<uint64_t>(uint64_t v2, uint64_t v1) { return fcmp_d(v2, v1, 2); }
// Returns the comparison callable for a floating-point mask-producing instruction.
// The callable receives (rm, accrued_flags, vs2, vs1), evaluates the comparison
// selected by funct6 on the raw element bits, ORs softfloat_exceptionFlags into
// accrued_flags after the comparison, and returns the comparison result.
// The rounding-mode argument is accepted but not used by any comparison.
// An unrecognised funct6 throws.
template <typename elem_t> std::function<bool(uint8_t, uint_fast8_t&, elem_t, elem_t)> get_fp_mask_funct(unsigned funct6) {
    switch(funct6) {
    case 0b011000: // VMFEQ: vs2 == vs1
        return [](uint8_t, uint_fast8_t& flags, elem_t lhs, elem_t rhs) -> bool {
            const bool outcome = fp_eq(lhs, rhs);
            flags |= softfloat_exceptionFlags;
            return outcome;
        };
    case 0b011001: // VMFLE: vs2 <= vs1
        return [](uint8_t, uint_fast8_t& flags, elem_t lhs, elem_t rhs) -> bool {
            const bool outcome = fp_le(lhs, rhs);
            flags |= softfloat_exceptionFlags;
            return outcome;
        };
    case 0b011011: // VMFLT: vs2 < vs1
        return [](uint8_t, uint_fast8_t& flags, elem_t lhs, elem_t rhs) -> bool {
            const bool outcome = fp_lt(lhs, rhs);
            flags |= softfloat_exceptionFlags;
            return outcome;
        };
    case 0b011100: // VMFNE: negation of the equality comparison
        return [](uint8_t, uint_fast8_t& flags, elem_t lhs, elem_t rhs) -> bool {
            const bool outcome = !fp_eq(lhs, rhs);
            flags |= softfloat_exceptionFlags;
            return outcome;
        };
    case 0b011101: // VMFGT: expressed as less-than with the operands swapped
        return [](uint8_t, uint_fast8_t& flags, elem_t lhs, elem_t rhs) -> bool {
            const bool outcome = fp_lt(rhs, lhs);
            flags |= softfloat_exceptionFlags;
            return outcome;
        };
    case 0b011111: // VMFGE: expressed as less-or-equal with the operands swapped
        return [](uint8_t, uint_fast8_t& flags, elem_t lhs, elem_t rhs) -> bool {
            const bool outcome = fp_le(rhs, lhs);
            flags |= softfloat_exceptionFlags;
            return outcome;
        };
    default:
        throw new std::runtime_error("Unknown funct6 in get_fp_mask_funct");
    }
}
/**
 * Floating-point comparison of two vector registers that writes a mask into vd.
 *
 * For every index in [vstart, vl) that is active (vm set, or its mask bit set),
 * bit idx of the destination mask is set to the result of the comparison
 * selected by funct6 applied to (vs2[idx], vs1[idx]). After the body loop
 * softfloat_exceptionFlags is overwritten with the flags accumulated by the
 * comparisons.
 *
 * @tparam VLEN   vector register length; also the number of destination mask bits visited
 * @tparam elem_t unsigned integer type holding the raw bits of one source element
 * @param V      base pointer of the vector register file
 * @param funct6 comparison selector, resolved through get_fp_mask_funct
 * @param vl     exclusive upper bound of the body element indices
 * @param vstart first element index that is processed
 * @param vtype  supplies lmul(), sew(), vma() and vta()
 * @param vm     when true every element counts as active and the mask is ignored
 * @param vd     destination mask register number
 * @param vs2    first source register number
 * @param vs1    second source register number
 * @param rm     rounding mode, passed through to the comparison callable
 */
template <unsigned VLEN, typename elem_t>
void mask_fp_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
                              unsigned vs1, uint8_t rm) {
    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    auto vs1_view = get_vreg<VLEN, elem_t>(V, vs1, elem_count);
    auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
    vmask_view vd_mask_view = read_vmask<VLEN>(V, VLEN, vd);
    auto fn = get_fp_mask_funct<elem_t>(funct6);
    uint_fast8_t accrued_flags = 0;
    // The index has the same width as vstart/vl: a narrower index would truncate
    // vstart and could never reach a vl above UINT_MAX.
    for(uint64_t idx = vstart; idx < vl; idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active) {
            vd_mask_view[idx] = fn(rm, accrued_flags, vs2_view[idx], vs1_view[idx]);
        } else {
            // Both arms are the same expression, so an inactive element keeps its
            // current destination bit whatever vma() reports.
            vd_mask_view[idx] = vtype.vma() ? vd_mask_view[idx] : vd_mask_view[idx];
        }
    }
    softfloat_exceptionFlags = accrued_flags;
    // Tail: both arms are the same expression, so tail bits keep their current
    // value whatever vta() reports.
    for(uint64_t idx = vl; idx < VLEN; idx++) {
        vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
    }
    return;
}
/**
 * Floating-point comparison of a vector register against a single value that
 * writes a mask into vd.
 *
 * For every index in [vstart, vl) that is active (vm set, or its mask bit set),
 * bit idx of the destination mask is set to the result of the comparison
 * selected by funct6 applied to (vs2[idx], imm). After the body loop
 * softfloat_exceptionFlags is overwritten with the flags accumulated by the
 * comparisons.
 *
 * @tparam VLEN   vector register length; also the number of destination mask bits visited
 * @tparam elem_t unsigned integer type holding the raw bits of one source element
 * @param V      base pointer of the vector register file
 * @param funct6 comparison selector, resolved through get_fp_mask_funct
 * @param vl     exclusive upper bound of the body element indices
 * @param vstart first element index that is processed
 * @param vtype  supplies lmul(), sew(), vma() and vta()
 * @param vm     when true every element counts as active and the mask is ignored
 * @param vd     destination mask register number
 * @param vs2    source register number
 * @param imm    raw bits of the scalar operand, used as the second comparison operand
 * @param rm     rounding mode, passed through to the comparison callable
 */
template <unsigned VLEN, typename elem_t>
void mask_fp_vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
                           elem_t imm, uint8_t rm) {
    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
    vmask_view vd_mask_view = read_vmask<VLEN>(V, VLEN, vd);
    auto fn = get_fp_mask_funct<elem_t>(funct6);
    uint_fast8_t accrued_flags = 0;
    // The index has the same width as vstart/vl: a narrower index would truncate
    // vstart and could never reach a vl above UINT_MAX.
    for(uint64_t idx = vstart; idx < vl; idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active) {
            vd_mask_view[idx] = fn(rm, accrued_flags, vs2_view[idx], imm);
        } else {
            // Both arms are the same expression, so an inactive element keeps its
            // current destination bit whatever vma() reports.
            vd_mask_view[idx] = vtype.vma() ? vd_mask_view[idx] : vd_mask_view[idx];
        }
    }
    softfloat_exceptionFlags = accrued_flags;
    // Tail: both arms are the same expression, so tail bits keep their current
    // value whatever vta() reports.
    for(uint64_t idx = vl; idx < VLEN; idx++) {
        vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
    }
    return;
}
template <unsigned VLEN>
void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, unsigned vd, unsigned vs2, unsigned vs1) {
uint64_t elem_count = VLEN;