reworks merge instrs, adds fp comparisons
This commit is contained in:
parent
08280a094f
commit
c01eb39a76
@ -279,11 +279,11 @@ if(vector != null) {%>
|
||||
void vector_vector_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
|
||||
switch(sew_val){
|
||||
case 0b000:
|
||||
return softvector::vector_vector_op<${vlen}, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
|
||||
return softvector::vector_vector_op<${vlen}, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
|
||||
case 0b001:
|
||||
return softvector::vector_vector_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
|
||||
return softvector::vector_vector_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
|
||||
case 0b010:
|
||||
return softvector::vector_vector_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
|
||||
return softvector::vector_vector_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
|
||||
case 0b011: // would widen to 128 bits
|
||||
default:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
@ -422,11 +422,11 @@ if(vector != null) {%>
|
||||
void vector_vector_vw(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
|
||||
switch(sew_val){
|
||||
case 0b000:
|
||||
return softvector::vector_vector_op<${vlen}, uint8_t, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
|
||||
return softvector::vector_vector_op<${vlen}, uint8_t, uint16_t, uint8_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
|
||||
case 0b001:
|
||||
return softvector::vector_vector_op<${vlen}, uint16_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
|
||||
return softvector::vector_vector_op<${vlen}, uint16_t, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
|
||||
case 0b010:
|
||||
return softvector::vector_vector_op<${vlen}, uint32_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1 );
|
||||
return softvector::vector_vector_op<${vlen}, uint32_t, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1);
|
||||
case 0b011: // would require 128 bits vs2 value
|
||||
default:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
@ -448,13 +448,13 @@ if(vector != null) {%>
|
||||
void vector_vector_merge(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
|
||||
switch(sew_val){
|
||||
case 0b000:
|
||||
return softvector::vector_vector_op<${vlen}, uint8_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, vs1, softvector::carry_t::NO_CARRY, true);
|
||||
return softvector::vector_vector_merge<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
|
||||
case 0b001:
|
||||
return softvector::vector_vector_op<${vlen}, uint16_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, vs1, softvector::carry_t::NO_CARRY, true);
|
||||
return softvector::vector_vector_merge<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
|
||||
case 0b010:
|
||||
return softvector::vector_vector_op<${vlen}, uint32_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, vs1, softvector::carry_t::NO_CARRY, true);
|
||||
return softvector::vector_vector_merge<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
|
||||
case 0b011:
|
||||
return softvector::vector_vector_op<${vlen}, uint64_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, vs1, softvector::carry_t::NO_CARRY, true);
|
||||
return softvector::vector_vector_merge<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, vs1);
|
||||
default:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
}
|
||||
@ -462,13 +462,13 @@ if(vector != null) {%>
|
||||
void vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
|
||||
switch(sew_val){
|
||||
case 0b000:
|
||||
return softvector::vector_imm_op<${vlen}, uint8_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, imm, softvector::carry_t::NO_CARRY, true);
|
||||
return softvector::vector_imm_merge<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||
case 0b001:
|
||||
return softvector::vector_imm_op<${vlen}, uint16_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, imm, softvector::carry_t::NO_CARRY, true);
|
||||
return softvector::vector_imm_merge<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||
case 0b010:
|
||||
return softvector::vector_imm_op<${vlen}, uint32_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, imm, softvector::carry_t::NO_CARRY, true);
|
||||
return softvector::vector_imm_merge<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||
case 0b011:
|
||||
return softvector::vector_imm_op<${vlen}, uint64_t>(V, 0, 0, vl, vstart, vtype, vm, vd, vs2, imm, softvector::carry_t::NO_CARRY, true);
|
||||
return softvector::vector_imm_merge<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
|
||||
default:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
}
|
||||
@ -871,6 +871,37 @@ if(vector != null) {%>
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
}
|
||||
}
|
||||
void mask_fp_vector_vector_op(uint8_t* V, uint8_t funct6, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t rm, uint8_t sew_val){
|
||||
switch(sew_val){
|
||||
case 0b000:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
case 0b001:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
case 0b010:
|
||||
return softvector::mask_fp_vector_vector_op<${vlen}, uint32_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
|
||||
case 0b011:
|
||||
return softvector::mask_fp_vector_vector_op<${vlen}, uint64_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
|
||||
default:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
}
|
||||
}
|
||||
void mask_fp_vector_imm_op(uint8_t* V, uint8_t funct6, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t rm, uint8_t sew_val){
|
||||
switch(sew_val){
|
||||
case 0b000:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
case 0b001:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
case 0b010:
|
||||
return softvector::mask_fp_vector_imm_op<${vlen}, uint32_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm, rm);
|
||||
case 0b011:
|
||||
return softvector::mask_fp_vector_imm_op<${vlen}, uint64_t>(V, funct6, vl, vstart, vtype, vm, vd, vs2, imm, rm);
|
||||
default:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
}
|
||||
}
|
||||
// Floating-point entry point for the immediate merge.
// It adds no behaviour of its own: every argument is handed unchanged to vector_imm_merge.
void fp_vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
    vector_imm_merge(V, vl, vstart, vtype, vm,
                     vd, vs2, imm, sew_val);
}
|
||||
<%}%>
|
||||
uint64_t fetch_count{0};
|
||||
uint64_t tval{0};
|
||||
|
@ -84,10 +84,14 @@ uint64_t vector_load_store_index(void* core, std::function<bool(void*, uint64_t,
|
||||
uint8_t segment_size);
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = src2_elem_t>
|
||||
void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
|
||||
unsigned vs2, unsigned vs1, carry_t carry = carry_t::NO_CARRY, bool merge = false);
|
||||
unsigned vs2, unsigned vs1, carry_t carry = carry_t::NO_CARRY);
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = src2_elem_t>
|
||||
void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
|
||||
unsigned vs2, typename std::make_signed<src1_elem_t>::type imm, carry_t carry = carry_t::NO_CARRY, bool merge = false);
|
||||
unsigned vs2, typename std::make_signed<src1_elem_t>::type imm, carry_t carry = carry_t::NO_CARRY);
|
||||
// Per-element select between two vector sources: vd[i] receives vs1[i] when the element is
// active (vm set, or its mask bit set) and vs2[i] otherwise. All three registers are viewed
// with element type scr_elem_t.
template <unsigned VLEN, typename scr_elem_t>
|
||||
void vector_vector_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1);
|
||||
// Per-element select between an immediate and a vector source: vd[i] receives imm when the
// element is active (vm set, or its mask bit set) and vs2[i] otherwise. imm is passed as
// uint64_t and stored into elements of type scr_elem_t.
template <unsigned VLEN, typename scr_elem_t>
|
||||
void vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm);
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t>
|
||||
void vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2);
|
||||
template <unsigned VLEN, typename elem_t>
|
||||
@ -144,6 +148,12 @@ void fp_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t = dest_elem_t>
|
||||
void fp_vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
|
||||
uint8_t rm);
|
||||
// Floating-point compare of vs2 against vs1 that writes one result bit per element into the
// mask view of vd. funct6 selects the comparison (VMFEQ, VMFLE, VMFLT, VMFNE, VMFGT, VMFGE);
// rm is forwarded to the per-element comparison function.
template <unsigned VLEN, typename elem_t>
|
||||
void mask_fp_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
|
||||
unsigned vs1, uint8_t rm);
|
||||
// Floating-point compare of vs2 against a single value imm that writes one result bit per
// element into the mask view of vd. funct6 selects the comparison (VMFEQ, VMFLE, VMFLT,
// VMFNE, VMFGT, VMFGE); rm is forwarded to the per-element comparison function.
template <unsigned VLEN, typename elem_t>
|
||||
void mask_fp_vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
|
||||
elem_t imm, uint8_t rm);
|
||||
} // namespace softvector
|
||||
#include "vm/vector_functions.hpp"
|
||||
#endif /* _VM_VECTOR_FUNCTIONS_H_ */
|
||||
|
@ -337,16 +337,9 @@ std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsi
|
||||
else
|
||||
throw new std::runtime_error("Unknown funct3 in get_funct");
|
||||
}
|
||||
/**
 * Picks the per-element combiner for the merge/move path.
 *
 * @param vm  instruction-level vm bit: set selects the VMV behaviour, clear selects VMERGE
 * @return    callable taking (mask bit, vs2 element, vs1 element) and returning the element
 *            to store. For VMV the result is always vs1; for VMERGE it is vs1 when the mask
 *            bit is set and vs2 otherwise.
 */
template <typename dest_elem_t> std::function<dest_elem_t(bool, dest_elem_t, dest_elem_t)> get_merge_funct(bool vm) {
    if(!vm) {
        // VMERGE: the element's mask bit decides which source survives
        return [](bool mask_bit, dest_elem_t from_vs2, dest_elem_t from_vs1) {
            if(mask_bit)
                return from_vs1;
            return from_vs2;
        };
    }
    // VMV: the mask bit and vs2 are ignored
    return [](bool, dest_elem_t, dest_elem_t from_vs1) { return from_vs1; };
}
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t, typename src1_elem_t>
|
||||
void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
|
||||
unsigned vs2, unsigned vs1, carry_t carry, bool merge) {
|
||||
unsigned vs2, unsigned vs1, carry_t carry) {
|
||||
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
|
||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||
auto vs1_view = get_vreg<VLEN, src1_elem_t>(V, vs1, elem_count);
|
||||
@ -355,12 +348,7 @@ void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
|
||||
auto fn = get_funct<dest_elem_t, src2_elem_t, src1_elem_t>(funct6, funct3);
|
||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||
// body is from vstart to min(elem_count, vl)
|
||||
if(merge) {
|
||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||
auto merge_fn = get_merge_funct<dest_elem_t>(vm);
|
||||
vd_view[idx] = merge_fn(mask_reg[idx], vs2_view[idx], vs1_view[idx]);
|
||||
}
|
||||
} else if(carry == carry_t::NO_CARRY) {
|
||||
if(carry == carry_t::NO_CARRY) {
|
||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||
if(mask_active) {
|
||||
@ -388,7 +376,7 @@ void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
|
||||
}
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t, typename src1_elem_t>
|
||||
void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
|
||||
unsigned vs2, typename std::make_signed<src1_elem_t>::type imm, carry_t carry, bool merge) {
|
||||
unsigned vs2, typename std::make_signed<src1_elem_t>::type imm, carry_t carry) {
|
||||
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
|
||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||
auto vs2_view = get_vreg<VLEN, src2_elem_t>(V, vs2, elem_count);
|
||||
@ -396,15 +384,7 @@ void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
|
||||
auto fn = get_funct<dest_elem_t, src2_elem_t, src1_elem_t>(funct6, funct3);
|
||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||
// body is from vstart to min(elem_count, vl)
|
||||
if(merge) {
|
||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||
auto cur_mask = mask_reg[idx];
|
||||
auto vd_val = vd_view[idx];
|
||||
auto vs2_val = vs2_view[idx];
|
||||
auto merge_fn = get_merge_funct<dest_elem_t>(vm);
|
||||
vd_view[idx] = merge_fn(mask_reg[idx], vs2_view[idx], imm);
|
||||
}
|
||||
} else if(carry == carry_t::NO_CARRY) {
|
||||
if(carry == carry_t::NO_CARRY) {
|
||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||
if(mask_active) {
|
||||
@ -430,6 +410,35 @@ void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
|
||||
}
|
||||
return;
|
||||
}
|
||||
template <unsigned VLEN, typename scr_elem_t>
|
||||
void vector_vector_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, unsigned vs1) {
|
||||
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
|
||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||
auto vs1_view = get_vreg<VLEN, scr_elem_t>(V, vs1, elem_count);
|
||||
auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, elem_count);
|
||||
auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, elem_count);
|
||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||
if(mask_active)
|
||||
vd_view[idx] = vs1_view[idx];
|
||||
else
|
||||
vd_view[idx] = vs2_view[idx];
|
||||
}
|
||||
}
|
||||
template <unsigned VLEN, typename scr_elem_t>
|
||||
void vector_imm_merge(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm) {
|
||||
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
|
||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||
auto vs2_view = get_vreg<VLEN, scr_elem_t>(V, vs2, elem_count);
|
||||
auto vd_view = get_vreg<VLEN, scr_elem_t>(V, vd, elem_count);
|
||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||
if(mask_active)
|
||||
vd_view[idx] = imm;
|
||||
else
|
||||
vd_view[idx] = vs2_view[idx];
|
||||
}
|
||||
}
|
||||
template <typename elem_t> std::function<bool(elem_t, elem_t)> get_mask_funct(unsigned funct6, unsigned funct3) {
|
||||
if(funct3 == OPIVV || funct3 == OPIVX || funct3 == OPIVI)
|
||||
switch(funct6) {
|
||||
@ -1055,15 +1064,11 @@ std::function<dest_elem_t(uint8_t, uint_fast8_t&, dest_elem_t, src2_elem_t, src1
|
||||
};
|
||||
case 0b000100: // VFMIN
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||
dest_elem_t val = fp_min<dest_elem_t>(vs2, vs1);
|
||||
accrued_flags |= softfloat_exceptionFlags;
|
||||
return val;
|
||||
return fp_min<dest_elem_t>(vs2, vs1);
|
||||
};
|
||||
case 0b000110: // VFMAX
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||
dest_elem_t val = fp_max<dest_elem_t>(vs2, vs1);
|
||||
accrued_flags |= softfloat_exceptionFlags;
|
||||
return val;
|
||||
return fp_max<dest_elem_t>(vs2, vs1);
|
||||
};
|
||||
case 0b100000: // VFDIV
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||
@ -1210,12 +1215,6 @@ std::function<dest_elem_t(uint8_t, uint_fast8_t&, dest_elem_t, src2_elem_t, src1
|
||||
return vs2 ^ (vs1 & sign_mask);
|
||||
};
|
||||
case 0b010111: // VFMERGE/VFMV
|
||||
case 0b011000: // VMFEQ
|
||||
case 0b011001: // VMFLE
|
||||
case 0b011011: // VMFLT
|
||||
case 0b011100: // VMFNE
|
||||
case 0b011101: // VMFGT
|
||||
case 0b011111: // VMFGE
|
||||
|
||||
default:
|
||||
throw new std::runtime_error("Unknown funct6 in get_fp_funct");
|
||||
@ -1297,12 +1296,10 @@ std::function<void(uint8_t, uint_fast8_t&, dest_elem_t&, src_elem_t)> get_fp_red
|
||||
case 0b000101: // VFREDMIN
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
|
||||
running_total = fp_min<dest_elem_t>(running_total, vs2);
|
||||
accrued_flags |= softfloat_exceptionFlags;
|
||||
};
|
||||
case 0b000111: // VFREDMAX
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
|
||||
running_total = fp_max<dest_elem_t>(running_total, vs2);
|
||||
accrued_flags |= softfloat_exceptionFlags;
|
||||
};
|
||||
case 0b110001: // VFWREDUSUM
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
|
||||
@ -1414,6 +1411,104 @@ void fp_vector_unary_op(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vst
|
||||
}
|
||||
return;
|
||||
}
|
||||
template <typename elem_size_t> bool fp_eq(elem_size_t, elem_size_t);
|
||||
template <> inline bool fp_eq<uint32_t>(uint32_t v2, uint32_t v1) { return fcmp_s(v2, v1, 0); }
|
||||
template <> inline bool fp_eq<uint64_t>(uint64_t v2, uint64_t v1) { return fcmp_d(v2, v1, 0); }
|
||||
template <typename elem_size_t> bool fp_le(elem_size_t, elem_size_t);
|
||||
template <> inline bool fp_le<uint32_t>(uint32_t v2, uint32_t v1) { return fcmp_s(v2, v1, 1); }
|
||||
template <> inline bool fp_le<uint64_t>(uint64_t v2, uint64_t v1) { return fcmp_d(v2, v1, 1); }
|
||||
template <typename elem_size_t> bool fp_lt(elem_size_t, elem_size_t);
|
||||
template <> inline bool fp_lt<uint32_t>(uint32_t v2, uint32_t v1) { return fcmp_s(v2, v1, 2); }
|
||||
template <> inline bool fp_lt<uint64_t>(uint64_t v2, uint64_t v1) { return fcmp_d(v2, v1, 2); }
|
||||
template <typename elem_t> std::function<bool(uint8_t, uint_fast8_t&, elem_t, elem_t)> get_fp_mask_funct(unsigned funct6) {
|
||||
switch(funct6) {
|
||||
case 0b011000: // VMFEQ
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, elem_t vs2, elem_t vs1) {
|
||||
elem_t val = fp_eq(vs2, vs1);
|
||||
accrued_flags |= softfloat_exceptionFlags;
|
||||
return val;
|
||||
};
|
||||
case 0b011001: // VMFLE
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, elem_t vs2, elem_t vs1) {
|
||||
elem_t val = fp_le(vs2, vs1);
|
||||
accrued_flags |= softfloat_exceptionFlags;
|
||||
return val;
|
||||
};
|
||||
case 0b011011: // VMFLT
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, elem_t vs2, elem_t vs1) {
|
||||
elem_t val = fp_lt(vs2, vs1);
|
||||
accrued_flags |= softfloat_exceptionFlags;
|
||||
return val;
|
||||
};
|
||||
case 0b011100: // VMFNE
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, elem_t vs2, elem_t vs1) {
|
||||
elem_t val = !fp_eq(vs2, vs1);
|
||||
accrued_flags |= softfloat_exceptionFlags;
|
||||
return val;
|
||||
};
|
||||
case 0b011101: // VMFGT
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, elem_t vs2, elem_t vs1) {
|
||||
elem_t val = fp_lt(vs1, vs2);
|
||||
accrued_flags |= softfloat_exceptionFlags;
|
||||
return val;
|
||||
};
|
||||
case 0b011111: // VMFGE
|
||||
return [](uint8_t rm, uint_fast8_t& accrued_flags, elem_t vs2, elem_t vs1) {
|
||||
elem_t val = fp_le(vs1, vs2);
|
||||
accrued_flags |= softfloat_exceptionFlags;
|
||||
return val;
|
||||
};
|
||||
default:
|
||||
throw new std::runtime_error("Unknown funct6 in get_fp_mask_funct");
|
||||
}
|
||||
}
|
||||
template <unsigned VLEN, typename elem_t>
|
||||
void mask_fp_vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
|
||||
unsigned vs1, uint8_t rm) {
|
||||
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
|
||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||
auto vs1_view = get_vreg<VLEN, elem_t>(V, vs1, elem_count);
|
||||
auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
|
||||
vmask_view vd_mask_view = read_vmask<VLEN>(V, VLEN, vd);
|
||||
auto fn = get_fp_mask_funct<elem_t>(funct6);
|
||||
uint_fast8_t accrued_flags = 0;
|
||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||
if(mask_active) {
|
||||
vd_mask_view[idx] = fn(rm, accrued_flags, vs2_view[idx], vs1_view[idx]);
|
||||
} else {
|
||||
vd_mask_view[idx] = vtype.vma() ? vd_mask_view[idx] : vd_mask_view[idx];
|
||||
}
|
||||
}
|
||||
softfloat_exceptionFlags = accrued_flags;
|
||||
for(unsigned idx = vl; idx < VLEN; idx++) {
|
||||
vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
|
||||
}
|
||||
return;
|
||||
}
|
||||
template <unsigned VLEN, typename elem_t>
|
||||
void mask_fp_vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
|
||||
elem_t imm, uint8_t rm) {
|
||||
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
|
||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||
auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
|
||||
vmask_view vd_mask_view = read_vmask<VLEN>(V, VLEN, vd);
|
||||
auto fn = get_fp_mask_funct<elem_t>(funct6);
|
||||
uint_fast8_t accrued_flags = 0;
|
||||
for(unsigned idx = vstart; idx < vl; idx++) {
|
||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||
if(mask_active) {
|
||||
vd_mask_view[idx] = fn(rm, accrued_flags, vs2_view[idx], imm);
|
||||
} else {
|
||||
vd_mask_view[idx] = vtype.vma() ? vd_mask_view[idx] : vd_mask_view[idx];
|
||||
}
|
||||
}
|
||||
softfloat_exceptionFlags = accrued_flags;
|
||||
for(unsigned idx = vl; idx < VLEN; idx++) {
|
||||
vd_mask_view[idx] = vtype.vta() ? vd_mask_view[idx] : vd_mask_view[idx];
|
||||
}
|
||||
return;
|
||||
}
|
||||
template <unsigned VLEN>
|
||||
void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, unsigned vd, unsigned vs2, unsigned vs1) {
|
||||
uint64_t elem_count = VLEN;
|
||||
|
Loading…
x
Reference in New Issue
Block a user