|
|
|
@@ -863,14 +863,17 @@ void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
|
|
|
|
|
|
|
|
|
|
// NOTE: equivalent helpers may already exist somewhere in softfloat.
|
|
|
|
|
/// Reports whether a raw IEEE-754 bit pattern encodes a NaN.
/// Only the explicit specializations below provide a definition.
template <typename src_elem_t> constexpr bool isNaN(src_elem_t x);

/// 16-bit pattern: once the sign bit is masked off, anything above the
/// all-ones-exponent / zero-fraction pattern (0x7C00) has a non-zero fraction.
template <> constexpr bool isNaN<uint16_t>(uint16_t x) {
    return (x & 0x7FFF) > 0x7C00;
}

/// 32-bit pattern: same test against 0x7F800000.
template <> constexpr bool isNaN<uint32_t>(uint32_t x) {
    return (x & 0x7FFFFFFF) > 0x7F800000;
}

/// 64-bit pattern: same test against 0x7FF0000000000000.
template <> constexpr bool isNaN<uint64_t>(uint64_t x) {
    return (x & 0x7FFFFFFFFFFFFFFF) > 0x7FF0000000000000;
}
|
|
|
|
|
/// Reports whether a raw bit pattern is exactly "only the top bit set",
/// i.e. the negative-zero encoding for that width.
/// Only the explicit specializations below provide a definition.
template <typename src_elem_t> constexpr bool isNegZero(src_elem_t x);

template <> constexpr bool isNegZero<uint16_t>(uint16_t x) {
    return x == static_cast<uint16_t>(1u << 15);
}

template <> constexpr bool isNegZero<uint32_t>(uint32_t x) {
    return x == (static_cast<uint32_t>(1) << 31);
}

template <> constexpr bool isNegZero<uint64_t>(uint64_t x) {
    return x == (static_cast<uint64_t>(1) << 63);
}
|
|
|
|
|
/// Reports whether a raw bit pattern is all zeros, i.e. the positive-zero encoding.
/// Only the explicit specializations below provide a definition.
template <typename src_elem_t> constexpr bool isPosZero(src_elem_t x);

template <> constexpr bool isPosZero<uint16_t>(uint16_t x) {
    return !x;
}

template <> constexpr bool isPosZero<uint32_t>(uint32_t x) {
    return !x;
}

template <> constexpr bool isPosZero<uint64_t>(uint64_t x) {
    return !x;
}
|
|
|
|
|
|
|
|
|
@@ -880,30 +883,54 @@ template <typename dest_elem_t, typename src_elem_t> dest_elem_t widen_float(src
|
|
|
|
|
/// Widens a 32-bit float bit pattern to a 64-bit one via softfloat's f32_to_f64.
template <> inline uint64_t widen_float<uint64_t, uint32_t>(uint32_t val) {
    const float32_t narrow{val};
    const float64_t wide = f32_to_f64(narrow);
    return wide.v;
}
|
|
|
|
|
|
|
|
|
|
template <typename elem_size_t> elem_size_t fp_add(uint8_t, elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline uint16_t fp_add<uint16_t>(uint8_t mode, uint16_t v2, uint16_t v1) { return fadd_h(v2, v1, mode); }
|
|
|
|
|
template <> inline uint32_t fp_add<uint32_t>(uint8_t mode, uint32_t v2, uint32_t v1) { return fadd_s(v2, v1, mode); }
|
|
|
|
|
template <> inline uint64_t fp_add<uint64_t>(uint8_t mode, uint64_t v2, uint64_t v1) { return fadd_d(v2, v1, mode); }
|
|
|
|
|
template <typename elem_size_t> elem_size_t fp_sub(uint8_t, elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline uint16_t fp_sub<uint16_t>(uint8_t mode, uint16_t v2, uint16_t v1) { return fsub_h(v2, v1, mode); }
|
|
|
|
|
template <> inline uint32_t fp_sub<uint32_t>(uint8_t mode, uint32_t v2, uint32_t v1) { return fsub_s(v2, v1, mode); }
|
|
|
|
|
template <> inline uint64_t fp_sub<uint64_t>(uint8_t mode, uint64_t v2, uint64_t v1) { return fsub_d(v2, v1, mode); }
|
|
|
|
|
template <typename elem_size_t> elem_size_t fp_mul(uint8_t, elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline uint16_t fp_mul<uint16_t>(uint8_t mode, uint16_t v2, uint16_t v1) { return fmul_h(v2, v1, mode); }
|
|
|
|
|
template <> inline uint32_t fp_mul<uint32_t>(uint8_t mode, uint32_t v2, uint32_t v1) { return fmul_s(v2, v1, mode); }
|
|
|
|
|
template <> inline uint64_t fp_mul<uint64_t>(uint8_t mode, uint64_t v2, uint64_t v1) { return fmul_d(v2, v1, mode); }
|
|
|
|
|
template <typename elem_size_t> elem_size_t fp_div(uint8_t, elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline uint16_t fp_div<uint16_t>(uint8_t mode, uint16_t v2, uint16_t v1) { return fdiv_h(v2, v1, mode); }
|
|
|
|
|
template <> inline uint32_t fp_div<uint32_t>(uint8_t mode, uint32_t v2, uint32_t v1) { return fdiv_s(v2, v1, mode); }
|
|
|
|
|
template <> inline uint64_t fp_div<uint64_t>(uint8_t mode, uint64_t v2, uint64_t v1) { return fdiv_d(v2, v1, mode); }
|
|
|
|
|
template <typename elem_size_t> elem_size_t fp_madd(uint8_t, elem_size_t, elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline uint16_t fp_madd<uint16_t>(uint8_t mode, uint16_t v2, uint16_t v1, uint16_t v3) { return fmadd_h(v1, v2, v3, 0, mode); }
|
|
|
|
|
template <> inline uint32_t fp_madd<uint32_t>(uint8_t mode, uint32_t v2, uint32_t v1, uint32_t v3) { return fmadd_s(v1, v2, v3, 0, mode); }
|
|
|
|
|
template <> inline uint64_t fp_madd<uint64_t>(uint8_t mode, uint64_t v2, uint64_t v1, uint64_t v3) { return fmadd_d(v1, v2, v3, 0, mode); }
|
|
|
|
|
template <typename elem_size_t> elem_size_t fp_nmadd(uint8_t, elem_size_t, elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline uint16_t fp_nmadd<uint16_t>(uint8_t mode, uint16_t v2, uint16_t v1, uint16_t v3) { return fmadd_h(v1, v2, v3, 2, mode); }
|
|
|
|
|
template <> inline uint32_t fp_nmadd<uint32_t>(uint8_t mode, uint32_t v2, uint32_t v1, uint32_t v3) { return fmadd_s(v1, v2, v3, 2, mode); }
|
|
|
|
|
template <> inline uint64_t fp_nmadd<uint64_t>(uint8_t mode, uint64_t v2, uint64_t v1, uint64_t v3) { return fmadd_d(v1, v2, v3, 2, mode); }
|
|
|
|
|
template <typename elem_size_t> elem_size_t fp_msub(uint8_t, elem_size_t, elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline uint16_t fp_msub<uint16_t>(uint8_t mode, uint16_t v2, uint16_t v1, uint16_t v3) { return fmadd_h(v1, v2, v3, 1, mode); }
|
|
|
|
|
template <> inline uint32_t fp_msub<uint32_t>(uint8_t mode, uint32_t v2, uint32_t v1, uint32_t v3) { return fmadd_s(v1, v2, v3, 1, mode); }
|
|
|
|
|
template <> inline uint64_t fp_msub<uint64_t>(uint8_t mode, uint64_t v2, uint64_t v1, uint64_t v3) { return fmadd_d(v1, v2, v3, 1, mode); }
|
|
|
|
|
template <typename elem_size_t> elem_size_t fp_nmsub(uint8_t, elem_size_t, elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline uint16_t fp_nmsub<uint16_t>(uint8_t mode, uint16_t v2, uint16_t v1, uint16_t v3) { return fmadd_h(v1, v2, v3, 3, mode); }
|
|
|
|
|
template <> inline uint32_t fp_nmsub<uint32_t>(uint8_t mode, uint32_t v2, uint32_t v1, uint32_t v3) { return fmadd_s(v1, v2, v3, 3, mode); }
|
|
|
|
|
template <> inline uint64_t fp_nmsub<uint64_t>(uint8_t mode, uint64_t v2, uint64_t v1, uint64_t v3) { return fmadd_d(v1, v2, v3, 3, mode); }
|
|
|
|
|
template <typename elem_size_t> elem_size_t fp_min(elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline uint16_t fp_min<uint16_t>(uint16_t v2, uint16_t v1) {
|
|
|
|
|
if(isNaN(v1) && isNaN(v2))
|
|
|
|
|
return defaultNaNF16UI;
|
|
|
|
|
else if(isNaN(v1))
|
|
|
|
|
return v2;
|
|
|
|
|
else if(isNaN(v2))
|
|
|
|
|
return v1;
|
|
|
|
|
else if(isNegZero(v1) && isNegZero(v2))
|
|
|
|
|
return v1;
|
|
|
|
|
else if(isNegZero(v2) && isNegZero(v1))
|
|
|
|
|
return v2;
|
|
|
|
|
else if(fcmp_h(v1, v2, 2))
|
|
|
|
|
return v1;
|
|
|
|
|
else
|
|
|
|
|
return v2;
|
|
|
|
|
}
|
|
|
|
|
template <> inline uint32_t fp_min<uint32_t>(uint32_t v2, uint32_t v1) {
|
|
|
|
|
if(isNaN(v1) && isNaN(v2))
|
|
|
|
|
return defaultNaNF32UI;
|
|
|
|
@@ -937,6 +964,22 @@ template <> inline uint64_t fp_min<uint64_t>(uint64_t v2, uint64_t v1) {
|
|
|
|
|
return v2;
|
|
|
|
|
}
|
|
|
|
|
template <typename elem_size_t> elem_size_t fp_max(elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline uint16_t fp_max<uint16_t>(uint16_t v2, uint16_t v1) {
|
|
|
|
|
if(isNaN(v1) && isNaN(v2))
|
|
|
|
|
return defaultNaNF16UI;
|
|
|
|
|
else if(isNaN(v1))
|
|
|
|
|
return v2;
|
|
|
|
|
else if(isNaN(v2))
|
|
|
|
|
return v1;
|
|
|
|
|
else if(isNegZero(v1) && isNegZero(v2))
|
|
|
|
|
return v2;
|
|
|
|
|
else if(isNegZero(v2) && isNegZero(v1))
|
|
|
|
|
return v1;
|
|
|
|
|
else if(fcmp_h(v1, v2, 2))
|
|
|
|
|
return v2;
|
|
|
|
|
else
|
|
|
|
|
return v1;
|
|
|
|
|
}
|
|
|
|
|
template <> inline uint32_t fp_max<uint32_t>(uint32_t v2, uint32_t v1) {
|
|
|
|
|
if(isNaN(v1) && isNaN(v2))
|
|
|
|
|
return defaultNaNF32UI;
|
|
|
|
@@ -1264,7 +1307,18 @@ template <typename elem_size_t> elem_size_t fp_fclass(elem_size_t);
|
|
|
|
|
/// 16-bit classification: forwards the bit pattern to fclass_h.
template <> inline uint16_t fp_fclass<uint16_t>(uint16_t v2) {
    const uint16_t class_bits = fclass_h(v2);
    return class_bits;
}

/// 32-bit classification: forwards the bit pattern to fclass_s.
template <> inline uint32_t fp_fclass<uint32_t>(uint32_t v2) {
    const uint32_t class_bits = fclass_s(v2);
    return class_bits;
}

/// 64-bit classification: forwards the bit pattern to fclass_d.
template <> inline uint64_t fp_fclass<uint64_t>(uint64_t v2) {
    const uint64_t class_bits = fclass_d(v2);
    return class_bits;
}
|
|
|
|
|
|
|
|
|
|
// Forward declarations of the conversion templates. Each takes a uint8_t
// first argument plus the source element and yields the destination element;
// behaviour comes from explicit specializations per (destination, source) pair.

/// Float bit pattern -> unsigned integer.
template <typename dest_elem_size_t, typename src_elem_size_t>
dest_elem_size_t fp_f_to_ui(uint8_t, src_elem_size_t);

/// Float bit pattern -> signed integer (returned in an unsigned container).
template <typename dest_elem_size_t, typename src_elem_size_t>
dest_elem_size_t fp_f_to_i(uint8_t, src_elem_size_t);

/// Unsigned integer -> float bit pattern.
template <typename dest_elem_size_t, typename src_elem_size_t>
dest_elem_size_t fp_ui_to_f(uint8_t, src_elem_size_t);

/// Signed integer (held in an unsigned container) -> float bit pattern.
template <typename dest_elem_size_t, typename src_elem_size_t>
dest_elem_size_t fp_i_to_f(uint8_t, src_elem_size_t);

/// Float bit pattern -> float bit pattern of another width.
template <typename dest_elem_t, typename src_elem_t>
dest_elem_t fp_f_to_f(uint8_t rm, src_elem_t val);
|
|
|
|
|
|
|
|
|
|
/// f16 -> 16-bit unsigned integer.
/// Clears softfloat_exceptionFlags, sets softfloat_roundingMode to `rm`, and
/// converts through f16_to_ui32; the result is narrowed to 16 bits on return.
template <> inline uint16_t fp_f_to_ui<uint16_t, uint16_t>(uint8_t rm, uint16_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float16_t operand{v2};
    return static_cast<uint16_t>(f16_to_ui32(operand, rm, true));
}
|
|
|
|
|
template <> inline uint32_t fp_f_to_ui<uint32_t, uint32_t>(uint8_t rm, uint32_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
@@ -1275,6 +1329,143 @@ template <> inline uint64_t fp_f_to_ui<uint64_t, uint64_t>(uint8_t rm, uint64_t
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return f64_to_ui64(float64_t{v2}, rm, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// f16 -> 16-bit signed integer (returned as uint16_t).
/// Clears softfloat_exceptionFlags, sets softfloat_roundingMode to `rm`, and
/// converts through f16_to_i32.
/// NOTE(review): the 32-bit result is narrowed to 16 bits on return with no
/// visible saturation to the int16 range — confirm this is intended.
template <> inline uint16_t fp_f_to_i<uint16_t, uint16_t>(uint8_t rm, uint16_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float16_t operand{v2};
    return static_cast<uint16_t>(f16_to_i32(operand, rm, true));
}

/// f32 -> 32-bit signed integer (returned as uint32_t) via f32_to_i32.
template <> inline uint32_t fp_f_to_i<uint32_t, uint32_t>(uint8_t rm, uint32_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float32_t operand{v2};
    return static_cast<uint32_t>(f32_to_i32(operand, rm, true));
}

/// f64 -> 64-bit signed integer (returned as uint64_t) via f64_to_i64.
template <> inline uint64_t fp_f_to_i<uint64_t, uint64_t>(uint8_t rm, uint64_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float64_t operand{v2};
    return static_cast<uint64_t>(f64_to_i64(operand, rm, true));
}
|
|
|
|
|
|
|
|
|
|
/// 16-bit unsigned integer -> f16 bit pattern via ui32_to_f16.
/// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm` first.
template <> inline uint16_t fp_ui_to_f<uint16_t, uint16_t>(uint8_t rm, uint16_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float16_t converted = ui32_to_f16(v2);
    return converted.v;
}

/// 32-bit unsigned integer -> f32 bit pattern via ui32_to_f32.
template <> inline uint32_t fp_ui_to_f<uint32_t, uint32_t>(uint8_t rm, uint32_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float32_t converted = ui32_to_f32(v2);
    return converted.v;
}

/// 64-bit unsigned integer -> f64 bit pattern via ui64_to_f64.
template <> inline uint64_t fp_ui_to_f<uint64_t, uint64_t>(uint8_t rm, uint64_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float64_t converted = ui64_to_f64(v2);
    return converted.v;
}
|
|
|
|
|
|
|
|
|
|
/// 16-bit signed integer (held in uint16_t) -> f16 bit pattern.
/// The operand is reinterpreted as int16_t before being passed to i32_to_f16.
/// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm` first.
template <> inline uint16_t fp_i_to_f<uint16_t, uint16_t>(uint8_t rm, uint16_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const int16_t signed_operand = static_cast<int16_t>(v2);
    const float16_t converted = i32_to_f16(signed_operand);
    return converted.v;
}

/// 32-bit integer (held in uint32_t) -> f32 bit pattern via i32_to_f32.
template <> inline uint32_t fp_i_to_f<uint32_t, uint32_t>(uint8_t rm, uint32_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float32_t converted = i32_to_f32(v2);
    return converted.v;
}

/// 64-bit integer (held in uint64_t) -> f64 bit pattern via i64_to_f64.
template <> inline uint64_t fp_i_to_f<uint64_t, uint64_t>(uint8_t rm, uint64_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float64_t converted = i64_to_f64(v2);
    return converted.v;
}
|
|
|
|
|
|
|
|
|
|
template <typename elem_t> std::function<elem_t(uint8_t, uint8_t&, elem_t)> get_fp_unary_fn(unsigned encoding_space, unsigned unary_op) {
|
|
|
|
|
if(encoding_space == 0b010011) // VFUNARY1
|
|
|
|
|
switch(unary_op) {
|
|
|
|
|
case 0b00000: // VFSQRT
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, elem_t vs2) {
|
|
|
|
|
elem_t val = fp_sqrt(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b00100: // VFRSQRT7
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, elem_t vs2) {
|
|
|
|
|
elem_t val = fp_rsqrt7(vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b00101: // VFREC7
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, elem_t vs2) {
|
|
|
|
|
elem_t val = fp_rec7(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b10000: // VFCLASS
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, elem_t vs2) {
|
|
|
|
|
elem_t val = fp_fclass(vs2);
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
default:
|
|
|
|
|
throw new std::runtime_error("Unknown funct in get_fp_unary_fn");
|
|
|
|
|
}
|
|
|
|
|
else if(encoding_space == 0b010010) // VFUNARY0
|
|
|
|
|
switch(unary_op) {
|
|
|
|
|
case 0b00000: // VFCVT.XU.F.V
|
|
|
|
|
case 0b00110: // VFCVT.RTZ.XU.F.V
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, elem_t vs2) {
|
|
|
|
|
elem_t val = fp_f_to_ui<elem_t, elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b00001: // VFCVT.X.F.V
|
|
|
|
|
case 0b00111: // VFCVT.RTZ.X.F.V
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, elem_t vs2) {
|
|
|
|
|
elem_t val = fp_f_to_i<elem_t, elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b00010: // VFCVT.F.XU.V
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, elem_t vs2) {
|
|
|
|
|
elem_t val = fp_ui_to_f<elem_t, elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b00011: // VFCVT.F.X.V
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, elem_t vs2) {
|
|
|
|
|
elem_t val = fp_i_to_f<elem_t, elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
default:
|
|
|
|
|
throw new std::runtime_error("Unknown funct in get_fp_unary_fn");
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
throw new std::runtime_error("Unknown funct in get_fp_unary_fn");
|
|
|
|
|
}
|
|
|
|
|
template <unsigned VLEN, typename elem_t>
|
|
|
|
|
void fp_vector_unary_op(uint8_t* V, unsigned encoding_space, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm,
|
|
|
|
|
unsigned vd, unsigned vs2, uint8_t rm) {
|
|
|
|
|
uint64_t vlmax = VLEN * vtype.lmul() / vtype.sew();
|
|
|
|
|
vmask_view mask_reg = read_vmask<VLEN>(V, vlmax);
|
|
|
|
|
auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, vlmax);
|
|
|
|
|
auto vd_view = get_vreg<VLEN, elem_t>(V, vd, vlmax);
|
|
|
|
|
auto fn = get_fp_unary_fn<elem_t>(encoding_space, unary_op);
|
|
|
|
|
uint8_t accrued_flags = 0;
|
|
|
|
|
for(size_t idx = vstart; idx < vl; idx++) {
|
|
|
|
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
|
|
|
|
if(mask_active)
|
|
|
|
|
vd_view[idx] = fn(rm, accrued_flags, vs2_view[idx]);
|
|
|
|
|
else if(vtype.vma())
|
|
|
|
|
vd_view[idx] = agnostic_behavior(vd_view[idx]);
|
|
|
|
|
}
|
|
|
|
|
softfloat_exceptionFlags = accrued_flags;
|
|
|
|
|
if(vtype.vta())
|
|
|
|
|
for(size_t idx = vl; idx < vlmax; idx++)
|
|
|
|
|
vd_view[idx] = agnostic_behavior(vd_view[idx]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 8-bit source -> 16-bit unsigned: not a supported widening conversion, always throws.
/// NOTE(review): the exception object is heap-allocated and thrown by pointer,
/// so handlers must catch std::runtime_error* — confirm callers expect that.
template <> inline uint16_t fp_f_to_ui<uint16_t, uint8_t>(uint8_t rm, uint8_t v2) {
    auto* const error = new std::runtime_error("Attempting illegal widening conversion");
    throw error;
}
|
|
|
|
|
template <> inline uint32_t fp_f_to_ui<uint32_t, uint16_t>(uint8_t rm, uint16_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
@@ -1285,29 +1476,145 @@ template <> inline uint64_t fp_f_to_ui<uint64_t, uint32_t>(uint8_t rm, uint32_t
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return f32_to_ui64(float32_t{v2}, rm, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// 8-bit source -> 16-bit signed: not a supported widening conversion, always throws
/// (heap-allocated std::runtime_error thrown by pointer).
template <> inline uint16_t fp_f_to_i<uint16_t, uint8_t>(uint8_t rm, uint8_t v2) {
    auto* const error = new std::runtime_error("Attempting illegal widening conversion");
    throw error;
}

/// f16 -> 32-bit signed integer (returned as uint32_t) via f16_to_i32.
/// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm` first.
template <> inline uint32_t fp_f_to_i<uint32_t, uint16_t>(uint8_t rm, uint16_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float16_t operand{v2};
    return static_cast<uint32_t>(f16_to_i32(operand, rm, true));
}

/// f32 -> 64-bit signed integer (returned as uint64_t) via f32_to_i64.
template <> inline uint64_t fp_f_to_i<uint64_t, uint32_t>(uint8_t rm, uint32_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float32_t operand{v2};
    return static_cast<uint64_t>(f32_to_i64(operand, rm, true));
}
|
|
|
|
|
|
|
|
|
|
/// 8-bit unsigned integer -> f16 bit pattern via ui32_to_f16.
/// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm` first.
template <> inline uint16_t fp_ui_to_f<uint16_t, uint8_t>(uint8_t rm, uint8_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float16_t converted = ui32_to_f16(v2);
    return converted.v;
}

/// 16-bit unsigned integer -> f32 bit pattern via ui32_to_f32.
template <> inline uint32_t fp_ui_to_f<uint32_t, uint16_t>(uint8_t rm, uint16_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float32_t converted = ui32_to_f32(v2);
    return converted.v;
}

/// 32-bit unsigned integer -> f64 bit pattern via ui32_to_f64.
template <> inline uint64_t fp_ui_to_f<uint64_t, uint32_t>(uint8_t rm, uint32_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float64_t converted = ui32_to_f64(v2);
    return converted.v;
}
|
|
|
|
|
|
|
|
|
|
/// 8-bit signed integer (held in uint8_t) -> f16 bit pattern.
/// The operand is reinterpreted as int8_t before being passed to i32_to_f16.
/// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm` first.
template <> inline uint16_t fp_i_to_f<uint16_t, uint8_t>(uint8_t rm, uint8_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const int8_t signed_operand = static_cast<int8_t>(v2);
    const float16_t converted = i32_to_f16(signed_operand);
    return converted.v;
}

/// 16-bit signed integer (held in uint16_t) -> f32 bit pattern.
/// The operand is reinterpreted as int16_t before being passed to i32_to_f32.
template <> inline uint32_t fp_i_to_f<uint32_t, uint16_t>(uint8_t rm, uint16_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const int16_t signed_operand = static_cast<int16_t>(v2);
    const float32_t converted = i32_to_f32(signed_operand);
    return converted.v;
}

/// 32-bit integer (held in uint32_t) -> f64 bit pattern via i32_to_f64.
template <> inline uint64_t fp_i_to_f<uint64_t, uint32_t>(uint8_t rm, uint32_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float64_t converted = i32_to_f64(v2);
    return converted.v;
}
|
|
|
|
|
|
|
|
|
|
/// 8-bit source -> f16: not a supported widening conversion, always throws
/// (heap-allocated std::runtime_error thrown by pointer).
template <> inline uint16_t fp_f_to_f<uint16_t, uint8_t>(uint8_t rm, uint8_t val) {
    auto* const error = new std::runtime_error("Attempting illegal widening conversion");
    throw error;
}

/// f16 -> f32 bit pattern via f16_to_f32.
/// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm` first.
template <> inline uint32_t fp_f_to_f<uint32_t, uint16_t>(uint8_t rm, uint16_t val) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float16_t narrow{val};
    const float32_t wide = f16_to_f32(narrow);
    return wide.v;
}

/// f32 -> f64 bit pattern via f32_to_f64.
template <> inline uint64_t fp_f_to_f<uint64_t, uint32_t>(uint8_t rm, uint32_t val) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float32_t narrow{val};
    const float64_t wide = f32_to_f64(narrow);
    return wide.v;
}
|
|
|
|
|
|
|
|
|
|
template <typename dest_elem_t, typename src_elem_t>
|
|
|
|
|
std::function<dest_elem_t(uint8_t, uint8_t&, src_elem_t)> get_fp_widening_fn(unsigned unary_op) {
|
|
|
|
|
switch(unary_op) {
|
|
|
|
|
case 0b01000: // VFWCVT.XU.F.V
|
|
|
|
|
case 0b01110: // VFWCVT.RTZ.XU.F.V
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_f_to_ui<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b01001: // VFWCVT.X.F.V
|
|
|
|
|
case 0b01111: // VFWCVT.RTZ.X.F.V
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_f_to_i<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b01010: // VFWCVT.F.XU.V
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_ui_to_f<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b01011: // VFWCVT.F.X.V
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_i_to_f<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b01100: // VFWCVT.F.F.V
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_f_to_f<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
default:
|
|
|
|
|
throw new std::runtime_error("Unknown funct in get_fp_unary_fn");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t>
|
|
|
|
|
void fp_vector_unary_w(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
|
|
|
|
|
uint8_t rm) {
|
|
|
|
|
uint64_t vlmax = VLEN * vtype.lmul() / vtype.sew();
|
|
|
|
|
vmask_view mask_reg = read_vmask<VLEN>(V, vlmax);
|
|
|
|
|
auto vs2_view = get_vreg<VLEN, src_elem_t>(V, vs2, vlmax);
|
|
|
|
|
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, vlmax);
|
|
|
|
|
auto fn = get_fp_widening_fn<dest_elem_t, src_elem_t>(unary_op);
|
|
|
|
|
uint8_t accrued_flags = 0;
|
|
|
|
|
for(size_t idx = vstart; idx < vl; idx++) {
|
|
|
|
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
|
|
|
|
if(mask_active)
|
|
|
|
|
vd_view[idx] = fn(rm, accrued_flags, vs2_view[idx]);
|
|
|
|
|
else if(vtype.vma())
|
|
|
|
|
vd_view[idx] = agnostic_behavior(vd_view[idx]);
|
|
|
|
|
}
|
|
|
|
|
softfloat_exceptionFlags = accrued_flags;
|
|
|
|
|
if(vtype.vta())
|
|
|
|
|
for(size_t idx = vl; idx < vlmax; idx++)
|
|
|
|
|
vd_view[idx] = agnostic_behavior(vd_view[idx]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// f16 -> 8-bit unsigned integer.
/// Clears softfloat_exceptionFlags, sets softfloat_roundingMode to `rm`, and
/// converts through f16_to_ui32.
/// NOTE(review): the 32-bit result is narrowed to 8 bits on return with no
/// visible saturation to the uint8 range — confirm this is intended.
template <> inline uint8_t fp_f_to_ui<uint8_t, uint16_t>(uint8_t rm, uint16_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float16_t operand{v2};
    return static_cast<uint8_t>(f16_to_ui32(operand, rm, true));
}
|
|
|
|
|
// f32 -> 16-bit unsigned integer specialization.
// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm`,
// then unconditionally throws a heap-allocated std::runtime_error (by pointer).
// NOTE(review): the `return f32_to_ui32(...)` after the throw is unreachable as
// written; this looks like two revisions of the body interleaved — confirm
// whether the throw or the return is the intended behaviour.
template <> inline uint16_t fp_f_to_ui<uint16_t, uint32_t>(uint8_t rm, uint32_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
// return f32_to_ui16(float32_t{v2}, rm, true);
|
|
|
|
|
throw new std::runtime_error("No conversion from f32 to ui16 implemented");
|
|
|
|
|
return f32_to_ui32(float32_t{v2}, rm, true);
|
|
|
|
|
}
|
|
|
|
|
/// f64 -> 32-bit unsigned integer via f64_to_ui32.
/// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm` first.
template <> inline uint32_t fp_f_to_ui<uint32_t, uint64_t>(uint8_t rm, uint64_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float64_t operand{v2};
    return static_cast<uint32_t>(f64_to_ui32(operand, rm, true));
}
|
|
|
|
|
template <typename dest_elem_size_t, typename src_elem_size_t> dest_elem_size_t fp_f_to_i(uint8_t, src_elem_size_t);
|
|
|
|
|
template <> inline uint32_t fp_f_to_i<uint32_t, uint32_t>(uint8_t rm, uint32_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return f32_to_i32(float32_t{v2}, rm, true);
|
|
|
|
|
}
|
|
|
|
|
template <> inline uint64_t fp_f_to_i<uint64_t, uint64_t>(uint8_t rm, uint64_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return f64_to_i64(float64_t{v2}, rm, true);
|
|
|
|
|
}
|
|
|
|
|
template <> inline uint32_t fp_f_to_i<uint32_t, uint16_t>(uint8_t rm, uint16_t v2) {
|
|
|
|
|
|
|
|
|
|
template <> inline uint8_t fp_f_to_i<uint8_t, uint16_t>(uint8_t rm, uint16_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return f16_to_i32(float16_t{v2}, rm, true);
|
|
|
|
@@ -1315,40 +1622,16 @@ template <> inline uint32_t fp_f_to_i<uint32_t, uint16_t>(uint8_t rm, uint16_t v
|
|
|
|
|
/// f32 -> 16-bit signed integer: no conversion routine is wired up, so this
/// always throws (heap-allocated std::runtime_error, thrown by pointer) after
/// clearing softfloat_exceptionFlags and setting softfloat_roundingMode.
template <> inline uint16_t fp_f_to_i<uint16_t, uint32_t>(uint8_t rm, uint32_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    // return f32_to_i16(float32_t{v2}, rm, true);
    auto* const error = new std::runtime_error("No conversion from f32 to i16 implemented");
    throw error;
}
|
|
|
|
|
/// f32 -> 64-bit signed integer (returned as uint64_t) via f32_to_i64.
/// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm` first.
/// A second, unreachable `return f32_to_i32(...)` that followed the first
/// return has been dropped; the value returned is unchanged.
template <> inline uint64_t fp_f_to_i<uint64_t, uint32_t>(uint8_t rm, uint32_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    return f32_to_i64(float32_t{v2}, rm, true);
}
|
|
|
|
|
/// f64 -> 32-bit signed integer (returned as uint32_t) via f64_to_i32.
/// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm` first.
template <> inline uint32_t fp_f_to_i<uint32_t, uint64_t>(uint8_t rm, uint64_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float64_t operand{v2};
    return static_cast<uint32_t>(f64_to_i32(operand, rm, true));
}
|
|
|
|
|
template <typename dest_elem_size_t, typename src_elem_size_t> dest_elem_size_t fp_ui_to_f(uint8_t, src_elem_size_t);
|
|
|
|
|
template <> inline uint32_t fp_ui_to_f<uint32_t, uint32_t>(uint8_t rm, uint32_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return ui32_to_f32(v2).v;
|
|
|
|
|
}
|
|
|
|
|
template <> inline uint64_t fp_ui_to_f<uint64_t, uint64_t>(uint8_t rm, uint64_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return ui64_to_f64(v2).v;
|
|
|
|
|
}
|
|
|
|
|
/// 16-bit unsigned integer -> f32: no conversion routine is wired up, so this
/// always throws (heap-allocated std::runtime_error, thrown by pointer) after
/// clearing softfloat_exceptionFlags and setting softfloat_roundingMode.
template <> inline uint32_t fp_ui_to_f<uint32_t, uint16_t>(uint8_t rm, uint16_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    // return ui16_to_f32(v2).v;
    auto* const error = new std::runtime_error("No conversion from ui16 to f32 implemented");
    throw error;
}
|
|
|
|
|
template <> inline uint64_t fp_ui_to_f<uint64_t, uint32_t>(uint8_t rm, uint32_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return ui32_to_f64(v2).v;
|
|
|
|
|
|
|
|
|
|
/// 16-bit unsigned integer -> 8-bit float: not a supported narrowing
/// conversion, always throws (heap-allocated std::runtime_error thrown by pointer).
template <> inline uint8_t fp_ui_to_f<uint8_t, uint16_t>(uint8_t rm, uint16_t v2) {
    auto* const error = new std::runtime_error("Attempting illegal narrowing conversion");
    throw error;
}
|
|
|
|
|
template <> inline uint16_t fp_ui_to_f<uint16_t, uint32_t>(uint8_t rm, uint32_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
@@ -1360,27 +1643,9 @@ template <> inline uint32_t fp_ui_to_f<uint32_t, uint64_t>(uint8_t rm, uint64_t
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return ui64_to_f32(v2).v;
|
|
|
|
|
}
|
|
|
|
|
template <typename dest_elem_size_t, typename src_elem_size_t> dest_elem_size_t fp_i_to_f(uint8_t, src_elem_size_t);
|
|
|
|
|
template <> inline uint32_t fp_i_to_f<uint32_t, uint32_t>(uint8_t rm, uint32_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return i32_to_f32(v2).v;
|
|
|
|
|
}
|
|
|
|
|
template <> inline uint64_t fp_i_to_f<uint64_t, uint64_t>(uint8_t rm, uint64_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return i64_to_f64(v2).v;
|
|
|
|
|
}
|
|
|
|
|
/// 16-bit signed integer -> f32: no conversion routine is wired up, so this
/// always throws (heap-allocated std::runtime_error, thrown by pointer) after
/// clearing softfloat_exceptionFlags and setting softfloat_roundingMode.
template <> inline uint32_t fp_i_to_f<uint32_t, uint16_t>(uint8_t rm, uint16_t v2) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    // return i16_to_f32(v2).v;
    auto* const error = new std::runtime_error("No conversion from i16 to f32 implemented");
    throw error;
}
|
|
|
|
|
template <> inline uint64_t fp_i_to_f<uint64_t, uint32_t>(uint8_t rm, uint32_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return i32_to_f64(v2).v;
|
|
|
|
|
|
|
|
|
|
/// 16-bit signed integer -> 8-bit float: not a supported narrowing
/// conversion, always throws (heap-allocated std::runtime_error thrown by pointer).
template <> inline uint8_t fp_i_to_f<uint8_t, uint16_t>(uint8_t rm, uint16_t v2) {
    auto* const error = new std::runtime_error("Attempting illegal narrowing conversion");
    throw error;
}
|
|
|
|
|
template <> inline uint16_t fp_i_to_f<uint16_t, uint32_t>(uint8_t rm, uint32_t v2) {
|
|
|
|
|
softfloat_exceptionFlags = 0;
|
|
|
|
@@ -1392,112 +1657,68 @@ template <> inline uint32_t fp_i_to_f<uint32_t, uint64_t>(uint8_t rm, uint64_t v
|
|
|
|
|
softfloat_roundingMode = rm;
|
|
|
|
|
return i64_to_f32(v2).v;
|
|
|
|
|
}
|
|
|
|
|
template <typename dest_elem_t, typename src_elem_t> dest_elem_t fp_f_to_f(uint8_t rm, src_elem_t val) {
|
|
|
|
|
throw new std::runtime_error("Conversion not explicitly specialized");
|
|
|
|
|
|
|
|
|
|
/// f16 -> 8-bit float: not a supported narrowing conversion, always throws
/// (heap-allocated std::runtime_error thrown by pointer).
template <> inline uint8_t fp_f_to_f<uint8_t, uint16_t>(uint8_t rm, uint16_t val) {
    auto* const error = new std::runtime_error("Attempting illegal narrowing conversion");
    throw error;
}
|
|
|
|
|
template <> inline uint64_t fp_f_to_f<uint64_t, uint32_t>(uint8_t rm, uint32_t val) {
|
|
|
|
|
/// f32 -> f16 bit pattern via f32_to_f16.
/// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm` first.
/// The body previously returned `f32_to_f64(...)` (a widening result narrowed
/// to 16 bits) ahead of the f32_to_f16 call, leaving the correct conversion
/// unreachable; only the f32_to_f16 return is kept.
template <> inline uint16_t fp_f_to_f<uint16_t, uint32_t>(uint8_t rm, uint32_t val) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    return f32_to_f16(float32_t{val}).v;
}
|
|
|
|
|
/// f64 -> f32 bit pattern via f64_to_f32.
/// Clears softfloat_exceptionFlags and sets softfloat_roundingMode to `rm` first.
template <> inline uint32_t fp_f_to_f<uint32_t, uint64_t>(uint8_t rm, uint64_t val) {
    softfloat_exceptionFlags = 0;
    softfloat_roundingMode = rm;
    const float64_t wide{val};
    const float32_t narrow = f64_to_f32(wide);
    return narrow.v;
}
|
|
|
|
|
|
|
|
|
|
template <typename dest_elem_t, typename src_elem_t = dest_elem_t>
|
|
|
|
|
std::function<dest_elem_t(uint8_t, uint8_t&, src_elem_t)> get_fp_unary_fn(unsigned encoding_space, unsigned unary_op) {
|
|
|
|
|
if(encoding_space == 0b010011) // VFUNARY1
|
|
|
|
|
switch(unary_op) {
|
|
|
|
|
case 0b00000: // VFSQRT
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_sqrt(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b00100: // VFRSQRT7
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_rsqrt7(vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b00101: // VFREC7
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_rec7(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b10000: // VFCLASS
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_fclass(vs2);
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
default:
|
|
|
|
|
throw new std::runtime_error("Unknown funct in get_fp_unary_fn");
|
|
|
|
|
}
|
|
|
|
|
else if(encoding_space == 0b010010) // VFUNARY0
|
|
|
|
|
switch(unary_op) {
|
|
|
|
|
case 0b00000: // VFCVT.XU.F.V
|
|
|
|
|
case 0b00110: // VFCVT.RTZ.XU.F.V
|
|
|
|
|
case 0b01000: // VFWCVT.XU.F.V
|
|
|
|
|
case 0b01110: // VFWCVT.RTZ.XU.F.V
|
|
|
|
|
case 0b10000: // VFNCVT.XU.F.W
|
|
|
|
|
case 0b10110: // VFNCVT.RTZ.XU.F.W
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_f_to_ui<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b00001: // VFCVT.X.F.V
|
|
|
|
|
case 0b01001: // VFWCVT.X.F.V
|
|
|
|
|
case 0b10001: // VFNCVT.X.F.W
|
|
|
|
|
case 0b00111: // VFCVT.RTZ.X.F.V
|
|
|
|
|
case 0b01111: // VFWCVT.RTZ.X.F.V
|
|
|
|
|
case 0b10111: // VFNCVT.RTZ.X.F.W
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_f_to_i<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b00010: // VFCVT.F.XU.V
|
|
|
|
|
case 0b01010: // VFWCVT.F.XU.V
|
|
|
|
|
case 0b10010: // VFNCVT.F.XU.W
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_ui_to_f<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b00011: // VFCVT.F.X.V
|
|
|
|
|
case 0b01011: // VFWCVT.F.X.V
|
|
|
|
|
case 0b10011: // VFNCVT.F.X.W
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_i_to_f<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b01100: // VFWCVT.F.F.V
|
|
|
|
|
case 0b10100: // VFNCVT.F.F.W
|
|
|
|
|
case 0b10101: // VFNCVT.ROD.F.F.W
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_f_to_f<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
default:
|
|
|
|
|
throw new std::runtime_error("Unknown funct in get_fp_unary_fn");
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
throw new std::runtime_error("Unknown funct in get_fp_unary_fn");
|
|
|
|
|
template <typename dest_elem_t, typename src_elem_t>
|
|
|
|
|
std::function<dest_elem_t(uint8_t, uint8_t&, src_elem_t)> get_fp_narrowing_fn(unsigned unary_op) {
|
|
|
|
|
switch(unary_op) {
|
|
|
|
|
case 0b10000: // VFNCVT.XU.F.W
|
|
|
|
|
case 0b10110: // VFNCVT.RTZ.XU.F.W
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_f_to_ui<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b10001: // VFNCVT.X.F.W
|
|
|
|
|
case 0b10111: // VFNCVT.RTZ.X.F.W
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_f_to_i<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b10010: // VFNCVT.F.XU.W
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_ui_to_f<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b10011: // VFNCVT.F.X.W
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_i_to_f<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
case 0b10100: // VFNCVT.F.F.W
|
|
|
|
|
case 0b10101: // VFNCVT.ROD.F.F.W
|
|
|
|
|
return [](uint8_t rm, uint8_t& accrued_flags, src_elem_t vs2) {
|
|
|
|
|
dest_elem_t val = fp_f_to_f<dest_elem_t, src_elem_t>(rm, vs2);
|
|
|
|
|
accrued_flags |= softfloat_exceptionFlags;
|
|
|
|
|
return val;
|
|
|
|
|
};
|
|
|
|
|
default:
|
|
|
|
|
throw new std::runtime_error("Unknown funct in get_fp_narrowing_fn");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t>
|
|
|
|
|
void fp_vector_unary_op(uint8_t* V, unsigned encoding_space, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm,
|
|
|
|
|
unsigned vd, unsigned vs2, uint8_t rm) {
|
|
|
|
|
void fp_vector_unary_n(uint8_t* V, unsigned unary_op, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
|
|
|
|
|
uint8_t rm) {
|
|
|
|
|
uint64_t vlmax = VLEN * vtype.lmul() / vtype.sew();
|
|
|
|
|
vmask_view mask_reg = read_vmask<VLEN>(V, vlmax);
|
|
|
|
|
auto vs2_view = get_vreg<VLEN, src_elem_t>(V, vs2, vlmax);
|
|
|
|
|
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, vlmax);
|
|
|
|
|
auto fn = get_fp_unary_fn<dest_elem_t, src_elem_t>(encoding_space, unary_op);
|
|
|
|
|
auto fn = get_fp_narrowing_fn<dest_elem_t, src_elem_t>(unary_op);
|
|
|
|
|
uint8_t accrued_flags = 0;
|
|
|
|
|
for(size_t idx = vstart; idx < vl; idx++) {
|
|
|
|
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
|
|
|
@@ -1512,12 +1733,15 @@ void fp_vector_unary_op(uint8_t* V, unsigned encoding_space, unsigned unary_op,
|
|
|
|
|
vd_view[idx] = agnostic_behavior(vd_view[idx]);
|
|
|
|
|
}
|
|
|
|
|
template <typename elem_size_t> bool fp_eq(elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline bool fp_eq<uint16_t>(uint16_t v2, uint16_t v1) { return fcmp_h(v2, v1, 0); }
|
|
|
|
|
template <> inline bool fp_eq<uint32_t>(uint32_t v2, uint32_t v1) { return fcmp_s(v2, v1, 0); }
|
|
|
|
|
template <> inline bool fp_eq<uint64_t>(uint64_t v2, uint64_t v1) { return fcmp_d(v2, v1, 0); }
|
|
|
|
|
template <typename elem_size_t> bool fp_le(elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline bool fp_le<uint16_t>(uint16_t v2, uint16_t v1) { return fcmp_h(v2, v1, 1); }
|
|
|
|
|
template <> inline bool fp_le<uint32_t>(uint32_t v2, uint32_t v1) { return fcmp_s(v2, v1, 1); }
|
|
|
|
|
template <> inline bool fp_le<uint64_t>(uint64_t v2, uint64_t v1) { return fcmp_d(v2, v1, 1); }
|
|
|
|
|
template <typename elem_size_t> bool fp_lt(elem_size_t, elem_size_t);
|
|
|
|
|
template <> inline bool fp_lt<uint16_t>(uint16_t v2, uint16_t v1) { return fcmp_h(v2, v1, 2); }
|
|
|
|
|
template <> inline bool fp_lt<uint32_t>(uint32_t v2, uint32_t v1) { return fcmp_s(v2, v1, 2); }
|
|
|
|
|
template <> inline bool fp_lt<uint64_t>(uint64_t v2, uint64_t v1) { return fcmp_d(v2, v1, 2); }
|
|
|
|
|
template <typename elem_t> std::function<bool(uint8_t, uint8_t&, elem_t, elem_t)> get_fp_mask_funct(unsigned funct6) {
|
|
|
|
|