Adds floating-point reduction instructions; the widening variants are untested
This commit is contained in:
parent
60d2b45a81
commit
8746003d3e
@ -750,7 +750,33 @@ if(vector != null) {%>
|
|||||||
void fp_vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
|
void fp_vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
|
||||||
return vector_slide1down(V, vl, vstart, vtype, vm, vd, vs2, imm, sew_val);
|
return vector_slide1down(V, vl, vstart, vtype, vm, vd, vs2, imm, sew_val);
|
||||||
}
|
}
|
||||||
|
void fp_vector_red_op(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t rm, uint8_t sew_val){
|
||||||
|
switch(sew_val){
|
||||||
|
case 0b000:
|
||||||
|
throw new std::runtime_error("Unsupported sew bit value");
|
||||||
|
case 0b001:
|
||||||
|
throw new std::runtime_error("Unsupported sew bit value");
|
||||||
|
case 0b010:
|
||||||
|
return softvector::fp_vector_red_op<${vlen}, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
|
||||||
|
case 0b011:
|
||||||
|
return softvector::fp_vector_red_op<${vlen}, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
|
||||||
|
default:
|
||||||
|
throw new std::runtime_error("Unsupported sew bit value");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
void fp_vector_red_wv(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t rm, uint8_t sew_val){
|
||||||
|
switch(sew_val){
|
||||||
|
case 0b000:
|
||||||
|
throw new std::runtime_error("Unsupported sew bit value");
|
||||||
|
case 0b001:
|
||||||
|
return softvector::fp_vector_red_op<${vlen}, uint32_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
|
||||||
|
case 0b010:
|
||||||
|
return softvector::fp_vector_red_op<${vlen}, uint64_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vm, vd, vs2, vs1, rm);
|
||||||
|
case 0b011: // would require 128 bits vs2 value
|
||||||
|
default:
|
||||||
|
throw new std::runtime_error("Unsupported sew bit value");
|
||||||
|
}
|
||||||
|
}
|
||||||
<%}%>
|
<%}%>
|
||||||
uint64_t fetch_count{0};
|
uint64_t fetch_count{0};
|
||||||
uint64_t tval{0};
|
uint64_t tval{0};
|
||||||
|
@ -122,6 +122,9 @@ void vector_imm_gather(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype,
|
|||||||
template <unsigned VLEN, typename scr_elem_t>
|
template <unsigned VLEN, typename scr_elem_t>
|
||||||
void vector_compress(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, unsigned vd, unsigned vs2, unsigned vs1);
|
void vector_compress(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, unsigned vd, unsigned vs2, unsigned vs1);
|
||||||
template <unsigned VLEN> void vector_whole_move(uint8_t* V, unsigned vd, unsigned vs2, unsigned count);
|
template <unsigned VLEN> void vector_whole_move(uint8_t* V, unsigned vd, unsigned vs2, unsigned count);
|
||||||
|
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t = dest_elem_t>
|
||||||
|
void fp_vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
|
||||||
|
unsigned vs2, unsigned vs1, uint8_t rm);
|
||||||
} // namespace softvector
|
} // namespace softvector
|
||||||
#include "vm/vector_functions.hpp"
|
#include "vm/vector_functions.hpp"
|
||||||
#endif /* _VM_VECTOR_FUNCTIONS_H_ */
|
#endif /* _VM_VECTOR_FUNCTIONS_H_ */
|
||||||
|
@ -32,7 +32,13 @@
|
|||||||
// alex@minres.com - initial API and implementation
|
// alex@minres.com - initial API and implementation
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
#pragma once
|
#pragma once
|
||||||
|
#include "softfloat.h"
|
||||||
|
#include "softfloat_types.h"
|
||||||
|
#include "specialize.h"
|
||||||
|
#include "vm/fp_functions.h"
|
||||||
#include "vm/vector_functions.h"
|
#include "vm/vector_functions.h"
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstddef>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
@ -873,7 +879,7 @@ std::function<void(dest_elem_t&, src_elem_t)> get_red_funct(unsigned funct6, uns
|
|||||||
static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)));
|
static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)));
|
||||||
};
|
};
|
||||||
default:
|
default:
|
||||||
throw new std::runtime_error("Unknown funct6 in get_funct");
|
throw new std::runtime_error("Unknown funct6 in get_red_funct");
|
||||||
}
|
}
|
||||||
else if(funct3 == OPMVV || funct3 == OPMVX)
|
else if(funct3 == OPMVV || funct3 == OPMVX)
|
||||||
switch(funct6) {
|
switch(funct6) {
|
||||||
@ -902,10 +908,10 @@ std::function<void(dest_elem_t&, src_elem_t)> get_red_funct(unsigned funct6, uns
|
|||||||
static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)));
|
static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)));
|
||||||
};
|
};
|
||||||
default:
|
default:
|
||||||
throw new std::runtime_error("Unknown funct6 in get_funct");
|
throw new std::runtime_error("Unknown funct6 in get_red_funct");
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw new std::runtime_error("Unknown funct3 in get_funct");
|
throw new std::runtime_error("Unknown funct3 in get_red_funct");
|
||||||
}
|
}
|
||||||
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t>
|
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t>
|
||||||
void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
|
void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
|
||||||
@ -932,6 +938,165 @@ void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
|
|||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// Bit-pattern classifiers for floating-point values carried in unsigned
// integers (32-bit and 64-bit encodings). Equivalent helpers may already
// exist somewhere in softfloat.

// True when the exponent field is all ones and the fraction is non-zero.
// With the sign bit masked off, that is exactly "greater than the infinity
// encoding".
template <typename src_elem_t> constexpr bool isNaN(src_elem_t x);
template <> constexpr bool isNaN<uint32_t>(uint32_t x) { return (x & 0x7FFFFFFF) > 0x7F800000; }
template <> constexpr bool isNaN<uint64_t>(uint64_t x) { return (x & 0x7FFFFFFFFFFFFFFF) > 0x7FF0000000000000; }

// True only for the encoding with the sign bit set and all other bits clear.
template <typename src_elem_t> constexpr bool isNegZero(src_elem_t x);
template <> constexpr bool isNegZero<uint32_t>(uint32_t x) { return x == (uint32_t{1} << 31); }
template <> constexpr bool isNegZero<uint64_t>(uint64_t x) { return x == (uint64_t{1} << 63); }

// True only for the all-zero encoding.
template <typename src_elem_t> constexpr bool isPosZero(src_elem_t x);
template <> constexpr bool isPosZero<uint32_t>(uint32_t x) { return !x; }
template <> constexpr bool isPosZero<uint64_t>(uint64_t x) { return !x; }
|
||||||
|
|
||||||
|
template <typename elem_size_t> elem_size_t fp_add(uint8_t, elem_size_t, elem_size_t);
|
||||||
|
template <> inline uint32_t fp_add<uint32_t>(uint8_t mode, uint32_t v1, uint32_t v2) { return fadd_s(v1, v2, mode); }
|
||||||
|
template <> inline uint64_t fp_add<uint64_t>(uint8_t mode, uint64_t v1, uint64_t v2) { return fadd_d(v1, v2, mode); }
|
||||||
|
// Widens a floating-point value given as a raw bit pattern to the next wider
// format, returning the raw bit pattern of the result.
//
// Only 32-bit -> 64-bit is implemented. The generic version exists so that
// code which names widen_float for other width combinations (as
// get_fp_red_funct does for every element type it is instantiated with)
// still compiles; calling it for an unsupported combination throws
// std::runtime_error (by value).
template <typename dest_elem_t, typename src_elem_t> dest_elem_t widen_float(src_elem_t val) {
    (void)val;
    throw std::runtime_error("Unsupported element widths in widen_float");
}

// 32-bit -> 64-bit: the input bits are reinterpreted as a float (not converted
// numerically from the integer value), widened, and the bits of the resulting
// double are returned.
// NOTE(review): the conversion runs on the host FPU, so NaN inputs follow host
// rules and do not touch softfloat_exceptionFlags - consider softfloat's
// f32_to_f64 if exact NaN/flag behaviour is required.
template <> inline uint64_t widen_float<uint64_t, uint32_t>(uint32_t val) {
    static_assert(sizeof(float) == sizeof(uint32_t) && sizeof(double) == sizeof(uint64_t), "unexpected host float sizes");
    float narrow;
    std::memcpy(&narrow, &val, sizeof(narrow));
    const double wide = narrow;
    uint64_t bits;
    std::memcpy(&bits, &wide, sizeof(bits));
    return bits;
}
|
||||||
|
|
||||||
|
template <typename elem_size_t> elem_size_t fp_min(elem_size_t, elem_size_t);
|
||||||
|
template <> inline uint32_t fp_min<uint32_t>(uint32_t v1, uint32_t v2) {
|
||||||
|
bool v1_lt_v2 = fcmp_s(v1, v2, 2);
|
||||||
|
if(isNaN(v1) && isNaN(v2))
|
||||||
|
return defaultNaNF32UI;
|
||||||
|
else if(isNaN(v1))
|
||||||
|
return v2;
|
||||||
|
else if(isNaN(v2))
|
||||||
|
return v1;
|
||||||
|
else if(isNegZero(v1) && isNegZero(v2))
|
||||||
|
return v1;
|
||||||
|
else if(isNegZero(v2) && isNegZero(v1))
|
||||||
|
return v2;
|
||||||
|
else if(v1_lt_v2)
|
||||||
|
return v1;
|
||||||
|
else
|
||||||
|
return v2;
|
||||||
|
}
|
||||||
|
template <> inline uint64_t fp_min<uint64_t>(uint64_t v1, uint64_t v2) {
|
||||||
|
bool v1_lt_v2 = fcmp_d(v1, v2, 2);
|
||||||
|
if(isNaN(v1) && isNaN(v2))
|
||||||
|
return defaultNaNF32UI;
|
||||||
|
else if(isNaN(v1))
|
||||||
|
return v2;
|
||||||
|
else if(isNaN(v2))
|
||||||
|
return v1;
|
||||||
|
else if(isNegZero(v1) && isNegZero(v2))
|
||||||
|
return v1;
|
||||||
|
else if(isNegZero(v2) && isNegZero(v1))
|
||||||
|
return v2;
|
||||||
|
else if(v1_lt_v2)
|
||||||
|
return v1;
|
||||||
|
else
|
||||||
|
return v2;
|
||||||
|
}
|
||||||
|
template <typename elem_size_t> elem_size_t fp_max(elem_size_t, elem_size_t);
|
||||||
|
template <> inline uint32_t fp_max<uint32_t>(uint32_t v1, uint32_t v2) {
|
||||||
|
bool v1_lt_v2 = fcmp_s(v1, v2, 2);
|
||||||
|
if(isNaN(v1) && isNaN(v2))
|
||||||
|
return defaultNaNF32UI;
|
||||||
|
else if(isNaN(v1))
|
||||||
|
return v2;
|
||||||
|
else if(isNaN(v2))
|
||||||
|
return v1;
|
||||||
|
else if(isNegZero(v1) && isNegZero(v2))
|
||||||
|
return v2;
|
||||||
|
else if(isNegZero(v2) && isNegZero(v1))
|
||||||
|
return v1;
|
||||||
|
else if(v1_lt_v2)
|
||||||
|
return v2;
|
||||||
|
else
|
||||||
|
return v1;
|
||||||
|
}
|
||||||
|
template <> inline uint64_t fp_max<uint64_t>(uint64_t v1, uint64_t v2) {
|
||||||
|
bool v1_lt_v2 = fcmp_d(v1, v2, 2);
|
||||||
|
if(isNaN(v1) && isNaN(v2))
|
||||||
|
return defaultNaNF32UI;
|
||||||
|
else if(isNaN(v1))
|
||||||
|
return v2;
|
||||||
|
else if(isNaN(v2))
|
||||||
|
return v1;
|
||||||
|
else if(isNegZero(v1) && isNegZero(v2))
|
||||||
|
return v2;
|
||||||
|
else if(isNegZero(v2) && isNegZero(v1))
|
||||||
|
return v1;
|
||||||
|
else if(v1_lt_v2)
|
||||||
|
return v2;
|
||||||
|
else
|
||||||
|
return v1;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename dest_elem_t, typename src_elem_t>
|
||||||
|
std::function<void(uint8_t, uint_fast8_t&, dest_elem_t&, src_elem_t)> get_fp_red_funct(unsigned funct6, unsigned funct3) {
|
||||||
|
if(funct3 == OPFVV || funct3 == OPFVF)
|
||||||
|
switch(funct6) {
|
||||||
|
case 0b000001: // VFREDUSUM
|
||||||
|
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
|
||||||
|
running_total = fp_add<dest_elem_t>(rm, running_total, vs2);
|
||||||
|
accrued_flags |= softfloat_exceptionFlags;
|
||||||
|
};
|
||||||
|
case 0b000011: // VFREDOSUM
|
||||||
|
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
|
||||||
|
running_total = fp_add<dest_elem_t>(rm, running_total, vs2);
|
||||||
|
accrued_flags |= softfloat_exceptionFlags;
|
||||||
|
};
|
||||||
|
case 0b000101: // VFREDMIN
|
||||||
|
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
|
||||||
|
running_total = fp_min<dest_elem_t>(running_total, vs2);
|
||||||
|
accrued_flags |= softfloat_exceptionFlags;
|
||||||
|
};
|
||||||
|
case 0b000111: // VFREDMAX
|
||||||
|
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
|
||||||
|
running_total = fp_max<dest_elem_t>(running_total, vs2);
|
||||||
|
accrued_flags |= softfloat_exceptionFlags;
|
||||||
|
};
|
||||||
|
case 0b110001: // VFWREDUSUM
|
||||||
|
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
|
||||||
|
running_total = fp_add<dest_elem_t>(rm, running_total, widen_float<dest_elem_t>(vs2));
|
||||||
|
accrued_flags |= softfloat_exceptionFlags;
|
||||||
|
};
|
||||||
|
case 0b110011: // VFWREDOSUM
|
||||||
|
return [](uint8_t rm, uint_fast8_t& accrued_flags, dest_elem_t& running_total, src_elem_t vs2) {
|
||||||
|
running_total = fp_add<dest_elem_t>(rm, running_total, widen_float<dest_elem_t>(vs2));
|
||||||
|
accrued_flags |= softfloat_exceptionFlags;
|
||||||
|
};
|
||||||
|
default:
|
||||||
|
throw new std::runtime_error("Unknown funct6 in get_fp_red_funct");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
throw new std::runtime_error("Unknown funct3 in get_fp_red_funct");
|
||||||
|
}
|
||||||
|
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t>
|
||||||
|
void fp_vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
|
||||||
|
unsigned vs2, unsigned vs1, uint8_t rm) {
|
||||||
|
if(vl == 0)
|
||||||
|
return;
|
||||||
|
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
|
||||||
|
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||||
|
auto vs1_elem = get_vreg<VLEN, dest_elem_t>(V, vs1, elem_count)[0];
|
||||||
|
auto vs2_view = get_vreg<VLEN, src_elem_t>(V, vs2, elem_count);
|
||||||
|
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, elem_count);
|
||||||
|
auto fn = get_fp_red_funct<dest_elem_t, src_elem_t>(funct6, funct3);
|
||||||
|
dest_elem_t& running_total = {vs1_elem};
|
||||||
|
uint_fast8_t accrued_flags = 0;
|
||||||
|
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
|
if(mask_active) {
|
||||||
|
fn(rm, accrued_flags, running_total, vs2_view[idx]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vd_view[0] = running_total;
|
||||||
|
softfloat_exceptionFlags = accrued_flags;
|
||||||
|
// the tail is all elements of the destination register beyond the first one
|
||||||
|
for(unsigned idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) {
|
||||||
|
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
template <unsigned VLEN>
|
template <unsigned VLEN>
|
||||||
void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, unsigned vd, unsigned vs2, unsigned vs1) {
|
void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, unsigned vd, unsigned vs2, unsigned vs1) {
|
||||||
uint64_t elem_count = VLEN;
|
uint64_t elem_count = VLEN;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user