Adds narrowing fixed-point instructions
This commit is contained in:
parent
a6f24db83a
commit
28ac169cfe
@ -441,6 +441,32 @@ if(vector != null) {%>
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
}
|
||||
}
|
||||
bool sat_vector_vector_vw(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint64_t vxrm, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
|
||||
switch(sew_val){
|
||||
case 0b000:
|
||||
return softvector::sat_vector_vector_op<${vlen}, uint8_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, vs1);
|
||||
case 0b001:
|
||||
return softvector::sat_vector_vector_op<${vlen}, uint16_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, vs1);
|
||||
case 0b010:
|
||||
return softvector::sat_vector_vector_op<${vlen}, uint32_t, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, vs1);
|
||||
case 0b011: // would require 128 bits vs2 value
|
||||
default:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
}
|
||||
}
|
||||
bool sat_vector_imm_vw(uint8_t* V, uint8_t funct6, uint8_t funct3, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, uint64_t vxrm, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
|
||||
switch(sew_val){
|
||||
case 0b000:
|
||||
return softvector::sat_vector_imm_op<${vlen}, uint8_t, uint16_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, imm);
|
||||
case 0b001:
|
||||
return softvector::sat_vector_imm_op<${vlen}, uint16_t, uint32_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, imm);
|
||||
case 0b010:
|
||||
return softvector::sat_vector_imm_op<${vlen}, uint32_t, uint64_t>(V, funct6, funct3, vl, vstart, vtype, vxrm, vm, vd, vs2, imm);
|
||||
case 0b011: // would require 128 bits vs2 value
|
||||
default:
|
||||
throw new std::runtime_error("Unsupported sew bit value");
|
||||
}
|
||||
}
|
||||
<%}%>
|
||||
uint64_t fetch_count{0};
|
||||
uint64_t tval{0};
|
||||
|
@ -91,12 +91,12 @@ void carry_vector_vector_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vs
|
||||
template <unsigned VLEN, typename elem_t>
|
||||
void carry_vector_imm_op(uint8_t* V, unsigned funct, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2,
|
||||
typename std::make_signed<elem_t>::type imm);
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t = dest_elem_t>
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = dest_elem_t>
|
||||
bool sat_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, int64_t vxrm, bool vm,
|
||||
unsigned vd, unsigned vs2, unsigned vs1);
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t = dest_elem_t>
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = dest_elem_t>
|
||||
bool sat_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, int64_t vxrm, bool vm,
|
||||
unsigned vd, unsigned vs2, typename std::make_signed<src_elem_t>::type imm);
|
||||
unsigned vd, unsigned vs2, typename std::make_signed<src1_elem_t>::type imm);
|
||||
} // namespace softvector
|
||||
#include "vm/vector_functions.hpp"
|
||||
#endif /* _VM_VECTOR_FUNCTIONS_H_ */
|
||||
|
@ -606,15 +606,15 @@ template <typename T> T roundoff(T v, uint64_t d, int64_t vxrm) {
|
||||
unsigned r = get_rounding_increment(v, d, vxrm);
|
||||
return (v >> d) + r;
|
||||
}
|
||||
template <typename dest_elem_t, typename src_elem_t = dest_elem_t>
|
||||
std::function<bool(uint64_t, vtype_t, dest_elem_t&, src_elem_t, src_elem_t)> get_sat_funct(unsigned funct6, unsigned funct3) {
|
||||
template <typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_T = dest_elem_t>
|
||||
std::function<bool(uint64_t, vtype_t, dest_elem_t&, src2_elem_t, src1_elem_T)> get_sat_funct(unsigned funct6, unsigned funct3) {
|
||||
if(funct3 == OPIVV || funct3 == OPIVX || funct3 == OPIVI)
|
||||
switch(funct6) {
|
||||
case 0b100000: // VSADDU
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||
auto res = static_cast<twice_t<src_elem_t>>(vs2) + static_cast<twice_t<src_elem_t>>(vs1);
|
||||
if(res > std::numeric_limits<src_elem_t>::max()) {
|
||||
vd = std::numeric_limits<src_elem_t>::max();
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
auto res = static_cast<twice_t<src1_elem_T>>(vs2) + static_cast<twice_t<src1_elem_T>>(vs1);
|
||||
if(res > std::numeric_limits<dest_elem_t>::max()) {
|
||||
vd = std::numeric_limits<dest_elem_t>::max();
|
||||
return 1;
|
||||
} else {
|
||||
vd = res;
|
||||
@ -622,14 +622,14 @@ std::function<bool(uint64_t, vtype_t, dest_elem_t&, src_elem_t, src_elem_t)> get
|
||||
}
|
||||
};
|
||||
case 0b100001: // VSADD
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) +
|
||||
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
||||
if(res < std::numeric_limits<std::make_signed_t<src_elem_t>>::min()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<src_elem_t>>::min();
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
auto res = static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) +
|
||||
static_cast<twice_t<std::make_signed_t<src1_elem_T>>>(static_cast<std::make_signed_t<src1_elem_T>>(vs1));
|
||||
if(res < std::numeric_limits<std::make_signed_t<dest_elem_t>>::min()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<dest_elem_t>>::min();
|
||||
return 1;
|
||||
} else if(res > std::numeric_limits<std::make_signed_t<src_elem_t>>::max()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<src_elem_t>>::max();
|
||||
} else if(res > std::numeric_limits<std::make_signed_t<dest_elem_t>>::max()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<dest_elem_t>>::max();
|
||||
return 1;
|
||||
} else {
|
||||
vd = res;
|
||||
@ -637,7 +637,7 @@ std::function<bool(uint64_t, vtype_t, dest_elem_t&, src_elem_t, src_elem_t)> get
|
||||
}
|
||||
};
|
||||
case 0b100010: // VSSUBU
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
if(vs2 < vs1) {
|
||||
vd = 0;
|
||||
return 1;
|
||||
@ -647,14 +647,14 @@ std::function<bool(uint64_t, vtype_t, dest_elem_t&, src_elem_t, src_elem_t)> get
|
||||
}
|
||||
};
|
||||
case 0b100011: // VSSUB
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) -
|
||||
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
||||
if(res < std::numeric_limits<std::make_signed_t<src_elem_t>>::min()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<src_elem_t>>::min();
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
auto res = static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) -
|
||||
static_cast<twice_t<std::make_signed_t<src1_elem_T>>>(static_cast<std::make_signed_t<src1_elem_T>>(vs1));
|
||||
if(res < std::numeric_limits<std::make_signed_t<dest_elem_t>>::min()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<dest_elem_t>>::min();
|
||||
return 1;
|
||||
} else if(res > std::numeric_limits<std::make_signed_t<src_elem_t>>::max()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<src_elem_t>>::max();
|
||||
} else if(res > std::numeric_limits<std::make_signed_t<dest_elem_t>>::max()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<dest_elem_t>>::max();
|
||||
return 1;
|
||||
} else {
|
||||
vd = res;
|
||||
@ -662,15 +662,15 @@ std::function<bool(uint64_t, vtype_t, dest_elem_t&, src_elem_t, src_elem_t)> get
|
||||
}
|
||||
};
|
||||
case 0b100111: // VSMUL
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||
auto big_val = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) *
|
||||
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
auto big_val = static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) *
|
||||
static_cast<twice_t<std::make_signed_t<src1_elem_T>>>(static_cast<std::make_signed_t<src1_elem_T>>(vs1));
|
||||
auto res = roundoff(big_val, vtype.sew() - 1, vxrm);
|
||||
if(res < std::numeric_limits<std::make_signed_t<src_elem_t>>::min()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<src_elem_t>>::min();
|
||||
if(res < std::numeric_limits<std::make_signed_t<dest_elem_t>>::min()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<dest_elem_t>>::min();
|
||||
return 1;
|
||||
} else if(res > std::numeric_limits<std::make_signed_t<src_elem_t>>::max()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<src_elem_t>>::max();
|
||||
} else if(res > std::numeric_limits<std::make_signed_t<dest_elem_t>>::max()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<dest_elem_t>>::max();
|
||||
return 1;
|
||||
} else {
|
||||
vd = res;
|
||||
@ -678,45 +678,68 @@ std::function<bool(uint64_t, vtype_t, dest_elem_t&, src_elem_t, src_elem_t)> get
|
||||
}
|
||||
};
|
||||
case 0b101010: // VSSRL
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||
vd = roundoff(vs2, vs1 & shift_mask<src_elem_t>(), vxrm);
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
vd = roundoff(vs2, vs1 & shift_mask<src1_elem_T>(), vxrm);
|
||||
return 0;
|
||||
};
|
||||
case 0b101011: // VSSRA
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||
vd = roundoff(static_cast<std::make_signed_t<src_elem_t>>(vs2), vs1 & shift_mask<src_elem_t>(), vxrm);
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
vd = roundoff(static_cast<std::make_signed_t<src2_elem_t>>(vs2), vs1 & shift_mask<src1_elem_T>(), vxrm);
|
||||
return 0;
|
||||
};
|
||||
// case 0b101110: // VNCLIPU
|
||||
// case 0b101111: // VNCLIP
|
||||
case 0b101110: // VNCLIPU
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
auto res = roundoff(vs2, vs1 & shift_mask<src2_elem_t>(), vxrm);
|
||||
if(res > std::numeric_limits<dest_elem_t>::max()) {
|
||||
vd = std::numeric_limits<dest_elem_t>::max();
|
||||
return 1;
|
||||
} else {
|
||||
vd = res;
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
case 0b101111: // VNCLIP
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
auto res = roundoff(static_cast<std::make_signed_t<src2_elem_t>>(vs2), vs1 & shift_mask<src2_elem_t>(), vxrm);
|
||||
if(res < std::numeric_limits<std::make_signed_t<dest_elem_t>>::min()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<dest_elem_t>>::min();
|
||||
return 1;
|
||||
} else if(res > std::numeric_limits<std::make_signed_t<dest_elem_t>>::max()) {
|
||||
vd = std::numeric_limits<std::make_signed_t<dest_elem_t>>::max();
|
||||
return 1;
|
||||
} else {
|
||||
vd = res;
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
default:
|
||||
throw new std::runtime_error("Unknown funct6 in get_sat_funct");
|
||||
}
|
||||
else if(funct3 == OPMVV || funct3 == OPMVX)
|
||||
switch(funct6) {
|
||||
case 0b001000: // VAADDU
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||
auto res = static_cast<twice_t<src_elem_t>>(vs2) + static_cast<twice_t<src_elem_t>>(vs1);
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
auto res = static_cast<twice_t<src2_elem_t>>(vs2) + static_cast<twice_t<src1_elem_T>>(vs1);
|
||||
vd = roundoff(res, 1, vxrm);
|
||||
return 0;
|
||||
};
|
||||
case 0b001001: // VAADD
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) +
|
||||
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
auto res = static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) +
|
||||
static_cast<twice_t<std::make_signed_t<src1_elem_T>>>(static_cast<std::make_signed_t<src1_elem_T>>(vs1));
|
||||
vd = roundoff(res, 1, vxrm);
|
||||
return 0;
|
||||
};
|
||||
case 0b001010: // VASUBU
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||
auto res = static_cast<twice_t<src_elem_t>>(vs2) - static_cast<twice_t<src_elem_t>>(vs1);
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
auto res = static_cast<twice_t<src2_elem_t>>(vs2) - static_cast<twice_t<src1_elem_T>>(vs1);
|
||||
vd = roundoff(res, 1, vxrm);
|
||||
return 0;
|
||||
};
|
||||
case 0b001011: // VASUB
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) -
|
||||
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
||||
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src2_elem_t vs2, src1_elem_T vs1) {
|
||||
auto res = static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) -
|
||||
static_cast<twice_t<std::make_signed_t<src1_elem_T>>>(static_cast<std::make_signed_t<src1_elem_T>>(vs1));
|
||||
vd = roundoff(res, 1, vxrm);
|
||||
return 0;
|
||||
};
|
||||
@ -726,16 +749,16 @@ std::function<bool(uint64_t, vtype_t, dest_elem_t&, src_elem_t, src_elem_t)> get
|
||||
else
|
||||
throw new std::runtime_error("Unknown funct3 in get_sat_funct");
|
||||
}
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t>
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t, typename src1_elem_T>
|
||||
bool sat_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, int64_t vxrm, bool vm,
|
||||
unsigned vd, unsigned vs2, unsigned vs1) {
|
||||
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
|
||||
bool saturated = false;
|
||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||
auto vs1_view = get_vreg<VLEN, src_elem_t>(V, vs1, elem_count);
|
||||
auto vs2_view = get_vreg<VLEN, src_elem_t>(V, vs2, elem_count);
|
||||
auto vs1_view = get_vreg<VLEN, src1_elem_T>(V, vs1, elem_count);
|
||||
auto vs2_view = get_vreg<VLEN, src2_elem_t>(V, vs2, elem_count);
|
||||
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, elem_count);
|
||||
auto fn = get_sat_funct<dest_elem_t, src_elem_t>(funct6, funct3);
|
||||
auto fn = get_sat_funct<dest_elem_t, src2_elem_t, src1_elem_T>(funct6, funct3);
|
||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||
// body is from vstart to min(elem_count, vl)
|
||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||
@ -754,15 +777,15 @@ bool sat_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t
|
||||
}
|
||||
return saturated;
|
||||
}
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src_elem_t>
|
||||
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t, typename src1_elem_T>
|
||||
bool sat_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, int64_t vxrm, bool vm,
|
||||
unsigned vd, unsigned vs2, typename std::make_signed<src_elem_t>::type imm) {
|
||||
unsigned vd, unsigned vs2, typename std::make_signed<src1_elem_T>::type imm) {
|
||||
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
|
||||
bool saturated = false;
|
||||
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
|
||||
auto vs2_view = get_vreg<VLEN, src_elem_t>(V, vs2, elem_count);
|
||||
auto vs2_view = get_vreg<VLEN, src2_elem_t>(V, vs2, elem_count);
|
||||
auto vd_view = get_vreg<VLEN, dest_elem_t>(V, vd, elem_count);
|
||||
auto fn = get_sat_funct<dest_elem_t, src_elem_t>(funct6, funct3);
|
||||
auto fn = get_sat_funct<dest_elem_t, src2_elem_t, src1_elem_T>(funct6, funct3);
|
||||
// elements w/ index smaller than vstart are in the prestart and get skipped
|
||||
// body is from vstart to min(elem_count, vl)
|
||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user