adds vsmul, widens function parameters for sat_vector operations
This commit is contained in:
parent
75d96bf18d
commit
e1911bc450
@ -596,24 +596,20 @@ template <typename T> bool get_rounding_increment(T v, uint64_t d, int64_t vxrm)
|
|||||||
case 0b10: // rdn: round-down (truncate)
|
case 0b10: // rdn: round-down (truncate)
|
||||||
return false;
|
return false;
|
||||||
case 0b11: // rod: round-to-odd (jam)
|
case 0b11: // rod: round-to-odd (jam)
|
||||||
return (!(v & (1 << d)) && ((v & ((1 << d) - 1)) != 0));
|
return (!(v & (static_cast<T>(1) << d)) && ((v & ((static_cast<T>(1) << d) - 1)) != 0));
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
template <typename T> T roundoff_unsigned(T v, uint64_t d, int64_t vxrm) {
|
template <typename T> T roundoff(T v, uint64_t d, int64_t vxrm) {
|
||||||
unsigned r = get_rounding_increment(v, d, vxrm);
|
|
||||||
return (v >> d) + r;
|
|
||||||
}
|
|
||||||
template <typename T> T roundoff_signed(T v, uint64_t d, int64_t vxrm) {
|
|
||||||
unsigned r = get_rounding_increment(v, d, vxrm);
|
unsigned r = get_rounding_increment(v, d, vxrm);
|
||||||
return (v >> d) + r;
|
return (v >> d) + r;
|
||||||
}
|
}
|
||||||
template <typename dest_elem_t, typename src_elem_t = dest_elem_t>
|
template <typename dest_elem_t, typename src_elem_t = dest_elem_t>
|
||||||
std::function<bool(uint64_t, dest_elem_t&, src_elem_t, src_elem_t)> get_sat_funct(unsigned funct6, unsigned funct3) {
|
std::function<bool(uint64_t, vtype_t, dest_elem_t&, src_elem_t, src_elem_t)> get_sat_funct(unsigned funct6, unsigned funct3) {
|
||||||
if(funct3 == OPIVV || funct3 == OPIVX || funct3 == OPIVI)
|
if(funct3 == OPIVV || funct3 == OPIVX || funct3 == OPIVI)
|
||||||
switch(funct6) {
|
switch(funct6) {
|
||||||
case 0b100000: // VSADDU
|
case 0b100000: // VSADDU
|
||||||
return [](uint64_t vxrm, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||||
auto res = static_cast<twice_t<src_elem_t>>(vs2) + static_cast<twice_t<src_elem_t>>(vs1);
|
auto res = static_cast<twice_t<src_elem_t>>(vs2) + static_cast<twice_t<src_elem_t>>(vs1);
|
||||||
if(res > std::numeric_limits<src_elem_t>::max()) {
|
if(res > std::numeric_limits<src_elem_t>::max()) {
|
||||||
vd = std::numeric_limits<src_elem_t>::max();
|
vd = std::numeric_limits<src_elem_t>::max();
|
||||||
@ -624,7 +620,7 @@ std::function<bool(uint64_t, dest_elem_t&, src_elem_t, src_elem_t)> get_sat_func
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
case 0b100001: // VSADD
|
case 0b100001: // VSADD
|
||||||
return [](uint64_t vxrm, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||||
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) +
|
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) +
|
||||||
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
||||||
if(res < std::numeric_limits<std::make_signed_t<src_elem_t>>::min()) {
|
if(res < std::numeric_limits<std::make_signed_t<src_elem_t>>::min()) {
|
||||||
@ -639,7 +635,7 @@ std::function<bool(uint64_t, dest_elem_t&, src_elem_t, src_elem_t)> get_sat_func
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
case 0b100010: // VSSUBU
|
case 0b100010: // VSSUBU
|
||||||
return [](uint64_t vxrm, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||||
if(vs2 < vs1) {
|
if(vs2 < vs1) {
|
||||||
vd = 0;
|
vd = 0;
|
||||||
return 1;
|
return 1;
|
||||||
@ -649,7 +645,7 @@ std::function<bool(uint64_t, dest_elem_t&, src_elem_t, src_elem_t)> get_sat_func
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
case 0b100011: // VSSUB
|
case 0b100011: // VSSUB
|
||||||
return [](uint64_t vxrm, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||||
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) -
|
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) -
|
||||||
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
||||||
if(res < std::numeric_limits<std::make_signed_t<src_elem_t>>::min()) {
|
if(res < std::numeric_limits<std::make_signed_t<src_elem_t>>::min()) {
|
||||||
@ -663,7 +659,22 @@ std::function<bool(uint64_t, dest_elem_t&, src_elem_t, src_elem_t)> get_sat_func
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// case 0b100111: // VSMUL
|
case 0b100111: // VSMUL
|
||||||
|
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||||
|
auto big_val = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) *
|
||||||
|
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
||||||
|
auto res = roundoff(big_val, vtype.sew() - 1, vxrm);
|
||||||
|
if(res < std::numeric_limits<std::make_signed_t<src_elem_t>>::min()) {
|
||||||
|
vd = std::numeric_limits<std::make_signed_t<src_elem_t>>::min();
|
||||||
|
return 1;
|
||||||
|
} else if(res > std::numeric_limits<std::make_signed_t<src_elem_t>>::max()) {
|
||||||
|
vd = std::numeric_limits<std::make_signed_t<src_elem_t>>::max();
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
vd = res;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
||||||
// case 0b101010: // VSSRL
|
// case 0b101010: // VSSRL
|
||||||
// case 0b101011: // VSSRA
|
// case 0b101011: // VSSRA
|
||||||
// case 0b101110: // VNCLIPU
|
// case 0b101110: // VNCLIPU
|
||||||
@ -674,29 +685,29 @@ std::function<bool(uint64_t, dest_elem_t&, src_elem_t, src_elem_t)> get_sat_func
|
|||||||
else if(funct3 == OPMVV || funct3 == OPMVX)
|
else if(funct3 == OPMVV || funct3 == OPMVX)
|
||||||
switch(funct6) {
|
switch(funct6) {
|
||||||
case 0b001000: // VAADDU
|
case 0b001000: // VAADDU
|
||||||
return [](uint64_t vxrm, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||||
auto res = static_cast<twice_t<src_elem_t>>(vs2) + static_cast<twice_t<src_elem_t>>(vs1);
|
auto res = static_cast<twice_t<src_elem_t>>(vs2) + static_cast<twice_t<src_elem_t>>(vs1);
|
||||||
vd = roundoff_unsigned(res, 1, vxrm);
|
vd = roundoff(res, 1, vxrm);
|
||||||
return 0;
|
return 0;
|
||||||
};
|
};
|
||||||
case 0b001001: // VAADD
|
case 0b001001: // VAADD
|
||||||
return [](uint64_t vxrm, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||||
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) +
|
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) +
|
||||||
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
||||||
vd = roundoff_signed(res, 1, vxrm);
|
vd = roundoff(res, 1, vxrm);
|
||||||
return 0;
|
return 0;
|
||||||
};
|
};
|
||||||
case 0b001010: // VASUBU
|
case 0b001010: // VASUBU
|
||||||
return [](uint64_t vxrm, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||||
auto res = static_cast<twice_t<src_elem_t>>(vs2) - static_cast<twice_t<src_elem_t>>(vs1);
|
auto res = static_cast<twice_t<src_elem_t>>(vs2) - static_cast<twice_t<src_elem_t>>(vs1);
|
||||||
vd = roundoff_unsigned(res, 1, vxrm);
|
vd = roundoff(res, 1, vxrm);
|
||||||
return 0;
|
return 0;
|
||||||
};
|
};
|
||||||
case 0b001011: // VASUB
|
case 0b001011: // VASUB
|
||||||
return [](uint64_t vxrm, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
return [](uint64_t vxrm, vtype_t vtype, dest_elem_t& vd, src_elem_t vs2, src_elem_t vs1) {
|
||||||
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) -
|
auto res = static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)) -
|
||||||
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
static_cast<twice_t<std::make_signed_t<src_elem_t>>>(static_cast<std::make_signed_t<src_elem_t>>(vs1));
|
||||||
vd = roundoff_signed(res, 1, vxrm);
|
vd = roundoff(res, 1, vxrm);
|
||||||
return 0;
|
return 0;
|
||||||
};
|
};
|
||||||
default:
|
default:
|
||||||
@ -720,7 +731,7 @@ bool sat_vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t
|
|||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
saturated |= fn(vxrm, vd_view[idx], vs2_view[idx], vs1_view[idx]);
|
saturated |= fn(vxrm, vtype, vd_view[idx], vs2_view[idx], vs1_view[idx]);
|
||||||
} else {
|
} else {
|
||||||
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
@ -747,7 +758,7 @@ bool sat_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl
|
|||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
saturated |= fn(vxrm, vd_view[idx], vs2_view[idx], imm);
|
saturated |= fn(vxrm, vtype, vd_view[idx], vs2_view[idx], imm);
|
||||||
} else {
|
} else {
|
||||||
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user