changes to ternary functions for Multiply-Add Instructions
This commit is contained in:
parent
9ba9d2432c
commit
ac1322d66b
@ -385,7 +385,6 @@ if(vector != null) {%>
|
|||||||
throw new std::runtime_error("Unsupported sew bit value");
|
throw new std::runtime_error("Unsupported sew bit value");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
<%}%>
|
<%}%>
|
||||||
uint64_t fetch_count{0};
|
uint64_t fetch_count{0};
|
||||||
uint64_t tval{0};
|
uint64_t tval{0};
|
||||||
|
@ -94,40 +94,40 @@ template <> struct twice<uint64_t> { using type = __uint128_t; };
|
|||||||
template <class T> using twice_t = typename twice<T>::type; // for convenience
|
template <class T> using twice_t = typename twice<T>::type; // for convenience
|
||||||
|
|
||||||
template <typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = dest_elem_t>
|
template <typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = dest_elem_t>
|
||||||
std::function<dest_elem_t(src2_elem_t, src1_elem_t)> get_funct(unsigned funct6, unsigned funct3) {
|
std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsigned funct6, unsigned funct3) {
|
||||||
if(funct3 == OPIVV || funct3 == OPIVX || funct3 == OPIVI)
|
if(funct3 == OPIVV || funct3 == OPIVX || funct3 == OPIVI)
|
||||||
switch(funct6) {
|
switch(funct6) {
|
||||||
case 0b000000: // VADD
|
case 0b000000: // VADD
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
|
||||||
case 0b000010: // VSUB
|
case 0b000010: // VSUB
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 - vs1; };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 - vs1; };
|
||||||
case 0b000011: // VRSUB
|
case 0b000011: // VRSUB
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 - vs2; };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 - vs2; };
|
||||||
case 0b000100: // VMINU
|
case 0b000100: // VMINU
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return std::min(vs2, static_cast<src2_elem_t>(vs1)); };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return std::min(vs2, static_cast<src2_elem_t>(vs1)); };
|
||||||
case 0b000101: // VMIN
|
case 0b000101: // VMIN
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return std::min(static_cast<std::make_signed_t<src2_elem_t>>(vs2), static_cast<std::make_signed_t<src2_elem_t>>(vs1));
|
return std::min(static_cast<std::make_signed_t<src2_elem_t>>(vs2), static_cast<std::make_signed_t<src2_elem_t>>(vs1));
|
||||||
};
|
};
|
||||||
case 0b000110: // VMAXU
|
case 0b000110: // VMAXU
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return std::max(vs2, static_cast<src2_elem_t>(vs1)); };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return std::max(vs2, static_cast<src2_elem_t>(vs1)); };
|
||||||
case 0b000111: // VMAX
|
case 0b000111: // VMAX
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return std::max(static_cast<std::make_signed_t<src2_elem_t>>(vs2), static_cast<std::make_signed_t<src2_elem_t>>(vs1));
|
return std::max(static_cast<std::make_signed_t<src2_elem_t>>(vs2), static_cast<std::make_signed_t<src2_elem_t>>(vs1));
|
||||||
};
|
};
|
||||||
case 0b001001: // VAND
|
case 0b001001: // VAND
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 & vs2; };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 & vs2; };
|
||||||
case 0b001010: // VOR
|
case 0b001010: // VOR
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 | vs2; };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 | vs2; };
|
||||||
case 0b001011: // VXOR
|
case 0b001011: // VXOR
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs1 ^ vs2; };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 ^ vs2; };
|
||||||
// case 0b001100: // VRGATHER
|
// case 0b001100: // VRGATHER
|
||||||
// case 0b001110: // VRGATHEREI16
|
// case 0b001110: // VRGATHEREI16
|
||||||
// case 0b001111: // VLSLIDEDOWN
|
// case 0b001111: // VLSLIDEDOWN
|
||||||
case 0b010000: // VADC
|
case 0b010000: // VADC
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
|
||||||
case 0b010010: // VSBC
|
case 0b010010: // VSBC
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2) -
|
return static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2) -
|
||||||
static_cast<std::make_signed_t<src1_elem_t>>(vs1));
|
static_cast<std::make_signed_t<src1_elem_t>>(vs1));
|
||||||
};
|
};
|
||||||
@ -137,19 +137,19 @@ std::function<dest_elem_t(src2_elem_t, src1_elem_t)> get_funct(unsigned funct6,
|
|||||||
// case 0b100010: // VSSUBU
|
// case 0b100010: // VSSUBU
|
||||||
// case 0b100011: // VSSUB
|
// case 0b100011: // VSSUB
|
||||||
case 0b100101: // VSLL
|
case 0b100101: // VSLL
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 << (vs1 & shift_mask<src2_elem_t>()); };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 << (vs1 & shift_mask<src2_elem_t>()); };
|
||||||
// case 0b100111: // VSMUL
|
// case 0b100111: // VSMUL
|
||||||
// case 0b100111: // VMV<NR>R
|
// case 0b100111: // VMV<NR>R
|
||||||
case 0b101000: // VSRL
|
case 0b101000: // VSRL
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 >> (vs1 & shift_mask<src2_elem_t>()); };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 >> (vs1 & shift_mask<src2_elem_t>()); };
|
||||||
case 0b101001: // VSRA
|
case 0b101001: // VSRA
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return static_cast<std::make_signed_t<src2_elem_t>>(vs2) >> (vs1 & shift_mask<src2_elem_t>());
|
return static_cast<std::make_signed_t<src2_elem_t>>(vs2) >> (vs1 & shift_mask<src2_elem_t>());
|
||||||
};
|
};
|
||||||
case 0b101100: // VNSRL
|
case 0b101100: // VNSRL
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 >> (vs1 & shift_mask<src2_elem_t>()); };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 >> (vs1 & shift_mask<src2_elem_t>()); };
|
||||||
case 0b101101: // VNSRA
|
case 0b101101: // VNSRA
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return static_cast<std::make_signed_t<src2_elem_t>>(vs2) >> (vs1 & shift_mask<src2_elem_t>());
|
return static_cast<std::make_signed_t<src2_elem_t>>(vs2) >> (vs1 & shift_mask<src2_elem_t>());
|
||||||
};
|
};
|
||||||
// case 0b101110: // VNCLIPU
|
// case 0b101110: // VNCLIPU
|
||||||
@ -185,14 +185,14 @@ std::function<dest_elem_t(src2_elem_t, src1_elem_t)> get_funct(unsigned funct6,
|
|||||||
// case 0b011110: // VMNOR
|
// case 0b011110: // VMNOR
|
||||||
// case 0b011111: // VMXNOR
|
// case 0b011111: // VMXNOR
|
||||||
case 0b100000: // VDIVU
|
case 0b100000: // VDIVU
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) -> dest_elem_t {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) -> dest_elem_t {
|
||||||
if(vs1 == 0)
|
if(vs1 == 0)
|
||||||
return -1;
|
return -1;
|
||||||
else
|
else
|
||||||
return vs2 / vs1;
|
return vs2 / vs1;
|
||||||
};
|
};
|
||||||
case 0b100001: // VDIV
|
case 0b100001: // VDIV
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) -> dest_elem_t {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) -> dest_elem_t {
|
||||||
if(vs1 == 0)
|
if(vs1 == 0)
|
||||||
return -1;
|
return -1;
|
||||||
else if(vs2 == std::numeric_limits<std::make_signed_t<src2_elem_t>>::min() &&
|
else if(vs2 == std::numeric_limits<std::make_signed_t<src2_elem_t>>::min() &&
|
||||||
@ -202,14 +202,14 @@ std::function<dest_elem_t(src2_elem_t, src1_elem_t)> get_funct(unsigned funct6,
|
|||||||
return static_cast<std::make_signed_t<src2_elem_t>>(vs2) / static_cast<std::make_signed_t<src1_elem_t>>(vs1);
|
return static_cast<std::make_signed_t<src2_elem_t>>(vs2) / static_cast<std::make_signed_t<src1_elem_t>>(vs1);
|
||||||
};
|
};
|
||||||
case 0b100010: // VREMU
|
case 0b100010: // VREMU
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) -> dest_elem_t {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) -> dest_elem_t {
|
||||||
if(vs1 == 0)
|
if(vs1 == 0)
|
||||||
return vs2;
|
return vs2;
|
||||||
else
|
else
|
||||||
return vs2 % vs1;
|
return vs2 % vs1;
|
||||||
};
|
};
|
||||||
case 0b100011: // VREM
|
case 0b100011: // VREM
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) -> dest_elem_t {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) -> dest_elem_t {
|
||||||
if(vs1 == 0)
|
if(vs1 == 0)
|
||||||
return vs2;
|
return vs2;
|
||||||
else if(vs2 == std::numeric_limits<std::make_signed_t<src2_elem_t>>::min() &&
|
else if(vs2 == std::numeric_limits<std::make_signed_t<src2_elem_t>>::min() &&
|
||||||
@ -219,68 +219,72 @@ std::function<dest_elem_t(src2_elem_t, src1_elem_t)> get_funct(unsigned funct6,
|
|||||||
return static_cast<std::make_signed_t<src2_elem_t>>(vs2) % static_cast<std::make_signed_t<src1_elem_t>>(vs1);
|
return static_cast<std::make_signed_t<src2_elem_t>>(vs2) % static_cast<std::make_signed_t<src1_elem_t>>(vs1);
|
||||||
};
|
};
|
||||||
case 0b100100: // VMULHU
|
case 0b100100: // VMULHU
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return (static_cast<twice_t<src2_elem_t>>(vs2) * static_cast<twice_t<src2_elem_t>>(vs1)) >> sizeof(dest_elem_t) * 8;
|
return (static_cast<twice_t<src2_elem_t>>(vs2) * static_cast<twice_t<src2_elem_t>>(vs1)) >> sizeof(dest_elem_t) * 8;
|
||||||
};
|
};
|
||||||
case 0b100101: // VMUL
|
case 0b100101: // VMUL
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return static_cast<std::make_signed_t<src2_elem_t>>(vs2) * static_cast<std::make_signed_t<src1_elem_t>>(vs1);
|
return static_cast<std::make_signed_t<src2_elem_t>>(vs2) * static_cast<std::make_signed_t<src1_elem_t>>(vs1);
|
||||||
};
|
};
|
||||||
case 0b100110: // VMULHSU
|
case 0b100110: // VMULHSU
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return (static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) *
|
return (static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) *
|
||||||
static_cast<twice_t<src2_elem_t>>(vs1)) >>
|
static_cast<twice_t<src2_elem_t>>(vs1)) >>
|
||||||
sizeof(dest_elem_t) * 8;
|
sizeof(dest_elem_t) * 8;
|
||||||
};
|
};
|
||||||
case 0b100111: // VMULH
|
case 0b100111: // VMULH
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return (static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) *
|
return (static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) *
|
||||||
static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src1_elem_t>>(vs1))) >>
|
static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src1_elem_t>>(vs1))) >>
|
||||||
sizeof(dest_elem_t) * 8;
|
sizeof(dest_elem_t) * 8;
|
||||||
};
|
};
|
||||||
// case 0b101001: // VMADD
|
case 0b101001: // VMADD
|
||||||
// case 0b101011: // VNMSUB
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 * vd + vs2; };
|
||||||
// case 0b101101: // VMACC
|
case 0b101011: // VNMSUB
|
||||||
// case 0b101111: // VNMSAC
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return -1 * (vs1 * vd) + vs2; };
|
||||||
|
case 0b101101: // VMACC
|
||||||
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 * vs2 + vd; };
|
||||||
|
case 0b101111: // VNMSAC
|
||||||
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return -1 * (vs1 * vs2) + vd; };
|
||||||
case 0b110000: // VWADDU
|
case 0b110000: // VWADDU
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
|
||||||
case 0b110001: // VWADD
|
case 0b110001: // VWADD
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2) +
|
return static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2) +
|
||||||
static_cast<std::make_signed_t<src1_elem_t>>(vs1));
|
static_cast<std::make_signed_t<src1_elem_t>>(vs1));
|
||||||
};
|
};
|
||||||
case 0b110010: // VWSUBU
|
case 0b110010: // VWSUBU
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 - vs1; };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 - vs1; };
|
||||||
case 0b110011: // VWSUB
|
case 0b110011: // VWSUB
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2) -
|
return static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2) -
|
||||||
static_cast<std::make_signed_t<src1_elem_t>>(vs1));
|
static_cast<std::make_signed_t<src1_elem_t>>(vs1));
|
||||||
};
|
};
|
||||||
case 0b110100: // VWADDU.W
|
case 0b110100: // VWADDU.W
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
|
||||||
case 0b110101: // VWADD.W
|
case 0b110101: // VWADD.W
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2) +
|
return static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2) +
|
||||||
static_cast<std::make_signed_t<src1_elem_t>>(vs1));
|
static_cast<std::make_signed_t<src1_elem_t>>(vs1));
|
||||||
};
|
};
|
||||||
case 0b110110: // VWSUBU.W
|
case 0b110110: // VWSUBU.W
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) { return vs2 - vs1; };
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 - vs1; };
|
||||||
case 0b110111: // VWSUB.W
|
case 0b110111: // VWSUB.W
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2) -
|
return static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2) -
|
||||||
static_cast<std::make_signed_t<src1_elem_t>>(vs1));
|
static_cast<std::make_signed_t<src1_elem_t>>(vs1));
|
||||||
};
|
};
|
||||||
case 0b111000: // VWMULU
|
case 0b111000: // VWMULU
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return (static_cast<twice_t<src2_elem_t>>(vs2) * static_cast<twice_t<src2_elem_t>>(vs1));
|
return (static_cast<twice_t<src2_elem_t>>(vs2) * static_cast<twice_t<src2_elem_t>>(vs1));
|
||||||
};
|
};
|
||||||
case 0b111010: // VWMULSU
|
case 0b111010: // VWMULSU
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return (static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) *
|
return (static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) *
|
||||||
static_cast<twice_t<src2_elem_t>>(vs1));
|
static_cast<twice_t<src2_elem_t>>(vs1));
|
||||||
};
|
};
|
||||||
case 0b111011: // VWMUL
|
case 0b111011: // VWMUL
|
||||||
return [](src2_elem_t vs2, src1_elem_t vs1) {
|
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) {
|
||||||
return (static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) *
|
return (static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src2_elem_t>>(vs2)) *
|
||||||
static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src1_elem_t>>(vs1)));
|
static_cast<twice_t<std::make_signed_t<src2_elem_t>>>(static_cast<std::make_signed_t<src1_elem_t>>(vs1)));
|
||||||
};
|
};
|
||||||
@ -310,19 +314,18 @@ void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl,
|
|||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
auto res = fn(vs2_view[idx], vs1_view[idx]);
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]);
|
||||||
vd_view[idx] = res;
|
|
||||||
} else {
|
} else {
|
||||||
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if(carry == carry_t::SUB_CARRY) {
|
} else if(carry == carry_t::SUB_CARRY) {
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
vd_view[idx] = fn(vs2_view[idx], vs1_view[idx]) - mask_reg[idx];
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]) - mask_reg[idx];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
vd_view[idx] = fn(vs2_view[idx], vs1_view[idx]) + mask_reg[idx];
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], vs1_view[idx]) + mask_reg[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
@ -347,21 +350,18 @@ void vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
|
|||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
bool mask_active = vm ? 1 : mask_reg[idx];
|
bool mask_active = vm ? 1 : mask_reg[idx];
|
||||||
if(mask_active) {
|
if(mask_active) {
|
||||||
vd_view[idx] = fn(vs2_view[idx], imm);
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm);
|
||||||
} else {
|
} else {
|
||||||
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if(carry == carry_t::SUB_CARRY) {
|
} else if(carry == carry_t::SUB_CARRY) {
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
auto val1 = fn(vs2_view[idx], imm);
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm) - mask_reg[idx];
|
||||||
auto val2 = static_cast<std::make_signed_t<dest_elem_t>>(mask_reg[idx]);
|
|
||||||
auto diff = val1 - val2;
|
|
||||||
vd_view[idx] = fn(vs2_view[idx], imm) - mask_reg[idx];
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
|
||||||
vd_view[idx] = fn(vs2_view[idx], imm) + mask_reg[idx];
|
vd_view[idx] = fn(vd_view[idx], vs2_view[idx], imm) + mask_reg[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user