adds more functions, up to slide
parent c1277b6528
commit a26505cb5c
@@ -497,6 +497,131 @@ if(vector != null) {%>
void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, unsigned vd, unsigned vs2, unsigned vs1){
    return softvector::mask_mask_op<${vlen}>(V, funct6, funct3, vl, vstart, vd, vs2, vs1);
}
uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2){
    return softvector::vcpop<${vlen}>(V, vl, vstart, vm, vs2);
}
int64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2){
    return softvector::vfirst<${vlen}>(V, vl, vstart, vm, vs2);
}
void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl, uint64_t vstart, bool vm, unsigned vd, unsigned vs2){
    return softvector::mask_set_op<${vlen}>(V, enc, vl, vstart, vm, vd, vs2);
}
void viota(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t sew_val){
    switch(sew_val){
    case 0b000:
        return softvector::viota<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2);
    case 0b001:
        return softvector::viota<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2);
    case 0b010:
        return softvector::viota<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2);
    case 0b011:
        return softvector::viota<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2);
    default:
        throw std::runtime_error("Unsupported sew bit value");
    }
}
void vid(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t sew_val){
    switch(sew_val){
    case 0b000:
        return softvector::vid<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd);
    case 0b001:
        return softvector::vid<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd);
    case 0b010:
        return softvector::vid<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd);
    case 0b011:
        return softvector::vid<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd);
    default:
        throw std::runtime_error("Unsupported sew bit value");
    }
}
void scalar_to_vector(uint8_t* V, softvector::vtype_t vtype, unsigned vd, uint64_t val, uint8_t sew_val){
    switch(sew_val){
    case 0b000:
        softvector::scalar_move<${vlen}, uint8_t>(V, vtype, vd, val, true);
        break;
    case 0b001:
        softvector::scalar_move<${vlen}, uint16_t>(V, vtype, vd, val, true);
        break;
    case 0b010:
        softvector::scalar_move<${vlen}, uint32_t>(V, vtype, vd, val, true);
        break;
    case 0b011:
        softvector::scalar_move<${vlen}, uint64_t>(V, vtype, vd, val, true);
        break;
    default:
        throw std::runtime_error("Unsupported sew bit value");
    }
}
uint64_t scalar_from_vector(uint8_t* V, softvector::vtype_t vtype, unsigned vd, uint8_t sew_val){
    switch(sew_val){
    case 0b000:
        return softvector::scalar_move<${vlen}, uint8_t>(V, vtype, vd, 0, false);
    case 0b001:
        return softvector::scalar_move<${vlen}, uint16_t>(V, vtype, vd, 0, false);
    case 0b010:
        return softvector::scalar_move<${vlen}, uint32_t>(V, vtype, vd, 0, false);
    case 0b011:
        return softvector::scalar_move<${vlen}, uint64_t>(V, vtype, vd, 0, false);
    default:
        throw std::runtime_error("Unsupported sew bit value");
    }
}
void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
    switch(sew_val){
    case 0b000:
        return softvector::vector_slide<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b001:
        return softvector::vector_slide<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b010:
        return softvector::vector_slide<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b011:
        return softvector::vector_slide<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    default:
        throw std::runtime_error("Unsupported sew bit value");
    }
}
void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
    switch(sew_val){
    case 0b000:
        return softvector::vector_slide<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm);
    case 0b001:
        return softvector::vector_slide<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm);
    case 0b010:
        return softvector::vector_slide<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm);
    case 0b011:
        return softvector::vector_slide<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, -imm);
    default:
        throw std::runtime_error("Unsupported sew bit value");
    }
}
void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val) {
    switch(sew_val){
    case 0b000:
        return softvector::vector_slide1up<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b001:
        return softvector::vector_slide1up<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b010:
        return softvector::vector_slide1up<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b011:
        return softvector::vector_slide1up<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    default:
        throw std::runtime_error("Unsupported sew bit value");
    }
}
void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val) {
    switch(sew_val){
    case 0b000:
        return softvector::vector_slide1down<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b001:
        return softvector::vector_slide1down<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b010:
        return softvector::vector_slide1down<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b011:
        return softvector::vector_slide1down<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    default:
        throw std::runtime_error("Unsupported sew bit value");
    }
}
<%}%>
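For reference, every sew_val switch above follows the RVV vtype encoding, where SEW = 8 * 2^vsew. A minimal standalone sketch of the mapping these switches assume (illustrative only):

#include <cstdint>
#include <cstdio>

// vsew encoding per the RVV spec: 0b000 -> 8-bit, 0b001 -> 16-bit,
// 0b010 -> 32-bit, 0b011 -> 64-bit elements
unsigned sew_bits_to_width(uint8_t sew_val) { return 8u << sew_val; }

int main() {
    for(unsigned v = 0; v < 4; ++v)
        std::printf("vsew=%u -> SEW=%u bits\n", v, sew_bits_to_width(static_cast<uint8_t>(v)));
}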
uint64_t fetch_count{0};
uint64_t tval{0};
@@ -102,6 +102,20 @@ void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
                   unsigned vs2, unsigned vs1);
template <unsigned VLEN>
void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, unsigned vd, unsigned vs2, unsigned vs1);
template <unsigned VLEN> uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2);
template <unsigned VLEN> uint64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2);
template <unsigned VLEN> void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl, uint64_t vstart, bool vm, unsigned vd, unsigned vs2);
template <unsigned VLEN, typename src_elem_t>
void viota(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2);
template <unsigned VLEN, typename src_elem_t> void vid(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd);
template <unsigned VLEN, typename src_elem_t> uint64_t scalar_move(uint8_t* V, vtype_t vtype, unsigned vd, uint64_t val, bool to_vector);
template <unsigned VLEN, typename src_elem_t>
void vector_slide(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
template <unsigned VLEN, typename src_elem_t>
void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
template <unsigned VLEN, typename src_elem_t>
void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);

} // namespace softvector
#include "vm/vector_functions.hpp"
#endif /* _VM_VECTOR_FUNCTIONS_H_ */
@@ -57,7 +57,7 @@ template <typename elem_t> struct vreg_view {
        return *(reinterpret_cast<elem_t*>(start) + idx);
    }
};

// TODO: change the order of parameters so that it is in sync with read_vmask
template <unsigned VLEN, typename elem_t> vreg_view<elem_t> get_vreg(uint8_t* V, uint8_t reg_idx, uint16_t elem_count) {
    assert(V + elem_count * sizeof(elem_t) <= V + VLEN * RFS / 8);
    return {V + VLEN / 8 * reg_idx, elem_count};
@@ -122,9 +122,6 @@ std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsi
        return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 | vs2; };
    case 0b001011: // VXOR
        return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 ^ vs2; };
-    // case 0b001100: // VRGATHER
-    // case 0b001110: // VRGATHEREI16
-    // case 0b001111: // VSLIDEDOWN
    case 0b010000: // VADC
        return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
    case 0b010010: // VSBC
@@ -134,7 +131,6 @@ std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsi
        };
    case 0b100101: // VSLL
        return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 << (vs1 & shift_mask<src2_elem_t>()); };
-    // case 0b100111: // VMV<NR>R
    case 0b101000: // VSRL
        return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 >> (vs1 & shift_mask<src2_elem_t>()); };
    case 0b101001: // VSRA
@@ -152,9 +148,6 @@ std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsi
    }
    else if(funct3 == OPMVV || funct3 == OPMVX)
        switch(funct6) {
-        // case 0b001110: // VSLIDE1UP
-        // case 0b001111: // VSLIDE1DOWN
-        // case 0b010111: // VCOMPRESS
        case 0b100000: // VDIVU
            return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) -> dest_elem_t {
                if(vs1 == 0)
@@ -493,13 +486,13 @@ void mask_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t v
template <typename dest_elem_t, typename src2_elem_t = dest_elem_t>
std::function<dest_elem_t(src2_elem_t)> get_unary_fn(unsigned unary_op) {
    switch(unary_op) {
-    case 0b00111: // vsext.vf2
-    case 0b00101: // vsext.vf4
-    case 0b00011: // vsext.vf8
+    case 0b00111: // VSEXT.VF2
+    case 0b00101: // VSEXT.VF4
+    case 0b00011: // VSEXT.VF8
        return [](src2_elem_t vs2) { return static_cast<std::make_signed_t<src2_elem_t>>(vs2); };
-    case 0b00110: // vzext.vf2
-    case 0b00100: // vzext.vf4
-    case 0b00010: // vzext.vf8
+    case 0b00110: // VZEXT.VF2
+    case 0b00100: // VZEXT.VF4
+    case 0b00010: // VZEXT.VF8
        return [](src2_elem_t vs2) { return vs2; };
    default:
        throw std::runtime_error("Unknown funct in get_unary_fn");
@@ -818,7 +811,7 @@ std::function<void(dest_elem_t&, src_elem_t)> get_red_funct(unsigned funct6, uns
        return [](dest_elem_t& running_total, src_elem_t vs2) { return running_total += static_cast<dest_elem_t>(vs2); };
    case 0b110001: // VWREDSUM
        return [](dest_elem_t& running_total, src_elem_t vs2) {
-            // cast the signed vs2 elem to unsigned to enable wraparound on overflow
+            // cast the signed vs2 elem to unsigned to enable wrap around on overflow
            return running_total += static_cast<dest_elem_t>(
                static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)));
        };
@@ -889,7 +882,7 @@ void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uin
    auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
    auto vd_view = read_vmask<VLEN>(V, elem_count, vd);
    auto fn = get_mask_funct<unsigned>(funct6, funct3); // could be bool, but would break the make_signed_t in get_mask_funct
-    for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) {
+    for(unsigned idx = vstart; idx < vl; idx++) {
        unsigned new_bit_value = fn(vs2_view[idx], vs1_view[idx]);
        uint8_t* cur_mask_byte_addr = vd_view.start + idx / 8;
        unsigned cur_bit = idx % 8;
@@ -906,4 +899,173 @@ void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uin
    }
    return;
}
template <unsigned VLEN> uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2) {
    uint64_t elem_count = VLEN;
    auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    unsigned running_total = 0;
    for(unsigned idx = vstart; idx < vl; idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active && vs2_view[idx])
            running_total += 1;
    }
    return running_total;
}
template <unsigned VLEN> uint64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2) {
    uint64_t elem_count = VLEN;
    auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    for(unsigned idx = vstart; idx < vl; idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active && vs2_view[idx])
            return idx;
    }
    return -1; // wraps to all-ones; the int64_t caller reads this back as -1
}
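Both functions reduce a mask register: vcpop counts the active set bits of vs2 up to vl, while vfirst returns the index of the first active set bit (or -1 if none). A minimal reference sketch of the same semantics over a plain bit array, ignoring masking:

#include <cstdint>
#include <vector>

uint64_t cpop_ref(const std::vector<bool>& bits, uint64_t vl) {
    uint64_t total = 0;
    for(uint64_t i = 0; i < vl; ++i)
        total += bits[i]; // count set bits among the first vl elements
    return total;
}

int64_t first_ref(const std::vector<bool>& bits, uint64_t vl) {
    for(uint64_t i = 0; i < vl; ++i)
        if(bits[i])
            return static_cast<int64_t>(i); // index of the first set bit
    return -1; // no set bit found
}
// e.g. bits = {0,0,1,0,1}, vl = 5: cpop_ref -> 2, first_ref -> 2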
inline std::function<bool(bool&, bool)> get_mask_set_funct(unsigned enc) {
    switch(enc) {
    case 0b00001: // VMSBF
        return [](bool& marker, bool vs2) {
            if(marker)
                return 0;
            if(vs2) {
                marker = true;
                return 0;
            } else
                return 1;
        };
    case 0b00010: // VMSOF
        return [](bool& marker, bool vs2) {
            if(marker)
                return 0;
            if(vs2) {
                marker = true;
                return 1;
            } else
                return 0;
        };
    case 0b00011: // VMSIF
        return [](bool& marker, bool vs2) {
            if(marker)
                return 0;
            if(vs2) {
                marker = true;
                return 1;
            } else
                return 1;
        };
    case 0b10001: // VID
    default:
        throw std::runtime_error("Unknown enc in get_mask_set_funct");
    }
}
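The marker-based lambdas above implement set-before-first (VMSBF), set-only-first (VMSOF) and set-including-first (VMSIF). A standalone sketch applying the same marker logic to one sample mask:

#include <cstdint>
#include <cstdio>

int main() {
    uint8_t vs2 = 0b00010100; // first set bit at index 2 (LSB first)
    uint8_t sbf = 0, sof = 0, sif = 0;
    bool marker = false;
    for(unsigned i = 0; i < 8; ++i) {
        bool bit = (vs2 >> i) & 1;
        if(!marker) {
            sbf |= static_cast<uint8_t>(!bit) << i; // bits strictly before the first set bit
            sif |= 1u << i;                         // bits up to and including it
            if(bit) {
                sof |= 1u << i;                     // only the first set bit itself
                marker = true;
            }
        }
    }
    std::printf("vmsbf=%#x vmsof=%#x vmsif=%#x\n", sbf, sof, sif); // 0x3 0x4 0x7
}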
template <unsigned VLEN> void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl, uint64_t vstart, bool vm, unsigned vd, unsigned vs2) {
    uint64_t elem_count = VLEN;
    auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
    auto vd_view = read_vmask<VLEN>(V, elem_count, vd);
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    auto fn = get_mask_set_funct(enc);
    bool marker = false;
    for(unsigned idx = vstart; idx < vl; idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active) {
            unsigned new_bit_value = fn(marker, vs2_view[idx]);
            uint8_t* cur_mask_byte_addr = vd_view.start + idx / 8;
            unsigned cur_bit = idx % 8;
            *cur_mask_byte_addr = (*cur_mask_byte_addr & ~(1U << cur_bit)) | static_cast<unsigned>(new_bit_value) << cur_bit;
        }
    }
    // the tail is all elements of the destination register past vl
    for(unsigned idx = vl; idx < VLEN; idx++) {
        // always tail agnostic
        // this is a nop, placeholder for vta behavior
        unsigned new_bit_value = vd_view[idx];
        uint8_t* cur_mask_byte_addr = vd_view.start + idx / 8;
        unsigned cur_bit = idx % 8;
        *cur_mask_byte_addr = (*cur_mask_byte_addr & ~(1U << cur_bit)) | static_cast<unsigned>(new_bit_value) << cur_bit;
    }
}
template <unsigned VLEN, typename src_elem_t>
void viota(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2) {
    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
    auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
    auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    unsigned current = 0;
    for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active) {
            vd_view[idx] = current;
            if(vs2_view[idx])
                current += 1;
        }
    }
    return;
}
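viota is a prefix count: each active destination element receives the number of set vs2 mask bits at lower indices. A minimal reference of that semantics, ignoring masking:

#include <cstdint>
#include <vector>

std::vector<uint32_t> viota_ref(const std::vector<bool>& vs2) {
    std::vector<uint32_t> vd(vs2.size());
    uint32_t current = 0;
    for(size_t i = 0; i < vs2.size(); ++i) {
        vd[i] = current; // count of set bits below index i
        current += vs2[i];
    }
    return vd;
}
// vs2 = {1,0,1,1,0} -> vd = {0,1,1,2,3}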
template <unsigned VLEN, typename src_elem_t> void vid(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd) {
    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
    auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active) {
            vd_view[idx] = idx;
        }
    }
    return;
}
template <unsigned VLEN, typename src_elem_t> uint64_t scalar_move(uint8_t* V, vtype_t vtype, unsigned vd, uint64_t val, bool to_vector) {
    auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, 1);
    if(to_vector) {
        vd_view[0] = val;
        for(unsigned idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) {
            vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; // tail-agnostic nop placeholder
        }
    }
    return static_cast<int64_t>(static_cast<std::make_signed_t<src_elem_t>>(vd_view[0]));
}
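On the read path (to_vector == false) scalar_move sign-extends the SEW-wide element 0 into the 64-bit scalar, which is what the nested casts on the return line do. A sketch of just that conversion:

#include <cstdint>
#include <type_traits>

template <typename elem_t> uint64_t sext_elem(elem_t v) {
    // widen through the signed type so the sign bit propagates to 64 bits
    return static_cast<uint64_t>(static_cast<int64_t>(static_cast<std::make_signed_t<elem_t>>(v)));
}
// sext_elem<uint8_t>(0x80) == 0xFFFFFFFFFFFFFF80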
template <unsigned VLEN, typename src_elem_t>
void vector_slide(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
    uint64_t elem_count = VLEN * vtype.lmul() / (sizeof(src_elem_t) * 8);
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    auto vs2_view = get_vreg<VLEN, src_elem_t>(V, vs2, elem_count);
    auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
    for(unsigned idx = std::max<int64_t>(vstart, imm); idx < vl; idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        src_elem_t src_elem = 0;
        if(imm >= 0 || (idx - imm < elem_count))
            src_elem = vs2_view[idx - imm];
        if(mask_active) {
            vd_view[idx] = src_elem;
        } else {
            vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx]; // mask-agnostic nop placeholder
        }
    }
    for(unsigned idx = vl; idx < elem_count; idx++) {
        vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx]; // tail-agnostic nop placeholder
    }
    return;
}
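vector_slide covers both directions with one signed offset: a positive imm slides up (vd[i] = vs2[i - imm]), a negative imm slides down, and reads past the end of the source yield zero. A reference sketch of the slide-up case:

#include <cstdint>
#include <vector>

std::vector<uint8_t> slideup_ref(const std::vector<uint8_t>& vs2, size_t off) {
    std::vector<uint8_t> vd(vs2.size(), 0); // elements below off stay untouched (zeroed here for illustration)
    for(size_t i = off; i < vs2.size(); ++i)
        vd[i] = vs2[i - off];
    return vd;
}
// vs2 = {1,2,3,4,5,6,7,8}, off = 2 -> vd = {0,0,1,2,3,4,5,6}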
template <unsigned VLEN, typename src_elem_t>
void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
    vector_slide<VLEN, src_elem_t>(V, vl, vstart, vtype, vm, vd, vs2, 1);
    vmask_view mask_reg = read_vmask<VLEN>(V, 1);
    auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, 1);
    if(vm || mask_reg[0])
        vd_view[0] = imm;
    else
        vd_view[0] = vtype.vma() ? vd_view[0] : vd_view[0]; // mask-agnostic nop placeholder
}
template <unsigned VLEN, typename src_elem_t>
void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
    vector_slide<VLEN, src_elem_t>(V, vl, vstart, vtype, vm, vd, vs2, -1);
    if(vl > 0) {
        vmask_view mask_reg = read_vmask<VLEN>(V, vl);
        auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, vl);
        if(vm || mask_reg[vl - 1])
            vd_view[vl - 1] = imm;
        else
            vd_view[vl - 1] = vtype.vma() ? vd_view[vl - 1] : vd_view[vl - 1]; // mask-agnostic nop placeholder for the last element
    }
}
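Both slide1 variants reuse vector_slide with an offset of ±1 and then patch a single boundary element with the scalar: element 0 for vslide1up, element vl-1 for vslide1down. A worked example, assuming 4 elements and vl = 4:

// vslide1up:   vs2 = {10,20,30,40}, imm = 7 -> vd = {7,10,20,30}
// vslide1down: vs2 = {10,20,30,40}, imm = 7 -> vd = {20,30,40,7}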
} // namespace softvector