Adds more vector functions, up to the slide instructions

This commit is contained in:
Eyck-Alexander Jentzsch 2025-02-21 14:59:33 +01:00
parent c1277b6528
commit a26505cb5c
3 changed files with 317 additions and 16 deletions

View File

@ -497,6 +497,131 @@ if(vector != null) {%>
// Forwards a mask-to-mask operation (selected by funct6/funct3) to the
// softvector implementation instantiated for this core's VLEN.
void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, unsigned vd, unsigned vs2, unsigned vs1){
    softvector::mask_mask_op<${vlen}>(V, funct6, funct3, vl, vstart, vd, vs2, vs1);
}
// Counts the active set bits of mask register vs2 via the softvector backend.
uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2){
    uint64_t count = softvector::vcpop<${vlen}>(V, vl, vstart, vm, vs2);
    return count;
}
// Returns the index of the first active set bit of mask register vs2, or -1 if none.
// NOTE(review): softvector::vfirst is declared to return uint64_t while this
// wrapper returns int64_t; the -1 sentinel survives the conversion, but the two
// signatures should be aligned — TODO confirm the intended signed return type.
int64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2){
return softvector::vfirst<${vlen}>(V, vl, vstart, vm, vs2);
}
// Forwards a mask set-bit operation (selected by enc) to the softvector backend.
void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl, uint64_t vstart, bool vm, unsigned vd, unsigned vs2){
    softvector::mask_set_op<${vlen}>(V, enc, vl, vstart, vm, vd, vs2);
}
// Dispatches softvector::viota on the element type selected by the 3-bit SEW
// encoding (0b000 = 8-bit ... 0b011 = 64-bit elements).
// Throws std::runtime_error for reserved/unsupported SEW encodings.
void viota(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t sew_val){
    switch(sew_val){
    case 0b000:
        return softvector::viota<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2);
    case 0b001:
        return softvector::viota<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2);
    case 0b010:
        return softvector::viota<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2);
    case 0b011:
        return softvector::viota<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2);
    default:
        // throw by value: `throw new ...` throws a pointer that is never matched
        // by `catch(const std::exception&)` and leaks the exception object
        throw std::runtime_error("Unsupported sew bit value");
    }
}
// Dispatches softvector::vid on the element type selected by the 3-bit SEW
// encoding (0b000 = 8-bit ... 0b011 = 64-bit elements).
// Throws std::runtime_error for reserved/unsupported SEW encodings.
void vid(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t sew_val){
    switch(sew_val){
    case 0b000:
        return softvector::vid<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd);
    case 0b001:
        return softvector::vid<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd);
    case 0b010:
        return softvector::vid<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd);
    case 0b011:
        return softvector::vid<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd);
    default:
        // throw by value: `throw new ...` throws a pointer that is never matched
        // by `catch(const std::exception&)` and leaks the exception object
        throw std::runtime_error("Unsupported sew bit value");
    }
}
// Writes the scalar `val` into vector register vd via softvector::scalar_move
// (to_vector = true), dispatched on the 3-bit SEW encoding.
// Throws std::runtime_error for reserved/unsupported SEW encodings.
void scalar_to_vector(uint8_t* V, softvector::vtype_t vtype, unsigned vd, uint64_t val, uint8_t sew_val){
    switch(sew_val){
    case 0b000:
        softvector::scalar_move<${vlen}, uint8_t>(V, vtype, vd, val, true);
        break;
    case 0b001:
        softvector::scalar_move<${vlen}, uint16_t>(V, vtype, vd, val, true);
        break;
    case 0b010:
        softvector::scalar_move<${vlen}, uint32_t>(V, vtype, vd, val, true);
        break;
    case 0b011:
        softvector::scalar_move<${vlen}, uint64_t>(V, vtype, vd, val, true);
        break;
    default:
        // throw by value: `throw new ...` throws a pointer that is never matched
        // by `catch(const std::exception&)` and leaks the exception object
        throw std::runtime_error("Unsupported sew bit value");
    }
}
// Reads element 0 of vector register vd via softvector::scalar_move
// (to_vector = false), dispatched on the 3-bit SEW encoding; the result is
// sign-extended to 64 bit by scalar_move.
// Throws std::runtime_error for reserved/unsupported SEW encodings.
uint64_t scalar_from_vector(uint8_t* V, softvector::vtype_t vtype, unsigned vd, uint8_t sew_val){
    switch(sew_val){
    case 0b000:
        return softvector::scalar_move<${vlen}, uint8_t>(V, vtype, vd, 0, false);
    case 0b001:
        return softvector::scalar_move<${vlen}, uint16_t>(V, vtype, vd, 0, false);
    case 0b010:
        return softvector::scalar_move<${vlen}, uint32_t>(V, vtype, vd, 0, false);
    case 0b011:
        return softvector::scalar_move<${vlen}, uint64_t>(V, vtype, vd, 0, false);
    default:
        // throw by value: `throw new ...` throws a pointer that is never matched
        // by `catch(const std::exception&)` and leaks the exception object
        throw std::runtime_error("Unsupported sew bit value");
    }
}
// Slides vs2 up by `imm` element positions into vd (vd[i] = vs2[i - imm]),
// dispatched on the 3-bit SEW encoding.
// Throws std::runtime_error for reserved/unsupported SEW encodings.
void vector_slideup(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
    switch(sew_val){
    case 0b000:
        return softvector::vector_slide<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b001:
        return softvector::vector_slide<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b010:
        return softvector::vector_slide<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b011:
        return softvector::vector_slide<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    default:
        // throw by value: `throw new ...` throws a pointer that is never matched
        // by `catch(const std::exception&)` and leaks the exception object
        throw std::runtime_error("Unsupported sew bit value");
    }
}
// Slides vs2 down by `imm` element positions into vd (vd[i] = vs2[i + imm]),
// dispatched on the 3-bit SEW encoding. The slide amount is negated in the
// signed domain before the call: the old `-imm` negated a uint64_t and relied
// on the (pre-C++20 implementation-defined) unsigned->signed wrap.
// Throws std::runtime_error for reserved/unsupported SEW encodings.
void vector_slidedown(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, uint64_t imm, uint8_t sew_val) {
    const int64_t offset = -static_cast<int64_t>(imm);
    switch(sew_val){
    case 0b000:
        return softvector::vector_slide<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, offset);
    case 0b001:
        return softvector::vector_slide<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, offset);
    case 0b010:
        return softvector::vector_slide<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, offset);
    case 0b011:
        return softvector::vector_slide<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, offset);
    default:
        // throw by value: `throw new ...` throws a pointer that is never matched
        // by `catch(const std::exception&)` and leaks the exception object
        throw std::runtime_error("Unsupported sew bit value");
    }
}
// Slides vs2 up by one element and inserts the scalar `imm` at element 0,
// dispatched on the 3-bit SEW encoding.
// Throws std::runtime_error for reserved/unsupported SEW encodings.
void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val) {
    switch(sew_val){
    case 0b000:
        return softvector::vector_slide1up<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b001:
        return softvector::vector_slide1up<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b010:
        return softvector::vector_slide1up<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b011:
        return softvector::vector_slide1up<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    default:
        // throw by value: `throw new ...` throws a pointer that is never matched
        // by `catch(const std::exception&)` and leaks the exception object
        throw std::runtime_error("Unsupported sew bit value");
    }
}
// Slides vs2 down by one element and inserts the scalar `imm` at the last
// body element, dispatched on the 3-bit SEW encoding.
// Throws std::runtime_error for reserved/unsupported SEW encodings.
void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm, uint8_t sew_val) {
    switch(sew_val){
    case 0b000:
        return softvector::vector_slide1down<${vlen}, uint8_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b001:
        return softvector::vector_slide1down<${vlen}, uint16_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b010:
        return softvector::vector_slide1down<${vlen}, uint32_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    case 0b011:
        return softvector::vector_slide1down<${vlen}, uint64_t>(V, vl, vstart, vtype, vm, vd, vs2, imm);
    default:
        // throw by value: `throw new ...` throws a pointer that is never matched
        // by `catch(const std::exception&)` and leaks the exception object
        throw std::runtime_error("Unsupported sew bit value");
    }
}
<%}%>
uint64_t fetch_count{0};
uint64_t tval{0};

View File

@ -102,6 +102,20 @@ void vector_red_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, ui
unsigned vs2, unsigned vs1);
template <unsigned VLEN>
void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, unsigned vd, unsigned vs2, unsigned vs1);
// Population count of the active set bits of mask register vs2 in [vstart, vl).
template <unsigned VLEN> uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2);
// Index of the first active set mask bit, or -1 (wrapped into uint64_t) if none.
// NOTE(review): the generated wrapper returns int64_t for this result — the two
// signatures should be aligned. TODO confirm intended signed return type.
template <unsigned VLEN> uint64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2);
// Mask set-bit instructions (VMSBF/VMSOF/VMSIF), selected by `enc`.
template <unsigned VLEN> void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl, uint64_t vstart, bool vm, unsigned vd, unsigned vs2);
// Running prefix count of set bits of mask vs2, written per active element into vd.
template <unsigned VLEN, typename src_elem_t>
void viota(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2);
// Writes each active element's own index into vd.
template <unsigned VLEN, typename src_elem_t> void vid(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd);
// Scalar <-> element 0 move; returns element 0 sign-extended to 64 bit.
template <unsigned VLEN, typename src_elem_t> uint64_t scalar_move(uint8_t* V, vtype_t vtype, unsigned vd, uint64_t val, bool to_vector);
// Slides vs2 by imm element positions (positive = up, negative = down).
template <unsigned VLEN, typename src_elem_t>
void vector_slide(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
// Slide up by one; `imm` is the scalar written to element 0.
template <unsigned VLEN, typename src_elem_t>
void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
// Slide down by one; `imm` is the scalar written to element vl-1.
template <unsigned VLEN, typename src_elem_t>
void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm);
} // namespace softvector
#include "vm/vector_functions.hpp"
#endif /* _VM_VECTOR_FUNCTIONS_H_ */

View File

@ -57,7 +57,7 @@ template <typename elem_t> struct vreg_view {
return *(reinterpret_cast<elem_t*>(start) + idx);
}
};
// TODO: change the order of parameters so that it is in sync with read_vmask
template <unsigned VLEN, typename elem_t> vreg_view<elem_t> get_vreg(uint8_t* V, uint8_t reg_idx, uint16_t elem_count) {
assert(V + elem_count * sizeof(elem_t) <= V + VLEN * RFS / 8);
return {V + VLEN / 8 * reg_idx, elem_count};
@ -122,9 +122,6 @@ std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsi
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 | vs2; };
case 0b001011: // VXOR
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs1 ^ vs2; };
// case 0b001100: // VRGATHER
// case 0b001110: // VRGATHEREI16
// case 0b001111: // VSLIDEDOWN
case 0b010000: // VADC
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 + vs1; };
case 0b010010: // VSBC
@ -134,7 +131,6 @@ std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsi
};
case 0b100101: // VSLL
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 << (vs1 & shift_mask<src2_elem_t>()); };
// case 0b100111: // VMV<NR>R
case 0b101000: // VSRL
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) { return vs2 >> (vs1 & shift_mask<src2_elem_t>()); };
case 0b101001: // VSRA
@ -152,9 +148,6 @@ std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsi
}
else if(funct3 == OPMVV || funct3 == OPMVX)
switch(funct6) {
// case 0b001110: // VSLIDE1UP
// case 0b001111: // VSLIDE1DOWN
// case 0b010111: // VCOMPRESS
case 0b100000: // VDIVU
return [](dest_elem_t vd, src2_elem_t vs2, src1_elem_t vs1) -> dest_elem_t {
if(vs1 == 0)
@ -493,13 +486,13 @@ void mask_vector_imm_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t v
template <typename dest_elem_t, typename src2_elem_t = dest_elem_t>
std::function<dest_elem_t(src2_elem_t)> get_unary_fn(unsigned unary_op) {
switch(unary_op) {
case 0b00111: // vsext.vf2
case 0b00101: // vsext.vf4
case 0b00011: // vsext.vf8
case 0b00111: // VSEXT.VF2
case 0b00101: // VSEXT.VF4
case 0b00011: // VSEXT.VF8
return [](src2_elem_t vs2) { return static_cast<std::make_signed_t<src2_elem_t>>(vs2); };
case 0b00110: // vzext.vf2
case 0b00100: // vzext.vf4
case 0b00010: // vzext.vf8
case 0b00110: // VZEXT.VF2
case 0b00100: // VZEXT.VF4
case 0b00010: // VZEXT.VF8
return [](src2_elem_t vs2) { return vs2; };
default:
throw new std::runtime_error("Unknown funct in get_unary_fn");
@ -818,7 +811,7 @@ std::function<void(dest_elem_t&, src_elem_t)> get_red_funct(unsigned funct6, uns
return [](dest_elem_t& running_total, src_elem_t vs2) { return running_total += static_cast<dest_elem_t>(vs2); };
case 0b110001: // VWREDSUM
return [](dest_elem_t& running_total, src_elem_t vs2) {
// cast the signed vs2 elem to unsigned to enable wraparound on overflow
// cast the signed vs2 elem to unsigned to enable wrap around on overflow
return running_total += static_cast<dest_elem_t>(
static_cast<std::make_signed_t<dest_elem_t>>(static_cast<std::make_signed_t<src_elem_t>>(vs2)));
};
@ -889,7 +882,7 @@ void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uin
auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
auto vd_view = read_vmask<VLEN>(V, elem_count, vd);
auto fn = get_mask_funct<unsigned>(funct6, funct3); // could be bool, but would break the make_signed_t in get_mask_funct
for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) {
for(unsigned idx = vstart; idx < vl; idx++) {
unsigned new_bit_value = fn(vs2_view[idx], vs1_view[idx]);
uint8_t* cur_mask_byte_addr = vd_view.start + idx / 8;
unsigned cur_bit = idx % 8;
@ -906,4 +899,173 @@ void mask_mask_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uin
}
return;
}
template <unsigned VLEN> uint64_t vcpop(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2) {
uint64_t elem_count = VLEN;
auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
unsigned running_total = 0;
for(unsigned idx = vstart; idx < vl; idx++) {
bool mask_active = vm ? 1 : mask_reg[idx];
if(mask_active && vs2_view[idx])
running_total += 1;
}
return running_total;
}
// Returns the index of the first active set bit of mask register vs2 in
// [vstart, vl), or -1 when no such bit exists.
// NOTE(review): -1 is returned from a uint64_t function (wraps to all-ones),
// and the generated wrapper declares int64_t for this result — align the
// return types so the sentinel is explicit. TODO confirm.
template <unsigned VLEN> uint64_t vfirst(uint8_t* V, uint64_t vl, uint64_t vstart, bool vm, unsigned vs2) {
// a mask register holds one bit per element, VLEN bits total
uint64_t elem_count = VLEN;
auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
for(unsigned idx = vstart; idx < vl; idx++) {
bool mask_active = vm ? 1 : mask_reg[idx];
if(mask_active && vs2_view[idx])
return idx;
}
return -1;
}
// Returns the per-bit update function for the mask set instructions selected
// by `enc` (VMSBF / VMSOF / VMSIF). `marker` tracks whether the first set
// source bit has already been seen; the return value is the new destination
// mask bit. Throws std::runtime_error for unknown encodings (including VID,
// which is handled by a dedicated element-wise routine).
inline std::function<bool(bool&, bool)> get_mask_set_funct(unsigned enc) {
    switch(enc) {
    case 0b00001: // VMSBF: set all bits strictly before the first set source bit
        return [](bool& marker, bool vs2) {
            if(marker)
                return 0;
            if(vs2) {
                marker = true;
                return 0;
            } else
                return 1;
        };
    case 0b00010: // VMSOF: set only the first set source bit itself
        return [](bool& marker, bool vs2) {
            if(marker)
                return 0;
            if(vs2) {
                marker = true;
                return 1;
            } else
                return 0;
        };
    case 0b00011: // VMSIF: set all bits up to and including the first set source bit
        return [](bool& marker, bool vs2) {
            if(marker)
                return 0;
            if(vs2) {
                marker = true;
                return 1;
            } else
                return 1;
        };
    case 0b10001: // VID
    default:
        // throw by value: `throw new ...` throws a pointer that is never matched
        // by `catch(const std::exception&)` and leaks the exception object
        throw std::runtime_error("Unknown enc in get_mask_set_funct");
    }
}
// Executes a mask set instruction (VMSBF/VMSOF/VMSIF, selected by `enc`) from
// mask register vs2 into mask register vd, honoring the v0 mask when vm == 0.
template <unsigned VLEN> void mask_set_op(uint8_t* V, unsigned enc, uint64_t vl, uint64_t vstart, bool vm, unsigned vd, unsigned vs2) {
// a mask register holds one bit per element, VLEN bits total
uint64_t elem_count = VLEN;
auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
auto vd_view = read_vmask<VLEN>(V, elem_count, vd);
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto fn = get_mask_set_funct(enc);
// flips to true once the first set source bit has been consumed
bool marker = false;
for(unsigned idx = vstart; idx < vl; idx++) {
bool mask_active = vm ? 1 : mask_reg[idx];
if(mask_active) {
unsigned new_bit_value = fn(marker, vs2_view[idx]);
// read-modify-write of a single destination bit: clear it, then OR in the
// new value (& binds tighter than |, so no parentheses are needed)
uint8_t* cur_mask_byte_addr = vd_view.start + idx / 8;
unsigned cur_bit = idx % 8;
*cur_mask_byte_addr = *cur_mask_byte_addr & ~(1U << cur_bit) | static_cast<unsigned>(new_bit_value) << cur_bit;
}
}
// the tail is all elements of the destination register beyond the first one
for(unsigned idx = vl; idx < VLEN; idx++) {
// always tail agnostic
// this is a nop, placeholder for vta behavior
unsigned new_bit_value = vd_view[idx];
uint8_t* cur_mask_byte_addr = vd_view.start + idx / 8;
unsigned cur_bit = idx % 8;
*cur_mask_byte_addr = *cur_mask_byte_addr & ~(1U << cur_bit) | static_cast<unsigned>(new_bit_value) << cur_bit;
}
}
template <unsigned VLEN, typename src_elem_t>
void viota(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
auto vs2_view = read_vmask<VLEN>(V, elem_count, vs2);
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
unsigned current = 0;
for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) {
bool mask_active = vm ? 1 : mask_reg[idx];
if(mask_active) {
vd_view[idx] = current;
if(vs2_view[idx])
current += 1;
}
}
return;
}
template <unsigned VLEN, typename src_elem_t> void vid(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd) {
uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
for(unsigned idx = vstart; idx < std::min(vl, elem_count); idx++) {
bool mask_active = vm ? 1 : mask_reg[idx];
if(mask_active) {
vd_view[idx] = idx;
}
}
return;
}
// Moves a scalar into element 0 of vector register vd (when to_vector is set)
// and returns element 0 sign-extended from src_elem_t to 64 bit (also after a
// write, in which case it returns the value just written, truncated to the
// element width and sign-extended).
template <unsigned VLEN, typename src_elem_t> uint64_t scalar_move(uint8_t* V, vtype_t vtype, unsigned vd, uint64_t val, bool to_vector) {
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, 1);
if(to_vector) {
vd_view[0] = val;
// nop self-assignment loop: placeholder for tail-agnostic (vta) handling.
// NOTE(review): the bound VLEN / (sew * RFS) looks suspicious — elements per
// register would be VLEN / sew; confirm the intended tail range.
for(unsigned idx = 1; idx < VLEN / (vtype.sew() * RFS); idx++) {
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
}
}
return static_cast<int64_t>(static_cast<std::make_signed_t<src_elem_t>>(vd_view[0]));
}
// Slides vs2 by `imm` element positions into vd: positive imm slides up
// (vd[i] = vs2[i - imm]), negative imm slides down (vd[i] = vs2[i + |imm|],
// reading 0 for source indices past the end of the register group).
template <unsigned VLEN, typename src_elem_t>
void vector_slide(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
uint64_t elem_count = VLEN * vtype.lmul() / (sizeof(src_elem_t) * 8);
vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
auto vs2_view = get_vreg<VLEN, src_elem_t>(V, vs2, elem_count);
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, elem_count);
// for a slide up the first `imm` destination elements stay untouched; for a
// slide down (imm < 0) the max() degenerates to vstart
for(unsigned idx = std::max<int64_t>(vstart, imm); idx < vl; idx++) {
bool mask_active = vm ? 1 : mask_reg[idx];
src_elem_t src_elem = 0;
// (idx - imm) mixes unsigned idx with int64_t imm and is evaluated in
// int64_t; for imm >= 0 the loop start guarantees a non-negative index
if(imm >= 0 || (idx - imm < elem_count))
src_elem = vs2_view[idx - imm];
if(mask_active) {
vd_view[idx] = src_elem;
} else {
// nop self-assignment: placeholder for mask-agnostic (vma) behavior
vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
}
}
for(unsigned idx = vl; idx < elem_count; idx++) {
// nop self-assignment: placeholder for tail-agnostic (vta) behavior
vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
}
return;
}
// Slide-1-up: slides vs2 up by one element via vector_slide, then writes the
// scalar `imm` into element 0 if element 0 is active under the mask.
// NOTE(review): element 0 is written regardless of vstart — confirm whether
// vstart > 0 should leave element 0 untouched per the spec.
template <unsigned VLEN, typename src_elem_t>
void vector_slide1up(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
vector_slide<VLEN, src_elem_t>(V, vl, vstart, vtype, vm, vd, vs2, 1);
vmask_view mask_reg = read_vmask<VLEN>(V, 1);
auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, 1);
if(vm || mask_reg[0])
vd_view[0] = imm;
else
// nop self-assignment: placeholder for mask-agnostic (vma) behavior
vd_view[0] = vtype.vma() ? vd_view[0] : vd_view[0];
}
// Slide-1-down: slides vs2 down by one element via vector_slide, then writes
// the scalar `imm` into the last body element (index vl - 1) if that element
// is active under the mask. A nop for vl == 0.
template <unsigned VLEN, typename src_elem_t>
void vector_slide1down(uint8_t* V, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd, unsigned vs2, int64_t imm) {
    vector_slide<VLEN, src_elem_t>(V, vl, vstart, vtype, vm, vd, vs2, -1);
    if(vl > 0) {
        vmask_view mask_reg = read_vmask<VLEN>(V, vl);
        auto vd_view = get_vreg<VLEN, src_elem_t>(V, vd, vl);
        if(vm || mask_reg[vl - 1])
            vd_view[vl - 1] = imm;
        else
            // fixed copy-paste index (was vd_view[0]): the masked-off element is
            // the last body element. Still a nop self-assignment, kept as a
            // placeholder for mask-agnostic (vma) behavior.
            vd_view[vl - 1] = vtype.vma() ? vd_view[vl - 1] : vd_view[vl - 1];
    }
}
} // namespace softvector