Changes load_store to use vreg_views as well

This commit is contained in:
Eyck-Alexander Jentzsch 2025-02-22 16:51:53 +01:00
parent 221d2ee38c
commit e24c1874c4
4 changed files with 86 additions and 61 deletions

View File

@ -170,17 +170,61 @@ if(vector != null) {%>
// Resets the core's trap_state to 0, clearing any pending trap indication
// (presumably de-asserting the trap after it has been handled — name
// suggests the "lower" half of a raise/lower trap pair; confirm in core model).
inline void lower(){
this->core.reg.trap_state = 0;
}
uint64_t vlseg(uint8_t* V, uint8_t vd, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size){
return softvector::vector_load_store(this->get_arch(), softvector::softvec_read, V, traits::VLEN, vd, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size);
uint64_t vlseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size){
switch(width_val){
case 0b000:
return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
case 0b101:
return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
case 0b110:
return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
case 0b111:
return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
default:
throw new std::runtime_error("Unsupported width bit value");
}
}
uint64_t vsseg(uint8_t* V, uint8_t vs3, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size){
return softvector::vector_load_store(this->get_arch(), softvector::softvec_write, V, traits::VLEN, vs3, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size);
uint64_t vsseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size){
switch(width_val){
case 0b000:
return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
case 0b101:
return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
case 0b110:
return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
case 0b111:
return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size);
default:
throw new std::runtime_error("Unsupported width bit value");
}
}
uint64_t vlsseg(uint8_t* V, uint8_t vd, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int${xlen}_t stride){
return softvector::vector_load_store(this->get_arch(), softvector::softvec_read, V, traits::VLEN, vd, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size, stride);
uint64_t vlsseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size, int64_t stride){
switch(width_val){
case 0b000:
return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
case 0b101:
return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
case 0b110:
return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
case 0b111:
return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_read, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
default:
throw new std::runtime_error("Unsupported width bit value");
}
}
uint64_t vssseg(uint8_t* V, uint8_t vs3, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int${xlen}_t stride){
return softvector::vector_load_store(this->get_arch(), softvector::softvec_write, V, traits::VLEN, vs3, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size, stride);
uint64_t vssseg(uint8_t* V, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1_val, uint8_t width_val, uint8_t segment_size, int64_t stride){
switch(width_val){
case 0b000:
return softvector::vector_load_store<${vlen}, uint8_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
case 0b101:
return softvector::vector_load_store<${vlen}, uint16_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
case 0b110:
return softvector::vector_load_store<${vlen}, uint32_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
case 0b111:
return softvector::vector_load_store<${vlen}, uint64_t>(this->get_arch(), softvector::softvec_write, V, vl, vstart, vtype, vm, vd, rs1_val, segment_size, stride, true);
default:
throw new std::runtime_error("Unsupported width bit value");
}
}
uint64_t vlxseg(uint8_t* V, uint8_t vd, uint8_t vs2, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, uint8_t segment_size, bool ordered){
return softvector::vector_load_store_index(this->get_arch(), softvector::softvec_read, V, traits::VLEN, traits::XLEN, vd, vs2, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, segment_size, ordered);

View File

@ -84,59 +84,6 @@ vmask_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t re
assert(mask_start + elem_count / 8 <= V + VLEN * RFS / 8);
return {mask_start, elem_count};
}
// Legacy (pre-vreg_view) unit-stride/strided segment vector load/store.
// Transfers up to min(elem_count, vl) elements (each segment_size fields wide)
// between memory at base_addr and the raw register-file bytes in V, invoking
// load_store_fn per field. Returns 0 on success, or the element index of the
// first failing access (reported as the trap index). Elements below vstart
// are skipped; tail elements are left undisturbed.
uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint16_t VLEN,
uint8_t addressed_register, uint64_t base_addr, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm,
uint8_t elem_size_byte, uint64_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int64_t stride,
bool use_stride) {
// eew = elem_size_byte * 8
// EMUL * NFIELDS must not exceed 8 registers (RVV segment constraint)
assert(pow(2, EMUL_pow) * segment_size <= 8);
assert(segment_size > 0);
// assert((elem_count & (elem_count - 1)) == 0); // check that elem_count is power of 2, this check does not hold for vlm.v and vsm.v
assert(elem_count <= VLEN * RFS / 8);
// register-group distance between consecutive fields; fractional EMUL collapses to 1
unsigned emul_stride = EMUL_pow <= 0 ? 1 : pow(2, EMUL_pow);
assert(emul_stride * segment_size <= 8);
// register number must be aligned to the register-group size
assert(!(addressed_register % emul_stride));
// unit-stride case: consecutive segments are densely packed in memory
if(!use_stride)
stride = elem_size_byte * segment_size;
vmask_view mask_reg = read_vmask(V, VLEN, elem_count);
// elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(elem_count, vl)
for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
unsigned trap_idx = idx;
bool mask_active = vm ? 1 : mask_reg[idx];
if(mask_active) {
for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) {
// base + selected register + current_elem + current_segment
uint8_t* addressed_elem = V + (addressed_register * VLEN / 8) + (elem_size_byte * idx) + (VLEN / 8 * s_idx * emul_stride);
assert(addressed_elem <= V + VLEN * RFS / 8);
uint64_t addr = base_addr + stride * idx + s_idx * elem_size_byte;
// abort on the first faulting access and report its element index
if(!load_store_fn(core, addr, elem_size_byte, addressed_elem))
return trap_idx;
}
} else {
// masked-off element: self-assignment keeps it undisturbed regardless of vma
for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) {
// base + selected register + current_elem + current_segment
uint8_t* addressed_elem = V + (addressed_register * VLEN / 8) + (elem_size_byte * idx) + (VLEN / 8 * s_idx * emul_stride);
assert(addressed_elem <= V + VLEN * RFS / 8);
// this only updates the first 8 bits, so eew > 8 would not work correctly
*addressed_elem = vtype.vma() ? *addressed_elem : *addressed_elem;
}
}
}
// elements w/ index larger than elem_count are in the tail (fractional LMUL)
// elements w/ index larger than vl are in the tail
unsigned maximum_elems = VLEN * vtype.lmul() / (elem_size_byte * 8);
for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
// tail element: self-assignment keeps it undisturbed regardless of vta
for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) {
// base + selected register + current_elem + current_segment
uint8_t* addressed_elem = V + (addressed_register * VLEN / 8) + (elem_size_byte * idx) + (VLEN / 8 * s_idx * emul_stride);
assert(addressed_elem <= V + VLEN * RFS / 8);
// this only updates the first 8 bits, so eew > 8 would not work correctly
*addressed_elem = vtype.vta() ? *addressed_elem : *addressed_elem;
}
}
return 0;
}
uint64_t read_n_bits(uint8_t* V, unsigned n) {
switch(n) {
case 8:

View File

@ -72,6 +72,10 @@ uint64_t vector_load_store_index(void* core, std::function<bool(void*, uint64_t,
uint16_t VLEN, uint8_t XLEN, uint8_t addressed_register, uint8_t index_register, uint64_t base_addr,
uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uint8_t elem_size_byte, uint64_t elem_count,
uint8_t segment_size, bool ordered);
// Unit-stride / strided (segment) vector load or store; eew_t selects the
// element width. Returns 0 on success or the index of the element whose
// access failed. stride is only honored when use_stride is true; otherwise
// consecutive segments are densely packed starting at rs1.
template <unsigned VLEN, typename eew_t>
uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint64_t vl,
uint64_t vstart, vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1, uint8_t segment_size, int64_t stride = 0,
bool use_stride = false);
// Element-wise vector-vector operation: reads vs2/vs1, writes vd over the
// active range [vstart, vl). The concrete operation is selected by
// funct6/funct3 in the implementation; carry/merge modify the masking
// behavior (NOTE(review): exact semantics live in the softvector impl —
// verify there).
template <unsigned VLEN, typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = src2_elem_t>
void vector_vector_op(uint8_t* V, unsigned funct6, unsigned funct3, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vd,
unsigned vs2, unsigned vs1, carry_t carry = carry_t::NO_CARRY, bool merge = false);

View File

@ -95,6 +95,36 @@ template <> struct twice<uint64_t> { using type = __uint128_t; };
#endif
template <class T> using twice_t = typename twice<T>::type; // for convenience
// Unit-stride / strided (segment) vector load or store using typed vreg views.
// Transfers the active elements [vstart, vl) (each segment_size fields of
// sizeof(eew_t) bytes) between memory at rs1 and register group vd, calling
// load_store_fn for every field.
// @tparam eew_t      element type selecting the effective element width
// @param stride      byte distance between consecutive elements; only used
//                    when use_stride is true, otherwise segments are packed
// @return 0 on success, otherwise the index of the element whose access failed
template <unsigned VLEN, typename eew_t>
uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint64_t vl,
                           uint64_t vstart, vtype_t vtype, bool vm, uint8_t vd, uint64_t rs1, uint8_t segment_size, int64_t stride,
                           bool use_stride) {
    unsigned vlmax = VLEN * vtype.lmul() / vtype.sew();
    // field-to-field distance inside the view, in elements; at least one full
    // register's worth of eew_t elements
    auto emul_stride = std::max<unsigned>(vlmax, VLEN / (sizeof(eew_t) * 8));
    auto vd_view = get_vreg<VLEN, eew_t>(V, vd, emul_stride * segment_size);
    vmask_view mask_reg = read_vmask(V, VLEN, vlmax);
    for(size_t idx = vstart; idx < vl; idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active) {
            // 64-bit arithmetic: the previous `signed` (int) accumulator would
            // truncate stride * idx for large strides or element indices
            int64_t stride_offset = stride * static_cast<int64_t>(idx);
            auto seg_offset = use_stride ? 0 : segment_size * sizeof(eew_t) * idx;
            for(size_t s_idx = 0; s_idx < segment_size; s_idx++) {
                eew_t* addressed_elem = &vd_view[idx + emul_stride * s_idx];
                uint64_t addr = rs1 + stride_offset + seg_offset + s_idx * sizeof(eew_t);
                // abort on the first faulting access and report its element index
                if(!load_store_fn(core, addr, sizeof(eew_t), reinterpret_cast<uint8_t*>(addressed_elem)))
                    return idx;
            }
        } else {
            // masked-off element: left undisturbed, which is also a legal
            // implementation of the mask-agnostic (vma) policy
        }
    }
    // tail elements [vl, vlmax): left undisturbed, which is also a legal
    // implementation of the tail-agnostic (vta) policy
    return 0;
}
template <typename dest_elem_t, typename src2_elem_t = dest_elem_t, typename src1_elem_t = dest_elem_t>
std::function<dest_elem_t(dest_elem_t, src2_elem_t, src1_elem_t)> get_funct(unsigned funct6, unsigned funct3) {
if(funct3 == OPIVV || funct3 == OPIVX || funct3 == OPIVI)