Changes the name of the returned index to better reflect what it means; cleans up a bit

This commit is contained in:
Eyck-Alexander Jentzsch 2025-02-03 20:40:01 +01:00
parent 6f4daf91ed
commit 7a048f8b93

View File

@ -41,7 +41,6 @@
#include <functional> #include <functional>
#include <limits> #include <limits>
#include <math.h> #include <math.h>
#include <nonstd/variant.hpp>
namespace softvector { namespace softvector {
unsigned RFS = 32; unsigned RFS = 32;
@ -86,7 +85,7 @@ double vtype_t::lmul() {
vreg_view read_vmask(uint8_t* V, uint8_t VLEN, uint16_t num_elem, uint8_t reg_idx) { vreg_view read_vmask(uint8_t* V, uint8_t VLEN, uint16_t num_elem, uint8_t reg_idx) {
uint8_t* mask_start = V + VLEN / 8 * reg_idx; uint8_t* mask_start = V + VLEN / 8 * reg_idx;
return {mask_start, num_elem / 8u}; // this can return size==0 as num_elem can be as low as 1, probably not wanted return {mask_start, num_elem / 8u}; // this can return size==0 as num_elem can be as low as 1
} }
uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint8_t VLEN, uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint8_t VLEN,
uint8_t vd, uint64_t base_addr, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uint8_t elem_byte_size, uint8_t vd, uint64_t base_addr, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uint8_t elem_byte_size,
@ -102,7 +101,7 @@ uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint6
// elements w/ index smaller than vstart are in the prestart and get skipped // elements w/ index smaller than vstart are in the prestart and get skipped
// body is from vstart to min(num_elem, vl) // body is from vstart to min(num_elem, vl)
for(unsigned idx = vstart; idx < std::min(num_elem, vl); idx++) { for(unsigned idx = vstart; idx < std::min(num_elem, vl); idx++) {
vstart = idx; unsigned trap_idx = idx;
// vm decides active body element // vm decides active body element
uint8_t current_mask_byte = mask_view.get<uint8_t>(idx / 8); uint8_t current_mask_byte = mask_view.get<uint8_t>(idx / 8);
bool mask_active = vm ? 1 : current_mask_byte & (1 << idx % 8); bool mask_active = vm ? 1 : current_mask_byte & (1 << idx % 8);
@ -113,7 +112,7 @@ uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint6
if(mask_active) { if(mask_active) {
uint64_t addr = base_addr + (eew / 8) * (idx * segment_size + s_idx) * stride; uint64_t addr = base_addr + (eew / 8) * (idx * segment_size + s_idx) * stride;
if(!load_store_fn(core, addr, eew / 8, dest_elem)) if(!load_store_fn(core, addr, eew / 8, dest_elem))
return vstart; return trap_idx;
} else { } else {
// this only updates the first 8 bits, so eew > 8 would not work correctly // this only updates the first 8 bits, so eew > 8 would not work correctly
*dest_elem = vtype.vma() ? *dest_elem : *dest_elem; *dest_elem = vtype.vma() ? *dest_elem : *dest_elem;
@ -123,7 +122,6 @@ uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint6
// elements w/ index larger than num_elem are in the tail (fractional LMUL) // elements w/ index larger than num_elem are in the tail (fractional LMUL)
// elements w/ index larger than vl are in the tail // elements w/ index larger than vl are in the tail
for(unsigned idx = std::min(num_elem, vl); idx < VLEN / 8; idx++) { for(unsigned idx = std::min(num_elem, vl); idx < VLEN / 8; idx++) {
vstart = idx;
for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) { for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) {
// base + selected vd + current_elem + current_segment // base + selected vd + current_elem + current_segment
uint8_t* dest_elem = V + (vd * VLEN / 8) + (eew / 8 * idx) + (VLEN / 8 * s_idx * emul_stride); uint8_t* dest_elem = V + (vd * VLEN / 8) + (eew / 8 * idx) + (VLEN / 8 * s_idx * emul_stride);