general improvements to vector_functions, adds functions to process arithmetic instructions (working add)
parent 69c8fda5d2
commit 6ce0d97e81
@@ -164,7 +164,9 @@ if(vector != null) {%>
inline void set_tval(uint64_t new_tval){
    tval = new_tval;
}
<%if(vector != null) {%>
<%if(vector != null) {
def xlen = constants.find { it.name == 'XLEN' }?.value ?: 0
def vlen = constants.find { it.name == 'VLEN' }?.value ?: 0 %>
inline void lower(){
    this->core.reg.trap_state = 0;
}
@@ -174,19 +176,54 @@ if(vector != null) {%>
uint64_t vsseg(uint8_t* V, uint8_t vs3, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size){
    return softvector::vector_load_store(this->get_arch(), softvector::softvec_write, V, traits::VLEN, vs3, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size);
}
uint64_t vlsseg(uint8_t* V, uint8_t vd, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int64_t stride){
uint64_t vlsseg(uint8_t* V, uint8_t vd, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int${xlen}_t stride){
    return softvector::vector_load_store(this->get_arch(), softvector::softvec_read, V, traits::VLEN, vd, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size, stride);
}
uint64_t vssseg(uint8_t* V, uint8_t vs3, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int64_t stride){
}
uint64_t vssseg(uint8_t* V, uint8_t vs3, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int${xlen}_t stride){
    return softvector::vector_load_store(this->get_arch(), softvector::softvec_write, V, traits::VLEN, vs3, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size, stride);
}
}
uint64_t vlxseg(uint8_t* V, uint8_t vd, uint8_t vs2, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, uint8_t segment_size, bool ordered){
    return softvector::vector_load_store_index(this->get_arch(), softvector::softvec_read, V, traits::VLEN, traits::XLEN, vd, vs2, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, segment_size, ordered);
}
}
uint64_t vsxseg(uint8_t* V, uint8_t vs3, uint8_t vs2, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, uint8_t segment_size, bool ordered){
    return softvector::vector_load_store_index(this->get_arch(), softvector::softvec_write, V, traits::VLEN, traits::XLEN, vs3, vs2, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, segment_size, ordered);
}
void vector_vector_op(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
    switch(sew_val){
    case 0b000:
        softvector::vector_vector_op<${vlen}, uint8_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd);
        break;
    case 0b001:
        softvector::vector_vector_op<${vlen}, uint16_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd);
        break;
    case 0b010:
        softvector::vector_vector_op<${vlen}, uint32_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd);
        break;
    case 0b011:
        softvector::vector_vector_op<${vlen}, uint64_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd);
        break;
    default:
        throw std::runtime_error("Unsupported sew bit value");
    }
}
void vector_imm_op(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
    switch(sew_val){
    case 0b000:
        softvector::vector_imm_op<${vlen}, uint8_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd);
        break;
    case 0b001:
        softvector::vector_imm_op<${vlen}, uint16_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd);
        break;
    case 0b010:
        softvector::vector_imm_op<${vlen}, uint32_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd);
        break;
    case 0b011:
        softvector::vector_imm_op<${vlen}, uint64_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd);
        break;
    default:
        throw std::runtime_error("Unsupported sew bit value");
    }
}
<%}%>
uint64_t fetch_count{0};
uint64_t tval{0};
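The two dispatch helpers above select the element type from the 3-bit sew field, where SEW = 8 << sew_val. For reference, a minimal standalone sketch of the same dispatch idea; print_elem_width is a hypothetical stand-in for the templated softvector call:

#include <cstdint>
#include <cstdio>
#include <stdexcept>

// stand-in for the templated softvector call; only reports the lane width
template <typename elem_t> void print_elem_width() {
    std::printf("dispatching with %zu-bit elements\n", sizeof(elem_t) * 8);
}

void dispatch_on_sew(uint8_t sew_val) {
    // SEW = 8 << sew_val: 0b000 -> 8, 0b001 -> 16, 0b010 -> 32, 0b011 -> 64
    switch(sew_val) {
    case 0b000: print_elem_width<uint8_t>(); break;
    case 0b001: print_elem_width<uint16_t>(); break;
    case 0b010: print_elem_width<uint32_t>(); break;
    case 0b011: print_elem_width<uint64_t>(); break;
    default: throw std::runtime_error("Unsupported sew bit value");
    }
}

int main() {
    for(uint8_t sew = 0; sew < 4; ++sew)
        dispatch_on_sew(sew);
}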
@@ -44,7 +44,6 @@
#include <stdexcept>

namespace softvector {
unsigned RFS = 32;

bool softvec_read(void* core, uint64_t addr, uint64_t length, uint8_t* data) {
    // Read length bytes from addr into *data
@@ -59,16 +58,6 @@ bool softvec_write(void* core, uint64_t addr, uint64_t length, uint8_t* data) {
    return status == iss::Ok;
}

using vlen_t = uint64_t;
struct vreg_view {
    uint8_t* start;
    size_t size;
    template <typename T> T& get(size_t idx = 0) {
        assert((idx * sizeof(T)) <= size);
        return *(reinterpret_cast<T*>(start) + idx);
    }
};

vtype_t::vtype_t(uint32_t vtype_val) { underlying = (static_cast<uint64_t>(vtype_val & 0x80000000) << 32) | (vtype_val & ~0x80000000u); }
vtype_t::vtype_t(uint64_t vtype_val) { underlying = vtype_val; }
bool vtype_t::vill() { return underlying >> 63; }
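The 32-bit constructor relocates the vill bit (bit 31 of a 32-bit vtype) up to bit 63, which is where vill() reads it. Per RVV 1.0, vtype packs vlmul in bits [2:0] and vsew in bits [5:3], and lmul() sign-extends the 3-bit vlmul field to support fractional LMUL. A small self-contained check of that decoding:

#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
    // vtype with vsew=0b010 (SEW=32) and vlmul=0b111 (LMUL=1/2)
    uint64_t vtype = (0b010u << 3) | 0b111u;
    uint8_t vlmul = vtype & 0b111;
    uint8_t vsew = (vtype >> 3) & 0b111;
    // sign-extend the 3-bit vlmul field, as in vtype_t::lmul()
    int8_t signed_vlmul = (vlmul >> 2) ? (0b11111000 | vlmul) : vlmul;
    double lmul = std::pow(2, signed_vlmul);
    unsigned sew = 8u << vsew;
    std::printf("sew=%u lmul=%f\n", sew, lmul); // sew=32 lmul=0.500000
}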
@@ -85,10 +74,15 @@ double vtype_t::lmul() {
    int8_t signed_vlmul = (vlmul >> 2) ? 0b11111000 | vlmul : vlmul;
    return pow(2, signed_vlmul);
}
bool vmask_view::operator[](size_t idx) {
    assert(idx < elem_count);
    return *(start + idx / 8) & (1U << (idx % 8));
}

vreg_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t reg_idx) {
vmask_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t reg_idx) {
    uint8_t* mask_start = V + VLEN / 8 * reg_idx;
    return {mask_start, elem_count / 8u}; // this can return size==0 as elem_count can be as low as 1
    assert(mask_start + elem_count / 8 <= V + VLEN * RFS / 8);
    return {mask_start, elem_count};
}
uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint16_t VLEN,
                           uint8_t addressed_register, uint64_t base_addr, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm,
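vmask_view::operator[] treats the mask register as a packed little-endian bit array: element idx lives at bit (idx % 8) of byte (idx / 8). A minimal standalone illustration of that indexing:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
    // 16 mask elements packed into two bytes, LSB first
    uint8_t mask[2] = {0b00000101, 0b10000000};
    auto bit = [&](size_t idx) -> bool { return mask[idx / 8] & (1u << (idx % 8)); };
    assert(bit(0) && bit(2) && bit(15));
    assert(!bit(1) && !bit(8));
    std::printf("mask[0]=%d mask[2]=%d mask[15]=%d\n", bit(0), bit(2), bit(15));
}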
@@ -104,13 +98,12 @@ uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint6
    assert(!(addressed_register % emul_stride));
    if(!use_stride)
        stride = elem_size_byte * segment_size;
    vreg_view mask_view = read_vmask(V, VLEN, elem_count, 0);
    vmask_view mask_reg = read_vmask(V, VLEN, elem_count);
    // elements w/ index smaller than vstart are in the prestart and get skipped
    // body is from vstart to min(elem_count, vl)
    for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
        unsigned trap_idx = idx;
        uint8_t current_mask_byte = mask_view.get<uint8_t>(idx / 8);
        bool mask_active = vm ? 1 : current_mask_byte & (1 << idx % 8);
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active) {
            for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) {
                // base + selected register + current_elem + current_segment
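Per the loop comment, each segment field lands at base plus the element offset plus the field offset, with the stride defaulting to elem_size_byte * segment_size for unit-stride accesses. A worked address computation under those assumptions, for a two-field segment load of 4-byte elements:

#include <cstdint>
#include <cstdio>

int main() {
    uint64_t base_addr = 0x1000;
    unsigned elem_size_byte = 4, segment_size = 2;
    // unit-stride case: stride = elem_size_byte * segment_size
    int64_t stride = elem_size_byte * segment_size;
    for(unsigned idx = 0; idx < 3; idx++)
        for(unsigned s_idx = 0; s_idx < segment_size; s_idx++)
            // base + current_elem + current_segment
            std::printf("elem %u field %u -> 0x%llx\n", idx, s_idx,
                        (unsigned long long)(base_addr + idx * stride + s_idx * elem_size_byte));
    // prints 0x1000, 0x1004, 0x1008, 0x100c, 0x1010, 0x1014
}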
@@ -172,17 +165,16 @@ uint64_t vector_load_store_index(void* core, std::function<bool(void*, uint64_t,
    unsigned data_emul_stride = vtype.lmul() < 1 ? 1 : vtype.lmul();
    assert(data_emul_stride * segment_size <= 8);
    unsigned data_elem_size_byte = vtype.sew() / 8;
    vreg_view mask_view = read_vmask(V, VLEN, elem_count, 0);
    vmask_view mask_reg = read_vmask(V, VLEN, elem_count);
    // elements w/ index smaller than vstart are in the prestart and get skipped
    // body is from vstart to min(elem_count, vl)
    for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
        unsigned trap_idx = idx;
        uint8_t current_mask_byte = mask_view.get<uint8_t>(idx / 8);
        bool mask_active = vm ? 1 : current_mask_byte & (1 << idx % 8);
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active) {
            uint8_t* offset_elem = V + (index_register * VLEN / 8) + (index_elem_size_byte * idx);
            assert(offset_elem <= (V + VLEN * RFS / 8 - index_elem_size_byte)); // ensure reading index_elem_size_bytes is legal
            // read sew bits from offset_elem truncate / extend to XLEN bits
            // read sew bits from offset_elem, truncate / extend to XLEN bits
            uint64_t offset_val = read_n_bits(offset_elem, index_elem_size_byte * 8);
            assert(XLEN == 64 || XLEN == 32);
            uint64_t mask = XLEN == 64 ? std::numeric_limits<uint64_t>::max() : std::numeric_limits<uint32_t>::max();
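The index element is read at its own width and then truncated to XLEN via the mask computed above. A standalone sketch of that truncation; read_n_bits is the diff's helper, so a little-endian memcpy stands in for it here as an assumption about its behavior:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <limits>

int main() {
    const unsigned XLEN = 32;
    // a 64-bit index element in a vector register (little-endian bytes)
    uint8_t elem_bytes[8] = {0xef, 0xbe, 0xad, 0xde, 0x01, 0x00, 0x00, 0x00};
    uint64_t offset_val = 0;
    std::memcpy(&offset_val, elem_bytes, sizeof(offset_val)); // stand-in for read_n_bits
    uint64_t mask = XLEN == 64 ? std::numeric_limits<uint64_t>::max() : std::numeric_limits<uint32_t>::max();
    std::printf("0x%llx -> 0x%llx\n", (unsigned long long)offset_val,
                (unsigned long long)(offset_val & mask)); // 0x1deadbeef -> 0xdeadbeef
}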
@@ -41,6 +41,7 @@
#include <functional>
#include <stdint.h>
namespace softvector {
const unsigned RFS = 32;

struct vtype_t {
    uint64_t underlying;
@@ -52,6 +53,14 @@ struct vtype_t {
    bool vma();
    bool vta();
};
struct vmask_view {
    uint8_t* start;
    size_t elem_count;
    bool operator[](size_t);
};
vmask_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t reg_idx = 0);
template <unsigned VLEN> vmask_view read_vmask(uint8_t* V, uint16_t elem_count, uint8_t reg_idx = 0);

bool softvec_read(void* core, uint64_t addr, uint64_t length, uint8_t* data);
bool softvec_write(void* core, uint64_t addr, uint64_t length, uint8_t* data);
uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint16_t VLEN,
@@ -62,5 +71,9 @@ uint64_t vector_load_store_index(void* core, std::function<bool(void*, uint64_t,
                                 uint16_t VLEN, uint8_t XLEN, uint8_t addressed_register, uint8_t index_register, uint64_t base_addr,
                                 uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, uint8_t elem_size_byte, uint64_t elem_count,
                                 uint8_t segment_size, bool ordered);
template <unsigned VLEN, typename elem_t>
void vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vs2, unsigned vs1,
                      unsigned vd);
} // namespace softvector
#include "vm/vector_functions.hpp"
#endif /* _VM_VECTOR_FUNCTIONS_H_ */
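The header declares the function templates and includes vector_functions.hpp at the bottom, so every translation unit that includes the .h also sees the template definitions, while the .hpp refuses to be included directly. A minimal sketch of that include-guard handshake, with hypothetical file names:

// my_templates.h (hypothetical)
#ifndef MY_TEMPLATES_H_
#define MY_TEMPLATES_H_
template <typename T> T twice(T v); // declaration only
#include "my_templates.hpp"         // template definitions appended at the end
#endif

// my_templates.hpp (hypothetical)
#ifndef MY_TEMPLATES_H_
#error my_templates.hpp should only be included from my_templates.h
#endif
template <typename T> T twice(T v) { return v + v; } // definition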
src/vm/vector_functions.hpp (new file, 154 lines)
@@ -0,0 +1,154 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (C) 2025, MINRES Technologies GmbH
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors
//    may be used to endorse or promote products derived from this software
//    without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Contributors:
//     alex@minres.com - initial API and implementation
////////////////////////////////////////////////////////////////////////////////
#pragma once
#include "vm/vector_functions.h"
#include <functional>
#include <stdexcept>
#include <type_traits>
#ifndef _VM_VECTOR_FUNCTIONS_H_
#error __FILE__ should only be included from vector_functions.h
#endif
#include <math.h>
namespace softvector {

template <typename elem_t> struct vreg_view {
    uint8_t* start;
    size_t elem_count;
    inline elem_t& get(size_t idx = 0) {
        assert(idx < elem_count);
        return *(reinterpret_cast<elem_t*>(start) + idx);
    }
    elem_t& operator[](size_t idx) {
        assert(idx < elem_count);
        return *(reinterpret_cast<elem_t*>(start) + idx);
    }
};

template <unsigned VLEN, typename elem_t> vreg_view<elem_t> get_vreg(uint8_t* V, uint8_t reg_idx, uint16_t elem_count) {
    assert(V + elem_count * sizeof(elem_t) <= V + VLEN * RFS / 8);
    return {V + VLEN / 8 * reg_idx, elem_count};
}
template <unsigned VLEN> vmask_view read_vmask(uint8_t* V, uint16_t elem_count, uint8_t reg_idx) {
    uint8_t* mask_start = V + VLEN / 8 * reg_idx;
    assert(mask_start + elem_count / 8 <= V + VLEN * RFS / 8);
    return {mask_start, elem_count};
}
template <typename elem_t> std::function<elem_t(elem_t, elem_t)> get_funct(unsigned funct) {
    switch(funct) {
    case 0b000000: {
        // VADD
        return [](elem_t vs2, elem_t vs1) { return vs2 + vs1; };
    }
    // the remaining cases are placeholders and fall through to the default throw
    case 0b000010: {
        // VSUB
    }
    case 0b000100: {
        // VMINU
    }
    case 0b000101: {
        // VMIN
    }
    case 0b000110: {
        // VMAXU
    }
    case 0b000111: {
        // VMAX
    }
    case 0b001001: {
        // VAND
    }
    case 0b001010: {
        // VOR
    }
    case 0b001011: {
        // VXOR
    }
    default:
        throw std::runtime_error("Unknown funct in get_funct");
    }
}
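get_funct maps a funct6 value to the scalar kernel applied per element; only VADD (0b000000) is implemented so far, matching the "working add" in the commit message, and lane arithmetic wraps modulo 2^SEW. A standalone sketch using a cut-down local copy of the lookup:

#include <cstdint>
#include <cstdio>
#include <functional>
#include <stdexcept>

// cut-down local copy of the lookup: only VADD, everything else throws
template <typename elem_t> std::function<elem_t(elem_t, elem_t)> get_funct_demo(unsigned funct) {
    if(funct == 0b000000)
        return [](elem_t vs2, elem_t vs1) { return vs2 + vs1; };
    throw std::runtime_error("Unknown funct in get_funct");
}

int main() {
    auto add = get_funct_demo<uint8_t>(0b000000);
    std::printf("%u\n", add(250, 10)); // 4: 8-bit lanes wrap modulo 256
}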
template <unsigned VLEN, typename elem_t>
void vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vs2, unsigned vs1,
                      unsigned vd) {
    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    auto vs1_view = get_vreg<VLEN, elem_t>(V, vs1, elem_count);
    auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
    auto vd_view = get_vreg<VLEN, elem_t>(V, vd, elem_count);
    auto fn = get_funct<elem_t>(funct6);
    // elements w/ index smaller than vstart are in the prestart and get skipped
    // body is from vstart to min(elem_count, vl)
    for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active) {
            vd_view[idx] = fn(vs2_view[idx], vs1_view[idx]);
        } else {
            // masked-off elements are left undisturbed; both vma=0 and the agnostic policy permit this
            vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
        }
    }
    // elements w/ index larger than elem_count are in the tail (fractional LMUL)
    // elements w/ index larger than vl are in the tail
    unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(elem_t) * 8);
    for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
        // tail elements are likewise left undisturbed; both vta=0 and the agnostic policy permit this
        vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
    }
    return;
}
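The loop structure above mirrors the RVV execution model: prestart elements (idx < vstart) are skipped, the body runs up to min(vl, elem_count), and tail elements are left alone. A self-contained re-creation of the masked body loop for 8-bit adds:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
    uint8_t vs1[8] = {1, 1, 1, 1, 1, 1, 1, 1};
    uint8_t vs2[8] = {10, 20, 30, 40, 50, 60, 70, 80};
    uint8_t vd[8] = {0};
    uint8_t mask = 0b00001011; // element idx is active when bit (idx % 8) is set
    uint64_t vstart = 1, vl = 6, elem_count = 8;
    bool vm = false; // masked operation
    for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
        bool mask_active = vm ? 1 : (mask >> idx) & 1;
        if(mask_active)
            vd[idx] = vs2[idx] + vs1[idx]; // VADD kernel
        // inactive, prestart, and tail elements stay undisturbed
    }
    for(unsigned idx = 0; idx < 8; idx++)
        std::printf("%u ", vd[idx]); // 0 21 0 41 0 0 0 0
    std::printf("\n");
}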
template <unsigned VLEN, typename elem_t>
void vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vs2,
                   typename std::make_signed<elem_t>::type imm, unsigned vd) {
    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
    auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
    auto vd_view = get_vreg<VLEN, elem_t>(V, vd, elem_count);
    auto fn = get_funct<elem_t>(funct6);
    // elements w/ index smaller than vstart are in the prestart and get skipped
    // body is from vstart to min(elem_count, vl)
    for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
        bool mask_active = vm ? 1 : mask_reg[idx];
        if(mask_active) {
            vd_view[idx] = fn(vs2_view[idx], imm);
        } else {
            // masked-off elements are left undisturbed; both vma=0 and the agnostic policy permit this
            vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
        }
    }
    // elements w/ index larger than elem_count are in the tail (fractional LMUL)
    // elements w/ index larger than vl are in the tail
    unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(elem_t) * 8);
    for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
        // tail elements are likewise left undisturbed; both vta=0 and the agnostic policy permit this
        vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
    }
    return;
}
} // namespace softvector
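vector_imm_op takes the immediate as make_signed<elem_t>, so a negative immediate (e.g. the sign-extended simm5 of vadd.vi, assuming the decoder sign-extends it before the call) combines with the unsigned lane type through ordinary modular arithmetic. A short standalone check:

#include <cstdint>
#include <cstdio>
#include <type_traits>

template <typename elem_t> elem_t add_imm(elem_t vs2, typename std::make_signed<elem_t>::type imm) {
    return vs2 + imm; // modular arithmetic on the unsigned lane type
}

int main() {
    // vadd.vi with immediate -3 on 8-bit lanes
    std::printf("%u\n", add_imm<uint8_t>(10, -3)); // 7
    std::printf("%u\n", add_imm<uint8_t>(1, -3));  // 254: wraps modulo 256
}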