From 6ce0d97e8172e82e7c992e15793fe2acfe9b8085 Mon Sep 17 00:00:00 2001
From: Eyck-Alexander Jentzsch
Date: Wed, 12 Feb 2025 16:02:53 +0100
Subject: [PATCH] general improvements to vector_functions, adds functions to
 process arithmetic instructions (working add)

---
 gen_input/templates/interp/CORENAME.cpp.gtl |  51 ++++++-
 src/vm/vector_functions.cpp                 |  32 ++--
 src/vm/vector_functions.h                   |  13 ++
 src/vm/vector_functions.hpp                 | 154 ++++++++++++++++++++
 4 files changed, 223 insertions(+), 27 deletions(-)
 create mode 100644 src/vm/vector_functions.hpp

diff --git a/gen_input/templates/interp/CORENAME.cpp.gtl b/gen_input/templates/interp/CORENAME.cpp.gtl
index fda1120..fbd528f 100644
--- a/gen_input/templates/interp/CORENAME.cpp.gtl
+++ b/gen_input/templates/interp/CORENAME.cpp.gtl
@@ -164,7 +164,9 @@ if(vector != null) {%>
     inline void set_tval(uint64_t new_tval){
         tval = new_tval;
     }
-<%if(vector != null) {%>
+<%if(vector != null) {
+    def xlen = constants.find { it.name == 'XLEN' }?.value ?: 0
+    def vlen = constants.find { it.name == 'VLEN' }?.value ?: 0 %>
     inline void lower(){
         this->core.reg.trap_state = 0;
     }
@@ -174,19 +176,54 @@ if(vector != null) {%>
     uint64_t vsseg(uint8_t* V, uint8_t vs3, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size){
         return softvector::vector_load_store(this->get_arch(), softvector::softvec_write, V, traits::VLEN, vs3, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size);
     }
-    uint64_t vlsseg(uint8_t* V, uint8_t vd, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int64_t stride){
+    uint64_t vlsseg(uint8_t* V, uint8_t vd, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int${xlen}_t stride){
         return softvector::vector_load_store(this->get_arch(), softvector::softvec_read, V, traits::VLEN, vd, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size, stride);
-    }
-    uint64_t vssseg(uint8_t* V, uint8_t vs3, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int64_t stride){
+    }
+    uint64_t vssseg(uint8_t* V, uint8_t vs3, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, int8_t EMUL_pow, uint8_t segment_size, int${xlen}_t stride){
         return softvector::vector_load_store(this->get_arch(), softvector::softvec_write, V, traits::VLEN, vs3, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, EMUL_pow, segment_size, stride);
-    }
+    }
     uint64_t vlxseg(uint8_t* V, uint8_t vd, uint8_t vs2, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, uint8_t segment_size, bool ordered){
         return softvector::vector_load_store_index(this->get_arch(), softvector::softvec_read, V, traits::VLEN, traits::XLEN, vd, vs2, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, segment_size, ordered);
-    }
+    }
     uint64_t vsxseg(uint8_t* V, uint8_t vs3, uint8_t vs2, uint64_t rs1_val, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t elem_byte_size, uint16_t elem_count, uint8_t segment_size, bool ordered){
         return softvector::vector_load_store_index(this->get_arch(), softvector::softvec_write, V, traits::VLEN, traits::XLEN, vs3, vs2, rs1_val, vl, vstart, vtype, vm, elem_byte_size, elem_count, segment_size, ordered);
+    }
+    void vector_vector_op(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, uint8_t vs1, uint8_t sew_val){
+        switch(sew_val){
+        case 0b000:
+            softvector::vector_vector_op<${vlen}, uint8_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd);
+            break;
+        case 0b001:
+            softvector::vector_vector_op<${vlen}, uint16_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd);
+            break;
+        case 0b010:
+            softvector::vector_vector_op<${vlen}, uint32_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd);
+            break;
+        case 0b011:
+            softvector::vector_vector_op<${vlen}, uint64_t>(V, funct, vl, vstart, vtype, vm, vs2, vs1, vd);
+            break;
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
         }
-
+    }
+    void vector_imm_op(uint8_t* V, uint8_t funct, uint64_t vl, uint64_t vstart, softvector::vtype_t vtype, bool vm, uint8_t vd, uint8_t vs2, int64_t imm, uint8_t sew_val){
+        switch(sew_val){
+        case 0b000:
+            softvector::vector_imm_op<${vlen}, uint8_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd);
+            break;
+        case 0b001:
+            softvector::vector_imm_op<${vlen}, uint16_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd);
+            break;
+        case 0b010:
+            softvector::vector_imm_op<${vlen}, uint32_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd);
+            break;
+        case 0b011:
+            softvector::vector_imm_op<${vlen}, uint64_t>(V, funct, vl, vstart, vtype, vm, vs2, imm, vd);
+            break;
+        default:
+            throw std::runtime_error("Unsupported sew bit value");
+        }
+    }
 <%}%>
     uint64_t fetch_count{0};
     uint64_t tval{0};
diff --git a/src/vm/vector_functions.cpp b/src/vm/vector_functions.cpp
index e876c3e..1a1ca01 100644
--- a/src/vm/vector_functions.cpp
+++ b/src/vm/vector_functions.cpp
@@ -44,7 +44,6 @@
 #include
 
 namespace softvector {
-unsigned RFS = 32;
 
 bool softvec_read(void* core, uint64_t addr, uint64_t length, uint8_t* data) {
     // Read length bytes from addr into *data
@@ -59,16 +58,6 @@ bool softvec_write(void* core, uint64_t addr, uint64_t length, uint8_t* data) {
     return status == iss::Ok;
 }
 
-using vlen_t = uint64_t;
-struct vreg_view {
-    uint8_t* start;
-    size_t size;
-    template <typename T> T& get(size_t idx = 0) {
-        assert((idx * sizeof(T)) <= size);
-        return *(reinterpret_cast<T*>(start) + idx);
-    }
-};
-
 vtype_t::vtype_t(uint32_t vtype_val) { underlying = (vtype_val & 0x8000) << 32 | (vtype_val & ~0x8000); }
 vtype_t::vtype_t(uint64_t vtype_val) { underlying = vtype_val; }
 bool vtype_t::vill() { return underlying >> 63; }
@@ -85,10 +74,15 @@ double vtype_t::lmul() {
     int8_t signed_vlmul = (vlmul >> 2) ? 0b11111000 | vlmul : vlmul;
     return pow(2, signed_vlmul);
 }
+bool vmask_view::operator[](size_t idx) {
+    assert(idx < elem_count);
+    return *(start + idx / 8) & (1U << (idx % 8));
+}
 
-vreg_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t reg_idx) {
+vmask_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t reg_idx) {
     uint8_t* mask_start = V + VLEN / 8 * reg_idx;
-    return {mask_start, elem_count / 8u}; // this can return size==0 as elem_count can be as low as 1
+    assert(mask_start + elem_count / 8 <= V + VLEN * RFS / 8);
+    return {mask_start, elem_count};
 }
 
 uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint16_t VLEN,
                            uint8_t addressed_register, uint64_t base_addr, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm,
@@ -104,13 +98,12 @@ uint64_t vector_load_store(void* core, std::function
-        uint8_t current_mask_byte = mask_reg.get<uint8_t>(idx / 8);
-        bool mask_active = vm ? 1 : current_mask_byte & (1 << idx % 8);
+        bool mask_active = vm ? 1 : mask_reg[idx];
         if(mask_active) {
             for(unsigned s_idx = 0; s_idx < segment_size; s_idx++) {
                 // base + selected register + current_elem + current_segment
@@ -172,17 +165,16 @@ uint64_t vector_load_store_index(void* core, std::function
-        uint8_t current_mask_byte = mask_reg.get<uint8_t>(idx / 8);
-        bool mask_active = vm ? 1 : current_mask_byte & (1 << idx % 8);
+        bool mask_active = vm ? 1 : mask_reg[idx];
         if(mask_active) {
             uint8_t* offset_elem = V + (index_register * VLEN / 8) + (index_elem_size_byte * idx);
             assert(offset_elem <= (V + VLEN * RFS / 8 - index_elem_size_byte)); // ensure reading index_elem_size_bytes is legal
-            // read sew bits from offset_elem truncate / extend to XLEN bits
+            // read sew bits from offset_elem, truncate / extend to XLEN bits
             uint64_t offset_val = read_n_bits(offset_elem, index_elem_size_byte * 8);
             assert(XLEN == 64 | XLEN == 32);
             uint64_t mask = XLEN == 64 ? std::numeric_limits<uint64_t>::max() : std::numeric_limits<uint32_t>::max();
diff --git a/src/vm/vector_functions.h b/src/vm/vector_functions.h
index 05a53cb..a66c8d5 100644
--- a/src/vm/vector_functions.h
+++ b/src/vm/vector_functions.h
@@ -41,6 +41,7 @@
 #include
 #include
 namespace softvector {
+const unsigned RFS = 32;
 
 struct vtype_t {
     uint64_t underlying;
@@ -52,6 +53,14 @@ struct vtype_t {
     bool vma();
     bool vta();
 };
+struct vmask_view {
+    uint8_t* start;
+    size_t elem_count;
+    bool operator[](size_t);
+};
+vmask_view read_vmask(uint8_t* V, uint16_t VLEN, uint16_t elem_count, uint8_t reg_idx = 0);
+template <unsigned VLEN> vmask_view read_vmask(uint8_t* V, uint16_t elem_count, uint8_t reg_idx = 0);
+
 bool softvec_read(void* core, uint64_t addr, uint64_t length, uint8_t* data);
 bool softvec_write(void* core, uint64_t addr, uint64_t length, uint8_t* data);
 uint64_t vector_load_store(void* core, std::function<bool(void*, uint64_t, uint64_t, uint8_t*)> load_store_fn, uint8_t* V, uint16_t VLEN,
@@ -62,5 +71,9 @@ uint64_t vector_load_store_index(void* core, std::function
+template <unsigned VLEN, typename elem_t>
+void vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vs2, unsigned vs1,
+                      unsigned vd);
 } // namespace softvector
+#include "vm/vector_functions.hpp"
 #endif /* _VM_VECTOR_FUNCTIONS_H_ */
diff --git a/src/vm/vector_functions.hpp b/src/vm/vector_functions.hpp
new file mode 100644
index 0000000..bde361c
--- /dev/null
+++ b/src/vm/vector_functions.hpp
@@ -0,0 +1,154 @@
+////////////////////////////////////////////////////////////////////////////////
+// Copyright (C) 2025, MINRES Technologies GmbH
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice,
+//    this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its contributors
+//    may be used to endorse or promote products derived from this software
+//    without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Contributors:
+//       alex@minres.com - initial API and implementation
+////////////////////////////////////////////////////////////////////////////////
+#pragma once
+#include "vm/vector_functions.h"
+#include
+#include
+#include
+#ifndef _VM_VECTOR_FUNCTIONS_H_
+#error __FILE__ should only be included from vector_functions.h
+#endif
+#include
+namespace softvector {
+
+template <typename elem_t> struct vreg_view {
+    uint8_t* start;
+    size_t elem_count;
+    inline elem_t& get(size_t idx = 0) {
+        assert(idx < elem_count);
+        return *(reinterpret_cast<elem_t*>(start) + idx);
+    }
+    elem_t& operator[](size_t idx) {
+        assert(idx < elem_count);
+        return *(reinterpret_cast<elem_t*>(start) + idx);
+    }
+};
+
+template <unsigned VLEN, typename elem_t> vreg_view<elem_t> get_vreg(uint8_t* V, uint8_t reg_idx, uint16_t elem_count) {
+    assert(V + elem_count * sizeof(elem_t) <= V + VLEN * RFS / 8);
+    return {V + VLEN / 8 * reg_idx, elem_count};
+}
+template <unsigned VLEN> vmask_view read_vmask(uint8_t* V, uint16_t elem_count, uint8_t reg_idx) {
+    uint8_t* mask_start = V + VLEN / 8 * reg_idx;
+    assert(mask_start + elem_count / 8 <= V + VLEN * RFS / 8);
+    return {mask_start, elem_count};
+}
+template <typename elem_t> std::function<elem_t(elem_t, elem_t)> get_funct(unsigned funct) {
+    switch(funct) {
+    case 0b000000: {
+        // VADD
+        return [](elem_t vs2, elem_t vs1) { return vs2 + vs1; };
+    }
+    case 0b000010: {
+        // VSUB
+    }
+    case 0b000100: {
+        // VMINU
+    }
+    case 0b000101: {
+        // VMIN
+    }
+    case 0b000110: {
+        // VMAXU
+    }
+    case 0b000111: {
+        // VMAX
+    }
+    case 0b001001: {
+        // VAND
+    }
+    case 0b001010: {
+        // VOR
+    }
+    case 0b001011: {
+        // VXOR
+    }
+    default:
+        throw std::runtime_error("Unknown funct in get_funct");
+    }
+}
+template <unsigned VLEN, typename elem_t>
+void vector_vector_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vs2, unsigned vs1,
+                      unsigned vd) {
+    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
+    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
+    auto vs1_view = get_vreg<VLEN, elem_t>(V, vs1, elem_count);
+    auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
+    auto vd_view = get_vreg<VLEN, elem_t>(V, vd, elem_count);
+    auto fn = get_funct<elem_t>(funct6);
+    // elements w/ index smaller than vstart are in the prestart and get skipped
+    // body is from vstart to min(elem_count, vl)
+    for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
+        bool mask_active = vm ? 1 : mask_reg[idx];
+        if(mask_active) {
+            vd_view[idx] = fn(vs2_view[idx], vs1_view[idx]);
+        } else {
+            vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
+        }
+    }
+    // elements w/ index larger than elem_count are in the tail (fractional LMUL)
+    // elements w/ index larger than vl are in the tail
+    unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(elem_t) * 8);
+    for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
+        vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
+    }
+    return;
+}
+template <unsigned VLEN, typename elem_t>
+void vector_imm_op(uint8_t* V, unsigned funct6, uint64_t vl, uint64_t vstart, vtype_t vtype, bool vm, unsigned vs2,
+                   typename std::make_signed<elem_t>::type imm, unsigned vd) {
+    uint64_t elem_count = VLEN * vtype.lmul() / vtype.sew();
+    vmask_view mask_reg = read_vmask<VLEN>(V, elem_count);
+    auto vs2_view = get_vreg<VLEN, elem_t>(V, vs2, elem_count);
+    auto vd_view = get_vreg<VLEN, elem_t>(V, vd, elem_count);
+    auto fn = get_funct<elem_t>(funct6);
+    // elements w/ index smaller than vstart are in the prestart and get skipped
+    // body is from vstart to min(elem_count, vl)
+    for(unsigned idx = vstart; idx < std::min(elem_count, vl); idx++) {
+        bool mask_active = vm ? 1 : mask_reg[idx];
+        if(mask_active) {
+            vd_view[idx] = fn(vs2_view[idx], imm);
+        } else {
+            vd_view[idx] = vtype.vma() ? vd_view[idx] : vd_view[idx];
+        }
+    }
+    // elements w/ index larger than elem_count are in the tail (fractional LMUL)
+    // elements w/ index larger than vl are in the tail
+    unsigned maximum_elems = VLEN * vtype.lmul() / (sizeof(elem_t) * 8);
+    for(unsigned idx = std::min(elem_count, vl); idx < maximum_elems; idx++) {
+        vd_view[idx] = vtype.vta() ? vd_view[idx] : vd_view[idx];
+    }
+    return;
+}
+} // namespace softvector
\ No newline at end of file
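
Note (reviewer sketch, not part of the patch): get_funct only returns a lambda for VADD (funct6 0b000000); the other listed OPIVV encodings fall through to the default throw, which matches the "working add" scope of the commit. The snippet below is a minimal, self-contained sketch of how those cases could be filled in while keeping the same std::function<elem_t(elem_t, elem_t)> shape. The name get_funct_sketch, the signed-compare handling for VMIN/VMAX via std::make_signed, and the small demo in main are illustrative assumptions, not code from this repository.

// Sketch only: candidate lambdas for the funct6 encodings the patch leaves
// unimplemented. get_funct_sketch is a hypothetical stand-in for
// softvector::get_funct.
#include <algorithm>
#include <cstdint>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <type_traits>

template <typename elem_t> std::function<elem_t(elem_t, elem_t)> get_funct_sketch(unsigned funct) {
    using selem_t = typename std::make_signed<elem_t>::type;
    switch(funct) {
    case 0b000000: // VADD
        return [](elem_t vs2, elem_t vs1) { return vs2 + vs1; };
    case 0b000010: // VSUB: vd = vs2 - vs1
        return [](elem_t vs2, elem_t vs1) { return vs2 - vs1; };
    case 0b000100: // VMINU: unsigned minimum
        return [](elem_t vs2, elem_t vs1) { return std::min(vs2, vs1); };
    case 0b000101: // VMIN: signed minimum, result stored back in the unsigned element type
        return [](elem_t vs2, elem_t vs1) {
            return static_cast<elem_t>(std::min(static_cast<selem_t>(vs2), static_cast<selem_t>(vs1)));
        };
    case 0b000110: // VMAXU: unsigned maximum
        return [](elem_t vs2, elem_t vs1) { return std::max(vs2, vs1); };
    case 0b000111: // VMAX: signed maximum
        return [](elem_t vs2, elem_t vs1) {
            return static_cast<elem_t>(std::max(static_cast<selem_t>(vs2), static_cast<selem_t>(vs1)));
        };
    case 0b001001: // VAND
        return [](elem_t vs2, elem_t vs1) { return vs2 & vs1; };
    case 0b001010: // VOR
        return [](elem_t vs2, elem_t vs1) { return vs2 | vs1; };
    case 0b001011: // VXOR
        return [](elem_t vs2, elem_t vs1) { return vs2 ^ vs1; };
    default:
        throw std::runtime_error("Unknown funct in get_funct_sketch");
    }
}

int main() {
    // Apply the VMIN lambda element-wise to two small uint8_t "registers".
    auto vmin = get_funct_sketch<uint8_t>(0b000101);
    uint8_t vs2[4] = {0x01, 0xFF, 0x7F, 0x80}; // 0xFF and 0x80 are negative as int8_t
    uint8_t vs1[4] = {0x02, 0x10, 0x90, 0x7E};
    for(unsigned i = 0; i < 4; i++)
        std::cout << std::hex << static_cast<unsigned>(vmin(vs2[i], vs1[i])) << ' ';
    std::cout << '\n'; // expected: 1 ff 90 80
    return 0;
}

Widening, carry, and fixed-point encodings are left out of the sketch on purpose; they need access to mask/carry state and would not fit the two-operand lambda shape used by get_funct.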