lib: sbi_misaligned_ldst: Add handling of vector load/store

Add misaligned load/store handling for the vector extension
to the sbi_misaligned_ldst library.

This implementation is inspired from the misaligned_vec_ldst
implementation in the riscv-pk project.

Co-developed-by: Zong Li <zong.li@sifive.com>
Signed-off-by: Zong Li <zong.li@sifive.com>
Signed-off-by: Nylon Chen <nylon.chen@sifive.com>
Reviewed-by: Andy Chiu <andy.chiu@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
This commit is contained in:
Nylon Chen
2024-12-06 11:21:49 +08:00
committed by Anup Patel
parent c5a8b15e39
commit c2acc5e5b0
6 changed files with 760 additions and 12 deletions

View File

@@ -95,6 +95,7 @@ libsbi-objs-y += sbi_timer.o
libsbi-objs-y += sbi_tlb.o
libsbi-objs-y += sbi_trap.o
libsbi-objs-y += sbi_trap_ldst.o
libsbi-objs-y += sbi_trap_v_ldst.o
libsbi-objs-y += sbi_unpriv.o
libsbi-objs-y += sbi_expected_trap.o
libsbi-objs-y += sbi_cppc.o

View File

@@ -32,7 +32,7 @@ typedef int (*sbi_trap_ld_emulator)(int rlen, union sbi_ldst_data *out_val,
typedef int (*sbi_trap_st_emulator)(int wlen, union sbi_ldst_data in_val,
struct sbi_trap_context *tcntx);
static ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
ulong addr_offset)
{
if (new_tinst == INSN_PSEUDO_VS_LOAD ||
@@ -52,7 +52,7 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
ulong insn, insn_len;
union sbi_ldst_data val = { 0 };
struct sbi_trap_info uptrap;
int rc, fp = 0, shift = 0, len = 0;
int rc, fp = 0, shift = 0, len = 0, vector = 0;
if (orig_trap->tinst & 0x1) {
/*
@@ -144,6 +144,9 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
len = 2;
shift = 8 * (sizeof(ulong) - len);
insn = RVC_RS2S(insn) << SH_RD;
} else if (IS_VECTOR_LOAD_STORE(insn)) {
vector = 1;
emu = sbi_misaligned_v_ld_emulator;
} else {
return sbi_trap_redirect(regs, orig_trap);
}
@@ -152,14 +155,16 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
if (rc <= 0)
return rc;
if (!fp)
SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
if (!vector) {
if (!fp)
SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
#ifdef __riscv_flen
else if (len == 8)
SET_F64_RD(insn, regs, val.data_u64);
else
SET_F32_RD(insn, regs, val.data_ulong);
else if (len == 8)
SET_F64_RD(insn, regs, val.data_u64);
else
SET_F32_RD(insn, regs, val.data_ulong);
#endif
}
regs->mepc += insn_len;
@@ -248,6 +253,8 @@ static int sbi_trap_emulate_store(struct sbi_trap_context *tcntx,
} else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) {
len = 2;
val.data_ulong = GET_RS2S(insn, regs);
} else if (IS_VECTOR_LOAD_STORE(insn)) {
emu = sbi_misaligned_v_st_emulator;
} else {
return sbi_trap_redirect(regs, orig_trap);
}

343
lib/sbi/sbi_trap_v_ldst.c Normal file
View File

@@ -0,0 +1,343 @@
/*
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 SiFive Inc.
*
* Authors:
* Andrew Waterman <andrew@sifive.com>
* Nylon Chen <nylon.chen@sifive.com>
* Zong Li <zong.li@sifive.com>
*/
#include <sbi/riscv_asm.h>
#include <sbi/riscv_encoding.h>
#include <sbi/sbi_error.h>
#include <sbi/sbi_trap_ldst.h>
#include <sbi/sbi_trap.h>
#include <sbi/sbi_unpriv.h>
#include <sbi/sbi_trap.h>
#ifdef __riscv_vector
#define VLEN_MAX 65536
/*
 * Write @size bytes from @bytes into vector register @which, starting
 * @pos bytes into that register.
 *
 * The whole group of 8 registers containing @which is addressed with an
 * e8/m8 vle8.v: VSTART is set to the byte position inside the group (so
 * earlier bytes are not touched) and vl = pos + size stops the transfer
 * after the requested slice; "tu" (tail-undisturbed) preserves the bytes
 * past it.
 *
 * NOTE(review): assumes pos + size stays inside the 8-register group
 * selected by which / 8 — confirm against callers.
 */
static inline void set_vreg(ulong vlenb, ulong which,
			    ulong pos, ulong size, const uint8_t *bytes)
{
	/* Byte offset of register (which % 8) inside its m8 group. */
	pos += (which % 8) * vlenb;
	/*
	 * vle8.v element i reads bytes + i, and the first active element
	 * index is pos, so bias the pointer down by pos to make the data
	 * land at bytes[0..size).
	 */
	bytes -= pos;
	asm volatile (
		" .option push\n\t"
		" .option arch, +v\n\t"
		" vsetvli x0, %0, e8, m8, tu, ma\n\t"
		" .option pop\n\t"
		:: "r" (pos + size));
	/* Elements below VSTART are left undisturbed by the load. */
	csr_write(CSR_VSTART, pos);
	/* Pick the group v0/v8/v16/v24 that contains register 'which'. */
	switch (which / 8) {
	case 0:
		asm volatile (
			" .option push\n\t"
			" .option arch, +v\n\t"
			" vle8.v v0, (%0)\n\t"
			" .option pop\n\t"
			:: "r" (bytes) : "memory");
		break;
	case 1:
		asm volatile (
			" .option push\n\t"
			" .option arch, +v\n\t"
			" vle8.v v8, (%0)\n\t"
			" .option pop\n\t"
			:: "r" (bytes) : "memory");
		break;
	case 2:
		asm volatile (
			" .option push\n\t"
			" .option arch, +v\n\t"
			" vle8.v v16, (%0)\n\t"
			" .option pop\n\t"
			:: "r" (bytes) : "memory");
		break;
	case 3:
		asm volatile (
			" .option push\n\t"
			" .option arch, +v\n\t"
			" vle8.v v24, (%0)\n\t"
			" .option pop\n\t"
			:: "r" (bytes) : "memory");
		break;
	default:
		/* which / 8 is always 0..3 for v0-v31; nothing to do. */
		break;
	}
}
/*
 * Read @size bytes out of vector register @which, starting @pos bytes
 * into that register, into the buffer @bytes.
 *
 * Mirror image of set_vreg(): the containing group of 8 registers is
 * stored with an e8/m8 vse8.v, with VSTART/vl bounding the transfer to
 * exactly the requested byte slice.
 *
 * NOTE(review): assumes pos + size stays inside the 8-register group
 * selected by which / 8 — confirm against callers.
 */
static inline void get_vreg(ulong vlenb, ulong which,
			    ulong pos, ulong size, uint8_t *bytes)
{
	/* Byte offset of register (which % 8) inside its m8 group. */
	pos += (which % 8) * vlenb;
	/*
	 * vse8.v element i writes bytes + i, and the first active element
	 * index is pos, so bias the pointer down by pos to make the data
	 * land at bytes[0..size).
	 */
	bytes -= pos;
	asm volatile (
		" .option push\n\t"
		" .option arch, +v\n\t"
		" vsetvli x0, %0, e8, m8, tu, ma\n\t"
		" .option pop\n\t"
		:: "r" (pos + size));
	/* Elements below VSTART are not stored. */
	csr_write(CSR_VSTART, pos);
	/* Pick the group v0/v8/v16/v24 that contains register 'which'. */
	switch (which / 8) {
	case 0:
		asm volatile (
			" .option push\n\t"
			" .option arch, +v\n\t"
			" vse8.v v0, (%0)\n\t"
			" .option pop\n\t"
			:: "r" (bytes) : "memory");
		break;
	case 1:
		asm volatile (
			" .option push\n\t"
			" .option arch, +v\n\t"
			" vse8.v v8, (%0)\n\t"
			" .option pop\n\t"
			:: "r" (bytes) : "memory");
		break;
	case 2:
		asm volatile (
			" .option push\n\t"
			" .option arch, +v\n\t"
			" vse8.v v16, (%0)\n\t"
			" .option pop\n\t"
			:: "r" (bytes) : "memory");
		break;
	case 3:
		asm volatile (
			".option push\n\t"
			".option arch, +v\n\t"
			"vse8.v v24, (%0)\n\t"
			".option pop\n\t"
			:: "r" (bytes) : "memory");
		break;
	default:
		/* which / 8 is always 0..3 for v0-v31; nothing to do. */
		break;
	}
}
/*
 * Restore VL/VTYPE with a vsetvl instruction (rd = x0, rs1 = vl, so only
 * the CSRs change).  Used to undo the clobbering done by the byte-wise
 * vsetvli issued in set_vreg()/get_vreg().
 */
static inline void vsetvl(ulong vl, ulong vtype)
{
	asm volatile (
		" .option push\n\t"
		" .option arch, +v\n\t"
		" vsetvl x0, %0, %1\n\t"
		" .option pop\n\t"
		:: "r" (vl), "r" (vtype));
}
int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
struct sbi_trap_context *tcntx)
{
const struct sbi_trap_info *orig_trap = &tcntx->trap;
struct sbi_trap_regs *regs = &tcntx->regs;
struct sbi_trap_info uptrap;
ulong insn = sbi_get_insn(regs->mepc, &uptrap);
ulong vl = csr_read(CSR_VL);
ulong vtype = csr_read(CSR_VTYPE);
ulong vlenb = csr_read(CSR_VLENB);
ulong vstart = csr_read(CSR_VSTART);
ulong base = GET_RS1(insn, regs);
ulong stride = GET_RS2(insn, regs);
ulong vd = GET_VD(insn);
ulong vs2 = GET_VS2(insn);
ulong view = GET_VIEW(insn);
ulong vsew = GET_VSEW(vtype);
ulong vlmul = GET_VLMUL(vtype);
bool illegal = GET_MEW(insn);
bool masked = IS_MASKED(insn);
uint8_t mask[VLEN_MAX / 8];
uint8_t bytes[8 * sizeof(uint64_t)];
ulong len = GET_LEN(view);
ulong nf = GET_NF(insn);
ulong vemul = GET_VEMUL(vlmul, view, vsew);
ulong emul = GET_EMUL(vemul);
if (IS_UNIT_STRIDE_LOAD(insn) || IS_FAULT_ONLY_FIRST_LOAD(insn)) {
stride = nf * len;
} else if (IS_WHOLE_REG_LOAD(insn)) {
vl = (nf * vlenb) >> view;
nf = 1;
vemul = 0;
emul = 1;
stride = nf * len;
} else if (IS_INDEXED_LOAD(insn)) {
len = 1 << vsew;
vemul = (vlmul + vsew - vsew) & 7;
emul = 1 << ((vemul & 4) ? 0 : vemul);
stride = nf * len;
}
if (illegal || vlenb > VLEN_MAX / 8) {
struct sbi_trap_info trap = {
uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION,
uptrap.tval = insn,
};
return sbi_trap_redirect(regs, &trap);
}
if (masked)
get_vreg(vlenb, 0, 0, vlenb, mask);
do {
if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
/* compute element address */
ulong addr = base + vstart * stride;
if (IS_INDEXED_LOAD(insn)) {
ulong offset = 0;
get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
addr = base + offset;
}
csr_write(CSR_VSTART, vstart);
/* obtain load data from memory */
for (ulong seg = 0; seg < nf; seg++) {
for (ulong i = 0; i < len; i++) {
bytes[seg * len + i] =
sbi_load_u8((void *)(addr + seg * len + i),
&uptrap);
if (uptrap.cause) {
if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
vl = vstart;
break;
}
vsetvl(vl, vtype);
uptrap.tinst = sbi_misaligned_tinst_fixup(
orig_trap->tinst, uptrap.tinst, i);
return sbi_trap_redirect(regs, &uptrap);
}
}
}
/* write load data to regfile */
for (ulong seg = 0; seg < nf; seg++)
set_vreg(vlenb, vd + seg * emul, vstart * len,
len, &bytes[seg * len]);
}
} while (++vstart < vl);
/* restore clobbered vl/vtype */
vsetvl(vl, vtype);
return vl;
}
/*
 * Emulate a misaligned vector store element by element.
 *
 * The faulting vector store at regs->mepc is re-decoded and re-executed in
 * software: every active element (the v0 mask is honoured when the
 * instruction is masked) is read out of the source vector register group
 * and written to memory byte-by-byte with sbi_store_u8().  Unit-stride,
 * strided, whole-register and indexed forms are handled.
 *
 * @wlen:   unused; the element width comes from the instruction encoding
 * @in_val: unused; data is taken directly from the vector register file
 * @tcntx:  trap context describing the misaligned access
 *
 * Returns vl (> 0) so the caller treats the emulation as done, or the
 * result of sbi_trap_redirect() when the trap must be forwarded.
 */
int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
				 struct sbi_trap_context *tcntx)
{
	const struct sbi_trap_info *orig_trap = &tcntx->trap;
	struct sbi_trap_regs *regs = &tcntx->regs;
	struct sbi_trap_info uptrap;
	ulong insn = sbi_get_insn(regs->mepc, &uptrap);
	ulong vl = csr_read(CSR_VL);
	ulong vtype = csr_read(CSR_VTYPE);
	ulong vlenb = csr_read(CSR_VLENB);
	ulong vstart = csr_read(CSR_VSTART);
	ulong base = GET_RS1(insn, regs);
	ulong stride = GET_RS2(insn, regs);
	ulong vd = GET_VD(insn);
	ulong vs2 = GET_VS2(insn);
	ulong view = GET_VIEW(insn);
	ulong vsew = GET_VSEW(vtype);
	ulong vlmul = GET_VLMUL(vtype);
	bool illegal = GET_MEW(insn);
	bool masked = IS_MASKED(insn);
	uint8_t mask[VLEN_MAX / 8];
	uint8_t bytes[8 * sizeof(uint64_t)];
	ulong len = GET_LEN(view);
	ulong nf = GET_NF(insn);
	ulong vemul = GET_VEMUL(vlmul, view, vsew);
	ulong emul = GET_EMUL(vemul);

	if (IS_UNIT_STRIDE_STORE(insn)) {
		/* Unit-stride: consecutive segments, stride is implicit. */
		stride = nf * len;
	} else if (IS_WHOLE_REG_STORE(insn)) {
		/* Whole-register store: nf registers, element count from VLENB. */
		vl = (nf * vlenb) >> view;
		nf = 1;
		vemul = 0;
		emul = 1;
		stride = nf * len;
	} else if (IS_INDEXED_STORE(insn)) {
		/* Indexed: data EEW is SEW; (vlmul + vsew - vsew) == LMUL. */
		len = 1 << vsew;
		vemul = (vlmul + vsew - vsew) & 7;
		emul = 1 << ((vemul & 4) ? 0 : vemul);
		stride = nf * len;
	}

	if (illegal || vlenb > VLEN_MAX / 8) {
		/*
		 * Bug fix: use designated initializers.  The original wrote
		 * "uptrap.cause = ..." inside the braces, which assigned to
		 * the unrelated uptrap and initialized trap's first members
		 * positionally instead of by name.
		 */
		struct sbi_trap_info trap = {
			.cause = CAUSE_ILLEGAL_INSTRUCTION,
			.tval = insn,
		};
		return sbi_trap_redirect(regs, &trap);
	}

	/* v0 always holds the mask; snapshot it once up front. */
	if (masked)
		get_vreg(vlenb, 0, 0, vlenb, mask);

	do {
		if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
			/* compute element address */
			ulong addr = base + vstart * stride;

			if (IS_INDEXED_STORE(insn)) {
				ulong offset = 0;

				get_vreg(vlenb, vs2, vstart << view,
					 1 << view, (uint8_t *)&offset);
				addr = base + offset;
			}

			/* obtain store data from regfile */
			for (ulong seg = 0; seg < nf; seg++)
				get_vreg(vlenb, vd + seg * emul, vstart * len,
					 len, &bytes[seg * len]);

			csr_write(CSR_VSTART, vstart);

			/* write store data to memory */
			for (ulong seg = 0; seg < nf; seg++) {
				for (ulong i = 0; i < len; i++) {
					sbi_store_u8((void *)(addr + seg * len + i),
						     bytes[seg * len + i], &uptrap);
					if (uptrap.cause) {
						vsetvl(vl, vtype);
						uptrap.tinst = sbi_misaligned_tinst_fixup(
							orig_trap->tinst, uptrap.tinst, i);
						return sbi_trap_redirect(regs, &uptrap);
					}
				}
			}
		}
	} while (++vstart < vl);

	/* restore clobbered vl/vtype */
	vsetvl(vl, vtype);

	return vl;
}
#else
/*
 * Stub used when OpenSBI is built without the vector extension
 * (__riscv_vector undefined).  Returns 0, i.e. no data was emulated.
 */
int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
				 struct sbi_trap_context *tcntx)
{
	return 0;
}
/*
 * Stub used when OpenSBI is built without the vector extension
 * (__riscv_vector undefined).  Returns 0, i.e. no data was emulated.
 */
int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
				 struct sbi_trap_context *tcntx)
{
	return 0;
}
#endif /* __riscv_vector */