lib: sbi_misaligned_ldst: Add handling of vector load/store

Add misaligned load/store handling for the vector extension
to the sbi_misaligned_ldst library.

This implementation is inspired by the misaligned_vec_ldst
implementation in the riscv-pk project.

Co-developed-by: Zong Li <zong.li@sifive.com>
Signed-off-by: Zong Li <zong.li@sifive.com>
Signed-off-by: Nylon Chen <nylon.chen@sifive.com>
Reviewed-by: Andy Chiu <andy.chiu@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Commit:    c2acc5e5b0 (parent c5a8b15e39)
Author:    Nylon Chen
Date:      2024-12-06 11:21:49 +08:00
Committer: Anup Patel
6 changed files with 760 additions and 12 deletions

lib/sbi/sbi_trap_v_ldst.c (new file, 343 lines)

@@ -0,0 +1,343 @@
/*
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 SiFive Inc.
*
* Authors:
* Andrew Waterman <andrew@sifive.com>
* Nylon Chen <nylon.chen@sifive.com>
* Zong Li <zong.li@sifive.com>
*/
#include <sbi/riscv_asm.h>
#include <sbi/riscv_encoding.h>
#include <sbi/sbi_error.h>
#include <sbi/sbi_trap_ldst.h>
#include <sbi/sbi_trap.h>
#include <sbi/sbi_unpriv.h>
#ifdef __riscv_vector
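/* Maximum VLEN (in bits) supported by this emulation; the copy of the
 * v0 mask register below needs VLEN_MAX / 8 bytes. */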
#define VLEN_MAX 65536
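/*
 * set_vreg() - copy @size bytes from @bytes into vector register @which,
 * starting at byte offset @pos. Vector registers are accessed as groups
 * of eight (v0/v8/v16/v24) with e8/m8, and vstart is used to step to the
 * target byte position within the group.
 */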
static inline void set_vreg(ulong vlenb, ulong which,
ulong pos, ulong size, const uint8_t *bytes)
{
pos += (which % 8) * vlenb;
bytes -= pos;
asm volatile (
" .option push\n\t"
" .option arch, +v\n\t"
" vsetvli x0, %0, e8, m8, tu, ma\n\t"
" .option pop\n\t"
:: "r" (pos + size));
csr_write(CSR_VSTART, pos);
switch (which / 8) {
case 0:
asm volatile (
" .option push\n\t"
" .option arch, +v\n\t"
" vle8.v v0, (%0)\n\t"
" .option pop\n\t"
:: "r" (bytes) : "memory");
break;
case 1:
asm volatile (
" .option push\n\t"
" .option arch, +v\n\t"
" vle8.v v8, (%0)\n\t"
" .option pop\n\t"
:: "r" (bytes) : "memory");
break;
case 2:
asm volatile (
" .option push\n\t"
" .option arch, +v\n\t"
" vle8.v v16, (%0)\n\t"
" .option pop\n\t"
:: "r" (bytes) : "memory");
break;
case 3:
asm volatile (
" .option push\n\t"
" .option arch, +v\n\t"
" vle8.v v24, (%0)\n\t"
" .option pop\n\t"
:: "r" (bytes) : "memory");
break;
default:
break;
}
}
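/*
 * get_vreg() - copy @size bytes out of vector register @which, starting
 * at byte offset @pos, into the buffer @bytes.
 */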
static inline void get_vreg(ulong vlenb, ulong which,
ulong pos, ulong size, uint8_t *bytes)
{
pos += (which % 8) * vlenb;
bytes -= pos;
asm volatile (
" .option push\n\t"
" .option arch, +v\n\t"
" vsetvli x0, %0, e8, m8, tu, ma\n\t"
" .option pop\n\t"
:: "r" (pos + size));
csr_write(CSR_VSTART, pos);
switch (which / 8) {
case 0:
asm volatile (
" .option push\n\t"
" .option arch, +v\n\t"
" vse8.v v0, (%0)\n\t"
" .option pop\n\t"
:: "r" (bytes) : "memory");
break;
case 1:
asm volatile (
" .option push\n\t"
" .option arch, +v\n\t"
" vse8.v v8, (%0)\n\t"
" .option pop\n\t"
:: "r" (bytes) : "memory");
break;
case 2:
asm volatile (
" .option push\n\t"
" .option arch, +v\n\t"
" vse8.v v16, (%0)\n\t"
" .option pop\n\t"
:: "r" (bytes) : "memory");
break;
case 3:
asm volatile (
" .option push\n\t"
" .option arch, +v\n\t"
" vse8.v v24, (%0)\n\t"
" .option pop\n\t"
:: "r" (bytes) : "memory");
break;
default:
break;
}
}
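/* (Re)program vl and vtype via the vsetvl instruction. */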
static inline void vsetvl(ulong vl, ulong vtype)
{
asm volatile (
" .option push\n\t"
" .option arch, +v\n\t"
" vsetvl x0, %0, %1\n\t"
" .option pop\n\t"
:: "r" (vl), "r" (vtype));
}
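/*
 * Emulate a misaligned vector load: walk the active elements from vstart
 * to vl, fetch each one byte-by-byte with sbi_load_u8(), and write the
 * result into the destination register group. vl/vtype are restored
 * before returning; faults that cannot be handled here are redirected
 * via sbi_trap_redirect().
 */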
int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
struct sbi_trap_context *tcntx)
{
const struct sbi_trap_info *orig_trap = &tcntx->trap;
struct sbi_trap_regs *regs = &tcntx->regs;
struct sbi_trap_info uptrap;
ulong insn = sbi_get_insn(regs->mepc, &uptrap);
ulong vl = csr_read(CSR_VL);
ulong vtype = csr_read(CSR_VTYPE);
ulong vlenb = csr_read(CSR_VLENB);
ulong vstart = csr_read(CSR_VSTART);
ulong base = GET_RS1(insn, regs);
ulong stride = GET_RS2(insn, regs);
ulong vd = GET_VD(insn);
ulong vs2 = GET_VS2(insn);
ulong view = GET_VIEW(insn);
ulong vsew = GET_VSEW(vtype);
ulong vlmul = GET_VLMUL(vtype);
bool illegal = GET_MEW(insn);
bool masked = IS_MASKED(insn);
uint8_t mask[VLEN_MAX / 8];
uint8_t bytes[8 * sizeof(uint64_t)];
ulong len = GET_LEN(view);
ulong nf = GET_NF(insn);
ulong vemul = GET_VEMUL(vlmul, view, vsew);
ulong emul = GET_EMUL(vemul);
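/*
 * Adjust element length, stride, and EMUL for the addressing mode:
 * unit-stride and fault-only-first loads use a stride of nf * len,
 * whole-register loads move nf registers of view-sized elements, and
 * indexed loads use SEW-sized data elements (data EMUL equals LMUL)
 * with byte offsets taken from vs2.
 */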
if (IS_UNIT_STRIDE_LOAD(insn) || IS_FAULT_ONLY_FIRST_LOAD(insn)) {
stride = nf * len;
} else if (IS_WHOLE_REG_LOAD(insn)) {
vl = (nf * vlenb) >> view;
nf = 1;
vemul = 0;
emul = 1;
stride = nf * len;
} else if (IS_INDEXED_LOAD(insn)) {
len = 1 << vsew;
vemul = (vlmul + vsew - vsew) & 7;
emul = 1 << ((vemul & 4) ? 0 : vemul);
stride = nf * len;
}
if (illegal || vlenb > VLEN_MAX / 8) {
struct sbi_trap_info trap = {
.cause = CAUSE_ILLEGAL_INSTRUCTION,
.tval = insn,
};
return sbi_trap_redirect(regs, &trap);
}
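/* for masked operations, copy the v0 mask register so inactive elements can be skipped */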
if (masked)
get_vreg(vlenb, 0, 0, vlenb, mask);
do {
if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
/* compute element address */
ulong addr = base + vstart * stride;
if (IS_INDEXED_LOAD(insn)) {
ulong offset = 0;
get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
addr = base + offset;
}
csr_write(CSR_VSTART, vstart);
/* obtain load data from memory */
for (ulong seg = 0; seg < nf; seg++) {
for (ulong i = 0; i < len; i++) {
bytes[seg * len + i] =
sbi_load_u8((void *)(addr + seg * len + i),
&uptrap);
if (uptrap.cause) {
if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
vl = vstart;
break;
}
vsetvl(vl, vtype);
uptrap.tinst = sbi_misaligned_tinst_fixup(
orig_trap->tinst, uptrap.tinst, i);
return sbi_trap_redirect(regs, &uptrap);
}
}
}
/* write load data to regfile */
for (ulong seg = 0; seg < nf; seg++)
set_vreg(vlenb, vd + seg * emul, vstart * len,
len, &bytes[seg * len]);
}
} while (++vstart < vl);
/* restore clobbered vl/vtype */
vsetvl(vl, vtype);
return vl;
}
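/*
 * Emulate a misaligned vector store; mirrors the load emulator above,
 * except element data is read from the source register group and
 * written to memory byte-by-byte with sbi_store_u8().
 */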
int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
struct sbi_trap_context *tcntx)
{
const struct sbi_trap_info *orig_trap = &tcntx->trap;
struct sbi_trap_regs *regs = &tcntx->regs;
struct sbi_trap_info uptrap;
ulong insn = sbi_get_insn(regs->mepc, &uptrap);
ulong vl = csr_read(CSR_VL);
ulong vtype = csr_read(CSR_VTYPE);
ulong vlenb = csr_read(CSR_VLENB);
ulong vstart = csr_read(CSR_VSTART);
ulong base = GET_RS1(insn, regs);
ulong stride = GET_RS2(insn, regs);
ulong vd = GET_VD(insn);
ulong vs2 = GET_VS2(insn);
ulong view = GET_VIEW(insn);
ulong vsew = GET_VSEW(vtype);
ulong vlmul = GET_VLMUL(vtype);
bool illegal = GET_MEW(insn);
bool masked = IS_MASKED(insn);
uint8_t mask[VLEN_MAX / 8];
uint8_t bytes[8 * sizeof(uint64_t)];
ulong len = GET_LEN(view);
ulong nf = GET_NF(insn);
ulong vemul = GET_VEMUL(vlmul, view, vsew);
ulong emul = GET_EMUL(vemul);
if (IS_UNIT_STRIDE_STORE(insn)) {
stride = nf * len;
} else if (IS_WHOLE_REG_STORE(insn)) {
vl = (nf * vlenb) >> view;
nf = 1;
vemul = 0;
emul = 1;
stride = nf * len;
} else if (IS_INDEXED_STORE(insn)) {
len = 1 << vsew;
vemul = (vlmul + vsew - vsew) & 7;
emul = 1 << ((vemul & 4) ? 0 : vemul);
stride = nf * len;
}
if (illegal || vlenb > VLEN_MAX / 8) {
struct sbi_trap_info trap = {
.cause = CAUSE_ILLEGAL_INSTRUCTION,
.tval = insn,
};
return sbi_trap_redirect(regs, &trap);
}
if (masked)
get_vreg(vlenb, 0, 0, vlenb, mask);
do {
if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
/* compute element address */
ulong addr = base + vstart * stride;
if (IS_INDEXED_STORE(insn)) {
ulong offset = 0;
get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
addr = base + offset;
}
/* obtain store data from regfile */
for (ulong seg = 0; seg < nf; seg++)
get_vreg(vlenb, vd + seg * emul, vstart * len,
len, &bytes[seg * len]);
csr_write(CSR_VSTART, vstart);
/* write store data to memory */
for (ulong seg = 0; seg < nf; seg++) {
for (ulong i = 0; i < len; i++) {
sbi_store_u8((void *)(addr + seg * len + i),
bytes[seg * len + i], &uptrap);
if (uptrap.cause) {
vsetvl(vl, vtype);
uptrap.tinst = sbi_misaligned_tinst_fixup(
orig_trap->tinst, uptrap.tinst, i);
return sbi_trap_redirect(regs, &uptrap);
}
}
}
}
} while (++vstart < vl);
/* restore clobbered vl/vtype */
vsetvl(vl, vtype);
return vl;
}
#else
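/* Stubs used when OpenSBI is built without vector support. */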
int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
struct sbi_trap_context *tcntx)
{
return 0;
}
int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
struct sbi_trap_context *tcntx)
{
return 0;
}
#endif /* __riscv_vector */