diff --git a/include/sbi/sbi_vector.h b/include/sbi/sbi_vector.h index f00184f0..c14f3174 100644 --- a/include/sbi/sbi_vector.h +++ b/include/sbi/sbi_vector.h @@ -20,6 +20,12 @@ struct sbi_vector_context { uint8_t vregs[]; }; +#define SET_VS_DIRTY(regs) do { \ + if (sbi_regs_from_virt(regs)) \ + csr_set(CSR_VSSTATUS, MSTATUS_VS); \ + regs->mstatus |= MSTATUS_VS; \ +} while(0) + #ifdef OPENSBI_CC_SUPPORT_VECTOR void sbi_vector_save(struct sbi_vector_context *dst); void sbi_vector_restore(const struct sbi_vector_context *src); diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c index 38c28caf..540f655f 100644 --- a/lib/sbi/sbi_trap_v_ldst.c +++ b/lib/sbi/sbi_trap_v_ldst.c @@ -16,11 +16,11 @@ #include #include #include -#include +#include #ifdef OPENSBI_CC_SUPPORT_VECTOR -#define VLEN_MAX 65536 +#define MASK_BUFFLEN 1024 static inline void set_vreg(ulong vlenb, ulong which, ulong pos, ulong size, const uint8_t *bytes) @@ -168,7 +168,7 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx) ulong vl = csr_read(CSR_VL); ulong vtype = csr_read(CSR_VTYPE); ulong vlenb = csr_read(CSR_VLENB); - ulong vstart = csr_read(CSR_VSTART); + ulong vstart = csr_read(CSR_VSTART), orig_vstart = vstart; ulong base = GET_RS1(insn, regs); ulong stride = GET_RS2(insn, regs); ulong vd = GET_VD(insn); @@ -178,8 +178,9 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx) ulong vlmul = GET_VLMUL(vtype); bool illegal = GET_MEW(insn); bool masked = IS_MASKED(insn); - uint8_t mask[VLEN_MAX / 8]; + uint8_t mask[MASK_BUFFLEN / 8]; uint8_t bytes[8 * sizeof(uint64_t)]; + ulong mask_len = MASK_BUFFLEN < vlenb * 8 ? MASK_BUFFLEN : vlenb * 8; ulong len = GET_LEN(view); ulong nf = GET_NF(insn); ulong vemul = GET_VEMUL(vlmul, view, vsew); @@ -200,7 +201,7 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx) stride = nf * len; } - if (illegal || vlenb > VLEN_MAX / 8) { + if (illegal) { struct sbi_trap_info trap = { uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION, uptrap.tval = insn, @@ -208,12 +209,16 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx) return sbi_trap_redirect(regs, &trap); } - if (masked) - get_vreg(vlenb, 0, 0, vlenb, mask); - do { - if (masked && (~mask[vstart / 8] & BIT(vstart % 8))) - continue; + if (masked) { + if (vstart == orig_vstart || vstart % mask_len == 0) + /* Fetch a mask_len chunk of mask */ + get_vreg(vlenb, 0, vstart / mask_len * mask_len, + mask_len, mask); + + if (~mask[vstart % mask_len / 8] & BIT(vstart % 8)) + continue; + } /* compute element address */ ulong addr = base + vstart * stride; @@ -232,15 +237,21 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx) sbi_load_loop(bytes + seg * len, addr + seg * len, len, &uptrap); - if (uptrap.cause) { - if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) { - vl = vstart; - break; - } - vsetvl(vl, vtype); - sbi_misaligned_v_tinst_fixup(&uptrap); - return sbi_trap_redirect(regs, &uptrap); + if (!uptrap.cause) + continue; + + if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) { + vl = vstart; + goto done; } + + vsetvl(vl, vtype); + csr_write(CSR_VSTART, vstart); + /* Don't forget to set dirty if vstart has changed */ + if (vstart != orig_vstart) + SET_VS_DIRTY(regs); + sbi_misaligned_v_tinst_fixup(&uptrap); + return sbi_trap_redirect(regs, &uptrap); } /* write load data to regfile */ @@ -249,8 +260,15 @@ int sbi_misaligned_v_ld_emulator(ulong insn, struct sbi_trap_context *tcntx) len, &bytes[seg * len]); } while (++vstart < vl); +done: /* restore clobbered vl/vtype */ - vsetvl(vl, vtype); + vsetvl(vl, vtype); // VSTART resets to 0 + + /* + * At least 1 element is processed, or vl is changed above in + * the FAULT_ONLY_FIRST_LOAD path, thus set dirty. + */ + SET_VS_DIRTY(regs); /* Return a >0 value for the caller to advance mepc */ return 1; @@ -263,7 +281,7 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx) ulong vl = csr_read(CSR_VL); ulong vtype = csr_read(CSR_VTYPE); ulong vlenb = csr_read(CSR_VLENB); - ulong vstart = csr_read(CSR_VSTART); + ulong vstart = csr_read(CSR_VSTART), orig_vstart = vstart; ulong base = GET_RS1(insn, regs); ulong stride = GET_RS2(insn, regs); ulong vd = GET_VD(insn); @@ -273,8 +291,9 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx) ulong vlmul = GET_VLMUL(vtype); bool illegal = GET_MEW(insn); bool masked = IS_MASKED(insn); - uint8_t mask[VLEN_MAX / 8]; + uint8_t mask[MASK_BUFFLEN / 8]; uint8_t bytes[8 * sizeof(uint64_t)]; + ulong mask_len = MASK_BUFFLEN < vlenb * 8 ? MASK_BUFFLEN : vlenb * 8; ulong len = GET_LEN(view); ulong nf = GET_NF(insn); ulong vemul = GET_VEMUL(vlmul, view, vsew); @@ -295,7 +314,7 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx) stride = nf * len; } - if (illegal || vlenb > VLEN_MAX / 8) { + if (illegal) { struct sbi_trap_info trap = { uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION, uptrap.tval = insn, @@ -303,12 +322,16 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx) return sbi_trap_redirect(regs, &trap); } - if (masked) - get_vreg(vlenb, 0, 0, vlenb, mask); - do { - if (masked && (~mask[vstart / 8] & BIT(vstart % 8))) - continue; + if (masked) { + if (vstart == orig_vstart || vstart % mask_len == 0) + /* Fetch a mask_len chunk of mask */ + get_vreg(vlenb, 0, vstart / mask_len * mask_len, + mask_len, mask); + + if (~mask[vstart % mask_len / 8] & BIT(vstart % 8)) + continue; + } /* compute element address */ ulong addr = base + vstart * stride; @@ -325,23 +348,33 @@ int sbi_misaligned_v_st_emulator(ulong insn, struct sbi_trap_context *tcntx) get_vreg(vlenb, vd + seg * emul, vstart * len, len, &bytes[seg * len]); - csr_write(CSR_VSTART, vstart); - /* write store data to memory */ for (ulong seg = 0; seg < nf; seg++) { sbi_store_loop(bytes + seg * len, addr + seg * len, len, &uptrap); - if (uptrap.cause) { - vsetvl(vl, vtype); - sbi_misaligned_v_tinst_fixup(&uptrap); - return sbi_trap_redirect(regs, &uptrap); - } + if (!uptrap.cause) + continue; + + vsetvl(vl, vtype); + csr_write(CSR_VSTART, vstart); + /* Don't forget to set dirty if vstart has changed */ + if (vstart != orig_vstart) + SET_VS_DIRTY(regs); + sbi_misaligned_v_tinst_fixup(&uptrap); + return sbi_trap_redirect(regs, &uptrap); } } while (++vstart < vl); /* restore clobbered vl/vtype */ - vsetvl(vl, vtype); + vsetvl(vl, vtype); // VSTART resets to 0 + + /* + * No need to set dirty for memory store, but as VSTART resets to + * 0 above, need to set dirty if it's originally not 0. + */ + if (orig_vstart != 0) + SET_VS_DIRTY(regs); /* Return a >0 value for the caller to advance mepc */ return 1;