lib: Fix coldboot race condition observed on emulators/simulators

If we are running on a RISC-V emulator/simulator with a large number of
HARTs, where each HART is a regular thread under a UNIX host, then it is
possible that some of the secondary HARTs don't get a chance to run and
sbi_hart_wake_coldboot_harts() is called before those secondary HARTs call
sbi_hart_wait_for_coldboot(). In this situation, some of the secondary
HARTs will never come out of the coldboot wait loop.

To tackle this, we introduce a global flag, coldboot_done, which is
protected by the coldboot lock and is set by the primary HART in
sbi_hart_wake_coldboot_harts() before waking up the secondary HARTs. We
also rearrange the acquire/release of the coldboot lock to further reduce
the chances of a race condition.

Signed-off-by: Anup Patel <anup.patel@wdc.com>
Reviewed-by: Atish Patra <atish.patra@wdc.com>
Reviewed-by: Zong Li <zong.li@sifive.com>
Reviewed-by: Nylon Chen <nylon7@andestech.com>
This commit is contained in:
Anup Patel
2019-09-26 09:55:25 +05:30
committed by Anup Patel
parent 2c7bab76a2
commit e561c63036

View File

@@ -344,12 +344,12 @@ struct sbi_scratch *sbi_hart_id_to_scratch(struct sbi_scratch *scratch,
} }
#define COLDBOOT_WAIT_BITMAP_SIZE __riscv_xlen #define COLDBOOT_WAIT_BITMAP_SIZE __riscv_xlen
static spinlock_t coldboot_wait_bitmap_lock = SPIN_LOCK_INITIALIZER; static spinlock_t coldboot_lock = SPIN_LOCK_INITIALIZER;
static unsigned long coldboot_wait_bitmap = 0; static unsigned long coldboot_done = 0;
static unsigned long coldboot_wait_bitmap = 0;
void sbi_hart_wait_for_coldboot(struct sbi_scratch *scratch, u32 hartid) void sbi_hart_wait_for_coldboot(struct sbi_scratch *scratch, u32 hartid)
{ {
unsigned long mipval;
const struct sbi_platform *plat = sbi_platform_ptr(scratch); const struct sbi_platform *plat = sbi_platform_ptr(scratch);
if ((sbi_platform_hart_count(plat) <= hartid) || if ((sbi_platform_hart_count(plat) <= hartid) ||
@@ -359,19 +359,26 @@ void sbi_hart_wait_for_coldboot(struct sbi_scratch *scratch, u32 hartid)
/* Set MSIE bit to receive IPI */ /* Set MSIE bit to receive IPI */
csr_set(CSR_MIE, MIP_MSIP); csr_set(CSR_MIE, MIP_MSIP);
do { /* Acquire coldboot lock */
spin_lock(&coldboot_wait_bitmap_lock); spin_lock(&coldboot_lock);
coldboot_wait_bitmap |= (1UL << hartid);
spin_unlock(&coldboot_wait_bitmap_lock);
/* Mark current HART as waiting */
coldboot_wait_bitmap |= (1UL << hartid);
/* Wait for coldboot to finish using WFI */
while (!coldboot_done) {
spin_unlock(&coldboot_lock);
wfi(); wfi();
mipval = csr_read(CSR_MIP); spin_lock(&coldboot_lock);
};
spin_lock(&coldboot_wait_bitmap_lock); /* Unmark current HART as waiting */
coldboot_wait_bitmap &= ~(1UL << hartid); coldboot_wait_bitmap &= ~(1UL << hartid);
spin_unlock(&coldboot_wait_bitmap_lock);
} while (!(mipval & MIP_MSIP));
/* Release coldboot lock */
spin_unlock(&coldboot_lock);
/* Clear current HART IPI */
sbi_platform_ipi_clear(plat, hartid); sbi_platform_ipi_clear(plat, hartid);
} }
@@ -380,11 +387,18 @@ void sbi_hart_wake_coldboot_harts(struct sbi_scratch *scratch, u32 hartid)
const struct sbi_platform *plat = sbi_platform_ptr(scratch); const struct sbi_platform *plat = sbi_platform_ptr(scratch);
int max_hart = sbi_platform_hart_count(plat); int max_hart = sbi_platform_hart_count(plat);
/* Acquire coldboot lock */
spin_lock(&coldboot_lock);
/* Mark coldboot done */
coldboot_done = 1;
/* Send an IPI to all HARTs waiting for coldboot */
for (int i = 0; i < max_hart; i++) { for (int i = 0; i < max_hart; i++) {
/* send an IPI to every other hart */
spin_lock(&coldboot_wait_bitmap_lock);
if ((i != hartid) && (coldboot_wait_bitmap & (1UL << i))) if ((i != hartid) && (coldboot_wait_bitmap & (1UL << i)))
sbi_platform_ipi_send(plat, i); sbi_platform_ipi_send(plat, i);
spin_unlock(&coldboot_wait_bitmap_lock);
} }
/* Release coldboot lock */
spin_unlock(&coldboot_lock);
} }