From 99aabc6b8431a2bcf2b28a2423952e529de9fbc5 Mon Sep 17 00:00:00 2001 From: Raj Vishwanathan Date: Wed, 23 Apr 2025 15:50:45 -0700 Subject: [PATCH] lib: sbi: Set the scratch allocation to alignment to cacheline size Set the scratch allocation alignment to cacheline size specified by riscv,cbom-block-size in the DTS file to avoid two atomic variables from the same cache line causing livelock on some platforms. If the cacheline size is not specified, we set it to a default value. Signed-off-by: Raj Vishwanathan Reviewed-by: Anup Patel Reviewed-by: Samuel Holland Link: https://lore.kernel.org/r/20250423225045.267983-1-Raj.Vishwanathan@gmail.com Signed-off-by: Anup Patel --- include/sbi/sbi_platform.h | 5 +++++ include/sbi_utils/fdt/fdt_helper.h | 2 ++ lib/sbi/sbi_scratch.c | 26 ++++++++++++++++++++++++-- lib/utils/fdt/fdt_helper.c | 24 ++++++++++++++++++++++++ platform/generic/platform.c | 8 ++++++++ 5 files changed, 63 insertions(+), 2 deletions(-) diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h index 82840ae5..08ece320 100644 --- a/include/sbi/sbi_platform.h +++ b/include/sbi/sbi_platform.h @@ -39,6 +39,8 @@ #define SBI_PLATFORM_FIRMWARE_CONTEXT_OFFSET (0x60 + __SIZEOF_POINTER__) /** Offset of hart_index2id in struct sbi_platform */ #define SBI_PLATFORM_HART_INDEX2ID_OFFSET (0x60 + (__SIZEOF_POINTER__ * 2)) +/** Offset of cbom_block_size in struct sbi_platform */ +#define SBI_PLATFORM_CBOM_BLOCK_SIZE_OFFSET (0x60 + (__SIZEOF_POINTER__ * 3)) #define SBI_PLATFORM_TLB_RANGE_FLUSH_LIMIT_DEFAULT (1UL << 12) @@ -190,6 +192,8 @@ struct sbi_platform { * hart_index2id[] = */ const u32 *hart_index2id; + /** Allocation alignment for Scratch */ + unsigned long cbom_block_size; }; /** @@ -207,6 +211,7 @@ assert_member_offset(struct sbi_platform, reserved, SBI_PLATFORM_RESERVED_OFFSET assert_member_offset(struct sbi_platform, platform_ops_addr, SBI_PLATFORM_OPS_OFFSET); assert_member_offset(struct sbi_platform, firmware_context, 
SBI_PLATFORM_FIRMWARE_CONTEXT_OFFSET); assert_member_offset(struct sbi_platform, hart_index2id, SBI_PLATFORM_HART_INDEX2ID_OFFSET); +assert_member_offset(struct sbi_platform, cbom_block_size, SBI_PLATFORM_CBOM_BLOCK_SIZE_OFFSET); /** Get pointer to sbi_platform for sbi_scratch pointer */ #define sbi_platform_ptr(__s) \ diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h index 58758808..04c850cc 100644 --- a/include/sbi_utils/fdt/fdt_helper.h +++ b/include/sbi_utils/fdt/fdt_helper.h @@ -50,6 +50,8 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid); int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid); +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long *cbom_block_size); + int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq); int fdt_parse_isa_extensions(const void *fdt, unsigned int hartid, diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c index 8c7eeaf8..bb14a1a2 100644 --- a/lib/sbi/sbi_scratch.c +++ b/lib/sbi/sbi_scratch.c @@ -14,6 +14,8 @@ #include #include +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__ + u32 sbi_scratch_hart_count; u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS] = { [0 ... SBI_HARTMASK_MAX_BITS-1] = -1U }; struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS]; @@ -21,6 +23,19 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS]; static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER; static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET; +/* + * Get the alignment size. 
+ * Return DEFAULT_SCRATCH_ALLOC_ALIGN or riscv,cbom-block-size + */ +static unsigned long sbi_get_scratch_alloc_align(void) +{ + const struct sbi_platform *plat = sbi_platform_thishart_ptr(); + + if (!plat || !plat->cbom_block_size) + return DEFAULT_SCRATCH_ALLOC_ALIGN; + return plat->cbom_block_size; +} + u32 sbi_hartid_to_hartindex(u32 hartid) { sbi_for_each_hartindex(i) @@ -57,6 +72,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size) void *ptr; unsigned long ret = 0; struct sbi_scratch *rscratch; + unsigned long scratch_alloc_align = 0; /* * We have a simple brain-dead allocator which never expects @@ -70,8 +86,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size) if (!size) return 0; - size += __SIZEOF_POINTER__ - 1; - size &= ~((unsigned long)__SIZEOF_POINTER__ - 1); + scratch_alloc_align = sbi_get_scratch_alloc_align(); + + /* + * We let the allocation align to cacheline bytes to avoid livelock on + * certain platforms due to atomic variables from the same cache line. 
+ */ + size += scratch_alloc_align - 1; + size &= ~(scratch_alloc_align - 1); spin_lock(&extra_lock); diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c index 79e59dd5..b2d91fdf 100644 --- a/lib/utils/fdt/fdt_helper.c +++ b/lib/utils/fdt/fdt_helper.c @@ -246,6 +246,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid) return 0; } +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, unsigned long *cbom_block_size) +{ + int len; + const void *prop; + const fdt32_t *val; + + if (!fdt || cpu_offset < 0) + return SBI_EINVAL; + + prop = fdt_getprop(fdt, cpu_offset, "device_type", &len); + if (!prop || !len) + return SBI_EINVAL; + if (strncmp (prop, "cpu", strlen ("cpu"))) + return SBI_EINVAL; + + val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len); + if (!val || len < sizeof(fdt32_t)) + return SBI_EINVAL; + + if (cbom_block_size) + *cbom_block_size = fdt32_to_cpu(*val); + return 0; +} + int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid) { u32 hartid; diff --git a/platform/generic/platform.c b/platform/generic/platform.c index f3072be8..2e856642 100644 --- a/platform/generic/platform.c +++ b/platform/generic/platform.c @@ -147,6 +147,8 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1, const void *fdt = (void *)arg1; u32 hartid, hart_count = 0; int rc, root_offset, cpus_offset, cpu_offset, len; + unsigned long cbom_block_size = 0; + unsigned long tmp = 0; root_offset = fdt_path_offset(fdt, "/"); if (root_offset < 0) @@ -174,11 +176,17 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1, continue; generic_hart_index2id[hart_count++] = hartid; + + rc = fdt_parse_cbom_block_size(fdt, cpu_offset, &tmp); + if (rc) + continue; + cbom_block_size = MAX(tmp, cbom_block_size); } platform.hart_count = hart_count; platform.heap_size = fw_platform_get_heap_size(fdt, hart_count); platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt); + platform.cbom_block_size = 
cbom_block_size; fw_platform_coldboot_harts_init(fdt);