Re: [PATCH v4 12/22] lib/stackdepot: use read/write lock

From: Oscar Salvador
Date: Wed Jan 03 2024 - 04:14:09 EST


On Mon, Nov 20, 2023 at 06:47:10PM +0100, andrey.konovalov@xxxxxxxxx wrote:
> From: Andrey Konovalov <andreyknvl@xxxxxxxxxx>
>
> Currently, stack depot uses the following locking scheme:
>
> 1. Lock-free accesses when looking up a stack record, which allows to
> have multiple users to look up records in parallel;
> 2. Spinlock for protecting the stack depot pools and the hash table
> when adding a new record.
>
> For implementing the eviction of stack traces from stack depot, the
> lock-free approach is not going to work anymore, as we will need to be
> able to also remove records from the hash table.
>
> Convert the spinlock into a read/write lock, and drop the atomic accesses,
> as they are no longer required.
>
> Looking up stack traces is now protected by the read lock and adding new
> records - by the write lock. One of the following patches will add a new
> function for evicting stack records, which will be protected by the write
> lock as well.
>
> With this change, multiple users can still look up records in parallel.
>
> This is preparatory patch for implementing the eviction of stack records
> from the stack depot.
>
> Reviewed-by: Alexander Potapenko <glider@xxxxxxxxxx>
> Signed-off-by: Andrey Konovalov <andreyknvl@xxxxxxxxxx>

Reviewed-by: Oscar Salvador <osalvador@xxxxxxx>

> ---
>
> Changed v2->v3:
> - Use lockdep_assert_held_read annotation in depot_fetch_stack.
>
> Changes v1->v2:
> - Add lockdep_assert annotations.
> ---
> lib/stackdepot.c | 87 +++++++++++++++++++++++++-----------------------
> 1 file changed, 46 insertions(+), 41 deletions(-)
>
> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> index a5eff165c0d5..8378b32b5310 100644
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -23,6 +23,7 @@
> #include <linux/percpu.h>
> #include <linux/printk.h>
> #include <linux/slab.h>
> +#include <linux/spinlock.h>
> #include <linux/stacktrace.h>
> #include <linux/stackdepot.h>
> #include <linux/string.h>
> @@ -91,15 +92,15 @@ static void *new_pool;
> static int pools_num;
> /* Next stack in the freelist of stack records within stack_pools. */
> static struct stack_record *next_stack;
> -/* Lock that protects the variables above. */
> -static DEFINE_RAW_SPINLOCK(pool_lock);
> /*
> * Stack depot tries to keep an extra pool allocated even before it runs out
> * of space in the currently used pool. This flag marks whether this extra pool
> * needs to be allocated. It has the value 0 when either an extra pool is not
> * yet allocated or if the limit on the number of pools is reached.
> */
> -static int new_pool_required = 1;
> +static bool new_pool_required = true;
> +/* Lock that protects the variables above. */
> +static DEFINE_RWLOCK(pool_rwlock);
>
> static int __init disable_stack_depot(char *str)
> {
> @@ -232,6 +233,8 @@ static void depot_init_pool(void *pool)
> const int records_in_pool = DEPOT_POOL_SIZE / DEPOT_STACK_RECORD_SIZE;
> int i, offset;
>
> + lockdep_assert_held_write(&pool_rwlock);
> +
> /* Initialize handles and link stack records to each other. */
> for (i = 0, offset = 0;
> offset <= DEPOT_POOL_SIZE - DEPOT_STACK_RECORD_SIZE;
> @@ -254,22 +257,17 @@ static void depot_init_pool(void *pool)
>
> /* Save reference to the pool to be used by depot_fetch_stack(). */
> stack_pools[pools_num] = pool;
> -
> - /*
> - * WRITE_ONCE() pairs with potential concurrent read in
> - * depot_fetch_stack().
> - */
> - WRITE_ONCE(pools_num, pools_num + 1);
> + pools_num++;
> }
>
> /* Keeps the preallocated memory to be used for a new stack depot pool. */
> static void depot_keep_new_pool(void **prealloc)
> {
> + lockdep_assert_held_write(&pool_rwlock);
> +
> /*
> * If a new pool is already saved or the maximum number of
> * pools is reached, do not use the preallocated memory.
> - * Access new_pool_required non-atomically, as there are no concurrent
> - * write accesses to this variable.
> */
> if (!new_pool_required)
> return;
> @@ -287,15 +285,15 @@ static void depot_keep_new_pool(void **prealloc)
> * At this point, either a new pool is kept or the maximum
> * number of pools is reached. In either case, take note that
> * keeping another pool is not required.
> - * smp_store_release() pairs with smp_load_acquire() in
> - * stack_depot_save().
> */
> - smp_store_release(&new_pool_required, 0);
> + new_pool_required = false;
> }
>
> /* Updates references to the current and the next stack depot pools. */
> static bool depot_update_pools(void **prealloc)
> {
> + lockdep_assert_held_write(&pool_rwlock);
> +
> /* Check if we still have objects in the freelist. */
> if (next_stack)
> goto out_keep_prealloc;
> @@ -307,7 +305,7 @@ static bool depot_update_pools(void **prealloc)
>
> /* Take note that we might need a new new_pool. */
> if (pools_num < DEPOT_MAX_POOLS)
> - smp_store_release(&new_pool_required, 1);
> + new_pool_required = true;
>
> /* Try keeping the preallocated memory for new_pool. */
> goto out_keep_prealloc;
> @@ -341,6 +339,8 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
> {
> struct stack_record *stack;
>
> + lockdep_assert_held_write(&pool_rwlock);
> +
> /* Update current and new pools if required and possible. */
> if (!depot_update_pools(prealloc))
> return NULL;
> @@ -376,18 +376,15 @@ depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc)
> static struct stack_record *depot_fetch_stack(depot_stack_handle_t handle)
> {
> union handle_parts parts = { .handle = handle };
> - /*
> - * READ_ONCE() pairs with potential concurrent write in
> - * depot_init_pool().
> - */
> - int pools_num_cached = READ_ONCE(pools_num);
> void *pool;
> size_t offset = parts.offset << DEPOT_STACK_ALIGN;
> struct stack_record *stack;
>
> - if (parts.pool_index > pools_num_cached) {
> + lockdep_assert_held_read(&pool_rwlock);
> +
> + if (parts.pool_index > pools_num) {
> WARN(1, "pool index %d out of bounds (%d) for stack id %08x\n",
> - parts.pool_index, pools_num_cached, handle);
> + parts.pool_index, pools_num, handle);
> return NULL;
> }
>
> @@ -429,6 +426,8 @@ static inline struct stack_record *find_stack(struct stack_record *bucket,
> {
> struct stack_record *found;
>
> + lockdep_assert_held(&pool_rwlock);
> +
> for (found = bucket; found; found = found->next) {
> if (found->hash == hash &&
> found->size == size &&
> @@ -446,6 +445,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
> depot_stack_handle_t handle = 0;
> struct page *page = NULL;
> void *prealloc = NULL;
> + bool need_alloc = false;
> unsigned long flags;
> u32 hash;
>
> @@ -465,22 +465,26 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
> hash = hash_stack(entries, nr_entries);
> bucket = &stack_table[hash & stack_hash_mask];
>
> - /*
> - * Fast path: look the stack trace up without locking.
> - * smp_load_acquire() pairs with smp_store_release() to |bucket| below.
> - */
> - found = find_stack(smp_load_acquire(bucket), entries, nr_entries, hash);
> - if (found)
> + read_lock_irqsave(&pool_rwlock, flags);
> +
> + /* Fast path: look the stack trace up without full locking. */
> + found = find_stack(*bucket, entries, nr_entries, hash);
> + if (found) {
> + read_unlock_irqrestore(&pool_rwlock, flags);
> goto exit;
> + }
> +
> + /* Take note if another stack pool needs to be allocated. */
> + if (new_pool_required)
> + need_alloc = true;
> +
> + read_unlock_irqrestore(&pool_rwlock, flags);
>
> /*
> - * Check if another stack pool needs to be allocated. If so, allocate
> - * the memory now: we won't be able to do that under the lock.
> - *
> - * smp_load_acquire() pairs with smp_store_release() in
> - * depot_update_pools() and depot_keep_new_pool().
> + * Allocate memory for a new pool if required now:
> + * we won't be able to do that under the lock.
> */
> - if (unlikely(can_alloc && smp_load_acquire(&new_pool_required))) {
> + if (unlikely(can_alloc && need_alloc)) {
> /*
> * Zero out zone modifiers, as we don't have specific zone
> * requirements. Keep the flags related to allocation in atomic
> @@ -494,7 +498,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
> prealloc = page_address(page);
> }
>
> - raw_spin_lock_irqsave(&pool_lock, flags);
> + write_lock_irqsave(&pool_rwlock, flags);
>
> found = find_stack(*bucket, entries, nr_entries, hash);
> if (!found) {
> @@ -503,11 +507,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
>
> if (new) {
> new->next = *bucket;
> - /*
> - * smp_store_release() pairs with smp_load_acquire()
> - * from |bucket| above.
> - */
> - smp_store_release(bucket, new);
> + *bucket = new;
> found = new;
> }
> } else if (prealloc) {
> @@ -518,7 +518,7 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries,
> depot_keep_new_pool(&prealloc);
> }
>
> - raw_spin_unlock_irqrestore(&pool_lock, flags);
> + write_unlock_irqrestore(&pool_rwlock, flags);
> exit:
> if (prealloc) {
> /* Stack depot didn't use this memory, free it. */
> @@ -542,6 +542,7 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
> unsigned long **entries)
> {
> struct stack_record *stack;
> + unsigned long flags;
>
> *entries = NULL;
> /*
> @@ -553,8 +554,12 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
> if (!handle || stack_depot_disabled)
> return 0;
>
> + read_lock_irqsave(&pool_rwlock, flags);
> +
> stack = depot_fetch_stack(handle);
>
> + read_unlock_irqrestore(&pool_rwlock, flags);
> +
> *entries = stack->entries;
> return stack->size;
> }
> --
> 2.25.1
>

--
Oscar Salvador
SUSE Labs