Re: [PATCH] mm: slub: annotate kmem_cache_node->list_lock as raw_spinlock

From: Peter Zijlstra
Date: Tue Apr 25 2023 - 11:04:06 EST


On Thu, Apr 13, 2023 at 09:40:13AM +0200, Peter Zijlstra wrote:
> On Thu, Apr 13, 2023 at 12:44:42AM +0800, Qi Zheng wrote:
> > > Something like the completely untested below might be of help..
>
> > I just tested the above code, and then got the following
> > warning:
> >
>
> > It seems that the LD_WAIT_SLEEP we set is already greater than the
> > LD_WAIT_SPIN of the current context.
>
> Yeah, I'm an idiot and got it wrong.. I'll try again later if I manage
> to wake up today :-)

And then I forgot, of course :/ Can you give the below (still mostly
untested) a spin? The crucial difference is the new
lock_map_acquire_try(). By making the annotation a 'trylock', lockdep will
skip the wait-type check when acquiring the annotation itself (since
trylocks don't block).

---
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 1023f349af71..435a3b0f8ea6 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -551,6 +551,7 @@ do { \
#define rwsem_release(l, i) lock_release(l, i)

#define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
+#define lock_map_acquire_try(l) lock_acquire_exclusive(l, 0, 1, NULL, _THIS_IP_)
#define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
#define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_)
#define lock_map_release(l) lock_release(l, _THIS_IP_)
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index d22430840b53..f3120d6a7d9e 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -33,6 +33,7 @@ enum lockdep_wait_type {
enum lockdep_lock_type {
LD_LOCK_NORMAL = 0, /* normal, catch all */
LD_LOCK_PERCPU, /* percpu */
+ LD_LOCK_WAIT, /* annotation */
LD_LOCK_MAX,
};

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 50d4863974e7..d254f9e53c0e 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2253,6 +2253,9 @@ static inline bool usage_match(struct lock_list *entry, void *mask)

static inline bool usage_skip(struct lock_list *entry, void *mask)
{
+ if (entry->class->lock_type == LD_LOCK_NORMAL)
+ return false;
+
/*
* Skip local_lock() for irq inversion detection.
*
@@ -2279,14 +2282,11 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
* As a result, we will skip local_lock(), when we search for irq
* inversion bugs.
*/
- if (entry->class->lock_type == LD_LOCK_PERCPU) {
- if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
- return false;
-
- return true;
- }
+ if (entry->class->lock_type == LD_LOCK_PERCPU &&
+ DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
+ return false;

- return false;
+ return true;
}

/*
@@ -4752,7 +4752,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)

for (; depth < curr->lockdep_depth; depth++) {
struct held_lock *prev = curr->held_locks + depth;
- u8 prev_inner = hlock_class(prev)->wait_type_inner;
+ struct lock_class *class = hlock_class(prev);
+ u8 prev_inner = class->wait_type_inner;

if (prev_inner) {
/*
@@ -4762,6 +4763,12 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
* Also due to trylocks.
*/
curr_inner = min(curr_inner, prev_inner);
+
+ /*
+ * Allow override for annotations.
+ */
+ if (unlikely(class->lock_type == LD_LOCK_WAIT))
+ curr_inner = prev_inner;
}
}

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index df86e649d8be..0e089882146b 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -565,8 +565,16 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
* On RT enabled kernels the pool refill must happen in preemptible
* context:
*/
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
+ static struct lockdep_map dep_map = {
+ .name = "wait-type-override",
+ .wait_type_inner = LD_WAIT_SLEEP,
+ .lock_type = LD_LOCK_WAIT,
+ };
+ lock_map_acquire_try(&dep_map);
fill_pool();
+ lock_map_release(&dep_map);
+ }

db = get_bucket((unsigned long) addr);