Re: [PATCH] mm: slub: annotate kmem_cache_node->list_lock as raw_spinlock

From: Qi Zheng
Date: Thu Apr 13 2023 - 10:50:02 EST

On 2023/4/13 00:44, Qi Zheng wrote:


On 2023/4/12 20:47, Peter Zijlstra wrote:
On Wed, Apr 12, 2023 at 08:50:29AM +0200, Vlastimil Babka wrote:

--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -562,10 +562,10 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
         unsigned long flags;

         /*
-        * On RT enabled kernels the pool refill must happen in preemptible
+        * The pool refill must happen in preemptible
          * context:
          */
-       if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
+       if (preemptible())
                 fill_pool();

+CC Peterz

Aha, so this is in fact another case where the code is written with
actual differences between PREEMPT_RT and !PREEMPT_RT in mind, but
CONFIG_PROVE_RAW_LOCK_NESTING always assumes PREEMPT_RT?

Ooh, tricky, yes. PROVE_RAW_LOCK_NESTING always follows the PREEMPT_RT
rules and does not expect trickery like the above.
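
For reference, the wait types form an ordered hierarchy, and
CONFIG_PROVE_RAW_LOCK_NESTING is exactly what keeps LD_WAIT_CONFIG
(spinlock_t, preemptible on RT) separate from LD_WAIT_SPIN even on !RT
builds; from include/linux/lockdep_types.h:

    enum lockdep_wait_type {
        LD_WAIT_INV = 0,    /* not checked, catch all */

        LD_WAIT_FREE,       /* wait free, rcu etc.. */
        LD_WAIT_SPIN,       /* spin loops, raw_spinlock_t etc.. */

    #ifdef CONFIG_PROVE_RAW_LOCK_NESTING
        LD_WAIT_CONFIG,     /* preemptible in PREEMPT_RT, spinlock_t etc.. */
    #else
        LD_WAIT_CONFIG = LD_WAIT_SPIN,
    #endif
        LD_WAIT_SLEEP,      /* sleeping locks, mutex_t etc.. */

        LD_WAIT_MAX,        /* must be last */
    };

check_wait_context() reduces the current context to the smallest
(strictest) inner wait type among the held locks and rejects acquiring
anything with a larger outer wait type, i.e. the RT nesting rules are
enforced unconditionally.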

Something like the completely untested patch below might be of help:

---
diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index d22430840b53..f3120d6a7d9e 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -33,6 +33,7 @@ enum lockdep_wait_type {
  enum lockdep_lock_type {
      LD_LOCK_NORMAL = 0,    /* normal, catch all */
      LD_LOCK_PERCPU,        /* percpu */
+    LD_LOCK_WAIT,        /* annotation */
      LD_LOCK_MAX,
  };
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 50d4863974e7..a4077f5bb75b 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2279,8 +2279,9 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
       * As a result, we will skip local_lock(), when we search for irq
       * inversion bugs.
       */
-    if (entry->class->lock_type == LD_LOCK_PERCPU) {
-        if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
+    if (entry->class->lock_type != LD_LOCK_NORMAL) {
+        if (entry->class->lock_type == LD_LOCK_PERCPU &&
+            DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
              return false;

          return true;
@@ -4752,7 +4753,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
      for (; depth < curr->lockdep_depth; depth++) {
          struct held_lock *prev = curr->held_locks + depth;
-        u8 prev_inner = hlock_class(prev)->wait_type_inner;
+        struct lock_class *class = hlock_class(prev);
+        u8 prev_inner = class->wait_type_inner;

          if (prev_inner) {
              /*
@@ -4762,6 +4764,12 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
               * Also due to trylocks.
               */
              curr_inner = min(curr_inner, prev_inner);
+
+            /*
+             * Allow override for annotations.
+             */
+            if (unlikely(class->lock_type == LD_LOCK_WAIT))
+                curr_inner = prev_inner;
          }
      }
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index df86e649d8be..fae71ef72a16 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -565,8 +565,16 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
       * On RT enabled kernels the pool refill must happen in preemptible
       * context:
       */
-    if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
+    if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
+        static struct lockdep_map dep_map = {
+            .name = "wait-type-override",
+            .wait_type_inner = LD_WAIT_SLEEP,
+            .lock_type = LD_LOCK_WAIT,
+        };
+        lock_map_acquire(&dep_map);
          fill_pool();
+        lock_map_release(&dep_map);
+    }
      db = get_bucket((unsigned long) addr);

I just tested the above code, and then got the following
warning:

[lockdep splat trimmed]

It seems that the LD_WAIT_SLEEP we set is already greater than the
LD_WAIT_SPIN of the current context.
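
That matches what check_wait_context() does; abridged, with the
annotation playing the role of 'next':

    /* kernel/locking/lockdep.c, check_wait_context(), abridged */
    u8 next_inner = hlock_class(next)->wait_type_inner;  /* LD_WAIT_SLEEP above */
    u8 next_outer = hlock_class(next)->wait_type_outer;  /* 0 for the dep_map */

    if (!next_outer)
        next_outer = next_inner;
    ...
    /* e.g. a held raw_spinlock_t drags curr_inner down to LD_WAIT_SPIN */
    curr_inner = min(curr_inner, prev_inner);
    ...
    if (next_outer > curr_inner)
        return print_lock_invalid_wait_context(curr, next);

So the acquire of the LD_WAIT_SLEEP annotation is itself flagged
whenever __debug_object_init() runs in a context already reduced to
LD_WAIT_SPIN, e.g. under a raw spinlock or in hard IRQ context.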


Can we do something like the below? It solves the warning I encountered.

diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index d22430840b53..f3120d6a7d9e 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -33,6 +33,7 @@ enum lockdep_wait_type {
 enum lockdep_lock_type {
     LD_LOCK_NORMAL = 0,    /* normal, catch all */
     LD_LOCK_PERCPU,        /* percpu */
+    LD_LOCK_WAIT,        /* annotation */
     LD_LOCK_MAX,
 };

diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index dcd1d5bfc1e0..6859dba8a3aa 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -2289,8 +2289,9 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
      * As a result, we will skip local_lock(), when we search for irq
      * inversion bugs.
      */
-    if (entry->class->lock_type == LD_LOCK_PERCPU) {
-        if (DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
+    if (entry->class->lock_type != LD_LOCK_NORMAL) {
+        if (entry->class->lock_type == LD_LOCK_PERCPU &&
+            DEBUG_LOCKS_WARN_ON(entry->class->wait_type_inner < LD_WAIT_CONFIG))
             return false;

         return true;
@@ -3981,6 +3982,9 @@ static inline int
 valid_state(struct task_struct *curr, struct held_lock *this,
         enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
 {
+    if (unlikely(hlock_class(this)->lock_type == LD_LOCK_WAIT))
+        return 1;
+
     if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit))) {
         graph_unlock();
         print_usage_bug(curr, this, bad_bit, new_bit);
@@ -4768,7 +4772,8 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)

     for (; depth < curr->lockdep_depth; depth++) {
         struct held_lock *prev = curr->held_locks + depth;
-        u8 prev_inner = hlock_class(prev)->wait_type_inner;
+        struct lock_class *class = hlock_class(prev);
+        u8 prev_inner = class->wait_type_inner;

         if (prev_inner) {
             /*
@@ -4778,9 +4783,19 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
              * Also due to trylocks.
              */
             curr_inner = min(curr_inner, prev_inner);
+
+            /*
+             * Allow override for annotations.
+             */
+            if (unlikely(class->lock_type == LD_LOCK_WAIT))
+                curr_inner = prev_inner;
         }
     }

+    if (unlikely(hlock_class(next)->lock_type == LD_LOCK_WAIT &&
+                 curr_inner == LD_WAIT_SPIN))
+        curr_inner = LD_WAIT_CONFIG;
+
     if (next_outer > curr_inner)
         return print_lock_invalid_wait_context(curr, next);

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index df86e649d8be..a8a69991b0d0 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -565,8 +565,16 @@ __debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack
      * On RT enabled kernels the pool refill must happen in preemptible
      * context:
      */
-    if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible())
+    if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
+        static struct lockdep_map dep_map = {
+            .name = "wait-type-override",
+            .wait_type_inner = LD_WAIT_CONFIG,
+            .lock_type = LD_LOCK_WAIT,
+        };
+        lock_map_acquire(&dep_map);
         fill_pool();
+        lock_map_release(&dep_map);
+    }

     db = get_bucket((unsigned long) addr);
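
As an aside: if more call sites grow this pattern, the boilerplate
could be hidden behind a helper. A hypothetical sketch (the macro name
and the stringified map naming are illustrative only, not part of the
patch above):

    /*
     * Hypothetical helper, not part of the patch above: a lockdep map
     * whose only job is to override the wait-type context around a
     * code section.
     */
    #define DEFINE_WAIT_OVERRIDE_MAP(_name, _wait_type)        \
        struct lockdep_map _name = {                           \
            .name = #_name "-wait-type-override",              \
            .wait_type_inner = _wait_type,                     \
            .lock_type = LD_LOCK_WAIT,                         \
        }

    /* would make the __debug_object_init() hunk read: */
    static DEFINE_WAIT_OVERRIDE_MAP(fill_pool_map, LD_WAIT_CONFIG);

    lock_map_acquire(&fill_pool_map);
    fill_pool();
    lock_map_release(&fill_pool_map);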

--
Thanks,
Qi