Re: [PATCH v7 05/16] lockdep: Implement crossrelease feature

From: Peter Zijlstra
Date: Tue Jul 11 2017 - 12:13:15 EST



Sorry for the much delayed response; aside from the usual backlog I got
unusually held up by family responsibilities.

My comments in the form of a patch..


--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -542,10 +542,10 @@ extern void crossrelease_hardirq_start(v
extern void crossrelease_hardirq_end(void);
extern void crossrelease_softirq_start(void);
extern void crossrelease_softirq_end(void);
-extern void crossrelease_work_start(void);
-extern void crossrelease_work_end(void);
-extern void init_crossrelease_task(struct task_struct *task);
-extern void free_crossrelease_task(struct task_struct *task);
+extern void crossrelease_hist_start(void);
+extern void crossrelease_hist_end(void);
+extern void lockdep_init_task(struct task_struct *task);
+extern void lockdep_free_task(struct task_struct *task);
#else
/*
* To initialize a lockdep_map statically use this macro.
@@ -558,10 +558,10 @@ static inline void crossrelease_hardirq_
static inline void crossrelease_hardirq_end(void) {}
static inline void crossrelease_softirq_start(void) {}
static inline void crossrelease_softirq_end(void) {}
-static inline void crossrelease_work_start(void) {}
-static inline void crossrelease_work_end(void) {}
-static inline void init_crossrelease_task(struct task_struct *task) {}
-static inline void free_crossrelease_task(struct task_struct *task) {}
+static inline void crossrelease_hist_start(void) {}
+static inline void crossrelease_hist_end(void) {}
+static inline void lockdep_init_task(struct task_struct *task) {}
+static inline void lockdep_free_task(struct task_struct *task) {}
#endif

#ifdef CONFIG_LOCK_STAT
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -821,7 +821,7 @@ struct task_struct {
unsigned int xhlock_idx;
unsigned int xhlock_idx_soft; /* For restoring at softirq exit */
unsigned int xhlock_idx_hard; /* For restoring at hardirq exit */
- unsigned int xhlock_idx_work; /* For restoring at work exit */
+ unsigned int xhlock_idx_hist; /* For restoring at history boundaries */
#endif
#ifdef CONFIG_UBSAN
unsigned int in_ubsan;
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -933,7 +933,7 @@ void __noreturn do_exit(long code)
exit_rcu();
TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));

- free_crossrelease_task(tsk);
+ lockdep_free_task(tsk);
do_task_dead();
}
EXPORT_SYMBOL_GPL(do_exit);
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -485,7 +485,7 @@ void __init fork_init(void)
for (i = 0; i < UCOUNT_COUNTS; i++) {
init_user_ns.ucount_max[i] = max_threads/2;
}
- init_crossrelease_task(&init_task);
+ lockdep_init_task(&init_task);

#ifdef CONFIG_VMAP_STACK
cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache",
@@ -1694,7 +1694,7 @@ static __latent_entropy struct task_stru
p->lockdep_depth = 0; /* no locks held yet */
p->curr_chain_key = 0;
p->lockdep_recursion = 0;
- init_crossrelease_task(p);
+ lockdep_init_task(p);
#endif

#ifdef CONFIG_DEBUG_MUTEXES
@@ -1953,7 +1953,7 @@ static __latent_entropy struct task_stru
bad_fork_cleanup_perf:
perf_event_free_task(p);
bad_fork_cleanup_policy:
- free_crossrelease_task(p);
+ lockdep_free_task(p);
#ifdef CONFIG_NUMA
mpol_put(p->mempolicy);
bad_fork_cleanup_threadgroup_lock:
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3381,8 +3381,8 @@ static int __lock_acquire(struct lockdep
unsigned int depth;
int chain_head = 0;
int class_idx;
- int ret;
u64 chain_key;
+ int ret;

if (unlikely(!debug_locks))
return 0;
@@ -4653,6 +4653,13 @@ asmlinkage __visible void lockdep_sys_ex
curr->comm, curr->pid);
lockdep_print_held_locks(curr);
}
+
+ /*
+ * The lock history for each syscall should be independent. So wipe the
+ * slate clean on return to userspace.
+ */
+ crossrelease_hist_end();
+ crossrelease_hist_start();
}

void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
@@ -4708,6 +4715,29 @@ EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious

#ifdef CONFIG_LOCKDEP_CROSSRELEASE

+/*
+ * Crossrelease works by recording a lock history for each thread and
+ * connecting those historic locks that were taken after the
+ * wait_for_completion() in the complete() context.
+ *
+ * Task-A Task-B
+ *
+ * mutex_lock(&A);
+ * mutex_unlock(&A);
+ *
+ * wait_for_completion(&C);
+ * lock_acquire_crosslock();
+ * atomic_inc_return(&cross_gen_id);
+ * |
+ * | mutex_lock(&B);
+ * | mutex_unlock(&B);
+ * |
+ * | complete(&C);
+ * `-- lock_commit_crosslock();
+ *
+ * Which will then add a dependency between B and C.
+ */
+
#define xhlock(i) (current->xhlocks[(i) % MAX_XHLOCKS_NR])

/*
@@ -4715,6 +4745,25 @@ EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious
*/
static atomic_t cross_gen_id; /* Can be wrapped */

+/*
+ * Lock history stacks; we have 3 nested lock history stacks:
+ *
+ * Hard IRQ
+ * Soft IRQ
+ * History / Task
+ *
+ * The thing is that once we complete a (Hard/Soft) IRQ the future task locks
+ * should not depend on any of the locks observed while running the IRQ.
+ *
+ * So what we do is rewind the history buffer and erase all our knowledge of
+ * that temporal event.
+ *
+ * If the rewind wraps the history ring buffer ... XXX explain how we'll
+ * discard stuff. I cannot readily find how a rewind of exactly MAX_XHLOCKS_NR
+ * is not a NOP... should we make xhlock_valid() trigger when the rewind >=
+ * MAX_XHLOCKS_NR ? Possibly re-instroduce hist_gen_id ?
+ */
+
void crossrelease_hardirq_start(void)
{
if (current->xhlocks)
@@ -4740,20 +4789,31 @@ void crossrelease_softirq_end(void)
}

/*
- * Each work of workqueue might run in a different context,
- * thanks to concurrency support of workqueue. So we have to
- * distinguish each work to avoid false positive.
+ * We need this to annotate lock history boundaries. Take for instance
+ * workqueues; each work is independent of the last. The completion of a future
+ * work does not depend on the completion of a past work (in general).
+ * Therefore we must not carry that (lock) dependency across works.
+ *
+ * This is true for many things; pretty much all kthreads fall into this
+ * pattern, where they have an 'idle' state and future completions do not
+ * depend on past completions. Its just that since they all have the 'same'
+ * form -- the kthread does the same over and over -- it doesn't typically
+ * matter.
+ *
+ * The same is true for system-calls, once a system call is completed (we've
+ * returned to userspace) the next system call does not depend on the lock
+ * history of the previous system call.
*/
-void crossrelease_work_start(void)
+void crossrelease_hist_start(void)
{
if (current->xhlocks)
- current->xhlock_idx_work = current->xhlock_idx;
+ current->xhlock_idx_hist = current->xhlock_idx;
}

-void crossrelease_work_end(void)
+void crossrelease_hist_end(void)
{
if (current->xhlocks)
- current->xhlock_idx = current->xhlock_idx_work;
+ current->xhlock_idx = current->xhlock_idx_hist;
}

static int cross_lock(struct lockdep_map *lock)
@@ -5053,17 +5113,17 @@ static void cross_init(struct lockdep_ma
BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1));
}

-void init_crossrelease_task(struct task_struct *task)
+void lockdep_init_task(struct task_struct *task)
{
task->xhlock_idx = UINT_MAX;
task->xhlock_idx_soft = UINT_MAX;
task->xhlock_idx_hard = UINT_MAX;
- task->xhlock_idx_work = UINT_MAX;
+ task->xhlock_idx_hist = UINT_MAX;
task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR,
GFP_KERNEL);
}

-void free_crossrelease_task(struct task_struct *task)
+void lockdep_free_task(struct task_struct *task)
{
if (task->xhlocks) {
void *tmp = task->xhlocks;
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2093,7 +2093,7 @@ __acquires(&pool->lock)

lock_map_acquire_read(&pwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map);
- crossrelease_work_start();
+ crossrelease_hist_start();
trace_workqueue_execute_start(work);
worker->current_func(work);
/*
@@ -2101,7 +2101,7 @@ __acquires(&pool->lock)
* point will only record its address.
*/
trace_workqueue_execute_end(work);
- crossrelease_work_end();
+ crossrelease_hist_end();
lock_map_release(&lockdep_map);
lock_map_release(&pwq->wq->lockdep_map);