[PATCH v2 1/4] sched/core: Provide schedule_rtmutex() and expose sched work helpers

From: Sebastian Andrzej Siewior
Date: Thu Apr 27 2023 - 07:20:13 EST


From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

schedule() invokes sched_submit_work() before scheduling and
sched_update_worker() afterwards to ensure that queued block requests are
flushed and that the workqueue and IO-worker machinery can instantiate new
workers if required. This avoids deadlocks and starvation.
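
For reference, the current shape of schedule() (abridged from the code
this patch modifies):

	asmlinkage __visible void __sched schedule(void)
	{
		struct task_struct *tsk = current;

		/* Flush plugged block requests, notify workqueue/io_worker */
		sched_submit_work(tsk);
		do {
			preempt_disable();
			__schedule(SM_NONE);
			sched_preempt_enable_no_resched();
		} while (need_resched());
		/* Mark the worker as running again */
		sched_update_worker(tsk);
	}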

With rt_mutexes this can lead to a subtle problem:

When a task blocks on an rtmutex, current::pi_blocked_on points to the
rtmutex it blocks on. If one of the functions invoked from
sched_submit_work() or sched_resume_work() then contends on an
rtmutex-based lock, that would overwrite, i.e. corrupt,
current::pi_blocked_on.
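
As an illustrative call chain (the inner lock is hypothetical; any
rtmutex-based lock taken from these paths triggers it):

	rt_mutex_slowlock()			/* task blocks on rtmutex A */
	  current->pi_blocked_on = &waiter_A;
	  schedule()
	    sched_submit_work()
	      blk_flush_plug()
	        spin_lock(&inner_lock)		/* rtmutex-based on PREEMPT_RT */
	          current->pi_blocked_on = &waiter_B;	/* corruption */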

Make it possible to let rtmutex issue the calls outside of the slowpath,
i.e. when it is guaranteed that current::pi_blocked_on is NULL, by:

- Exposing sched_submit_work() and moving the task_is_running() check
into schedule()

- Renaming sched_update_worker() to sched_resume_work() and exposing it
too.

- Providing schedule_rtmutex(), which just runs the inner scheduling loop
until need_resched() is no longer set. The loop is split out into a
common helper so this does not create yet another copy. A usage sketch
follows below.
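
How the rtmutex slowpath is then expected to use these helpers (an
illustrative sketch; the actual conversion happens in a subsequent patch
of this series):

	sched_submit_work();			/* pi_blocked_on is NULL */
	raw_spin_lock_irq(&lock->wait_lock);
	/* enqueue the waiter; this sets current::pi_blocked_on */
	...
	schedule_rtmutex();			/* bare scheduling loop */
	...
	raw_spin_unlock_irq(&lock->wait_lock);
	sched_resume_work();			/* pi_blocked_on is NULL again */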

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
 include/linux/sched.h |  5 +++++
 kernel/sched/core.c   | 40 ++++++++++++++++++++++------------------
 2 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 675298d6eb362..ff1ce66d8b6e3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -304,6 +304,11 @@ extern long schedule_timeout_idle(long timeout);
 asmlinkage void schedule(void);
 extern void schedule_preempt_disabled(void);
 asmlinkage void preempt_schedule_irq(void);
+
+extern void sched_submit_work(void);
+extern void sched_resume_work(void);
+extern void schedule_rtmutex(void);
+
 #ifdef CONFIG_PREEMPT_RT
 extern void schedule_rtlock(void);
 #endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c415418b0b847..7c5cfae086c78 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6690,14 +6690,11 @@ void __noreturn do_task_dead(void)
 		cpu_relax();
 }
 
-static inline void sched_submit_work(struct task_struct *tsk)
+void sched_submit_work(void)
 {
-	unsigned int task_flags;
+	struct task_struct *tsk = current;
+	unsigned int task_flags = tsk->flags;
 
-	if (task_is_running(tsk))
-		return;
-
-	task_flags = tsk->flags;
 	/*
 	 * If a worker goes to sleep, notify and ask workqueue whether it
 	 * wants to wake up a task to maintain concurrency.
@@ -6723,8 +6720,10 @@ static inline void sched_submit_work(struct task_struct *tsk)
 	blk_flush_plug(tsk->plug, true);
 }
 
-static void sched_update_worker(struct task_struct *tsk)
+void sched_resume_work(void)
 {
+	struct task_struct *tsk = current;
+
 	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
 		if (tsk->flags & PF_WQ_WORKER)
 			wq_worker_running(tsk);
@@ -6733,20 +6732,29 @@ static void sched_update_worker(struct task_struct *tsk)
 	}
 }
 
-asmlinkage __visible void __sched schedule(void)
+static void schedule_loop(unsigned int sched_mode)
 {
-	struct task_struct *tsk = current;
-
-	sched_submit_work(tsk);
 	do {
 		preempt_disable();
-		__schedule(SM_NONE);
+		__schedule(sched_mode);
 		sched_preempt_enable_no_resched();
 	} while (need_resched());
-	sched_update_worker(tsk);
+}
+
+asmlinkage __visible void __sched schedule(void)
+{
+	if (!task_is_running(current))
+		sched_submit_work();
+	schedule_loop(SM_NONE);
+	sched_resume_work();
 }
 EXPORT_SYMBOL(schedule);
 
+void schedule_rtmutex(void)
+{
+	schedule_loop(SM_NONE);
+}
+
 /*
  * synchronize_rcu_tasks() makes sure that no task is stuck in preempted
  * state (have scheduled out non-voluntarily) by making sure that all
@@ -6806,11 +6814,7 @@ void __sched schedule_preempt_disabled(void)
 #ifdef CONFIG_PREEMPT_RT
 void __sched notrace schedule_rtlock(void)
 {
-	do {
-		preempt_disable();
-		__schedule(SM_RTLOCK_WAIT);
-		sched_preempt_enable_no_resched();
-	} while (need_resched());
+	schedule_loop(SM_RTLOCK_WAIT);
 }
 NOKPROBE_SYMBOL(schedule_rtlock);
 #endif
--
2.40.1