[tip: sched/core] sched: Consider task_struct::saved_state in wait_task_inactive()

From: tip-bot2 for Peter Zijlstra
Date: Mon Jun 05 2023 - 15:17:14 EST


The following commit has been merged into the sched/core branch of tip:

Commit-ID: 1c06918788e8ae6e69e4381a2806617312922524
Gitweb: https://git.kernel.org/tip/1c06918788e8ae6e69e4381a2806617312922524
Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Wed, 31 May 2023 16:39:07 +02:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Mon, 05 Jun 2023 21:11:03 +02:00

sched: Consider task_struct::saved_state in wait_task_inactive()

With the introduction of task_struct::saved_state in commit
5f220be21418 ("sched/wakeup: Prepare for RT sleeping spin/rwlocks")
matching the task state has gotten more complicated. That same commit
changed try_to_wake_up() to consider both states, but
wait_task_inactive() has been neglected.

Sebastian noted that the wait_task_inactive() usage in
ptrace_check_attach() can misbehave when ptrace_stop() is blocked on
the tasklist_lock after it sets TASK_TRACED.

Therefore extract a common helper from ttwu_state_match() and use that
to teach wait_task_inactive() about the PREEMPT_RT locks.

Originally-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Tested-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/20230601091234.GW83892@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
---
kernel/sched/core.c | 59 +++++++++++++++++++++++++++++++++++---------
1 file changed, 48 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 810cf7d..ac38225 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2213,6 +2213,39 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
rq_clock_skip_update(rq);
}

+static __always_inline
+int __task_state_match(struct task_struct *p, unsigned int state)
+{
+ if (READ_ONCE(p->__state) & state)
+ return 1;
+
+#ifdef CONFIG_PREEMPT_RT
+ if (READ_ONCE(p->saved_state) & state)
+ return -1;
+#endif
+ return 0;
+}
+
+static __always_inline
+int task_state_match(struct task_struct *p, unsigned int state)
+{
+#ifdef CONFIG_PREEMPT_RT
+ int match;
+
+ /*
+ * Serialize against current_save_and_set_rtlock_wait_state() and
+ * current_restore_rtlock_saved_state().
+ */
+ raw_spin_lock_irq(&p->pi_lock);
+ match = __task_state_match(p, state);
+ raw_spin_unlock_irq(&p->pi_lock);
+
+ return match;
+#else
+ return __task_state_match(p, state);
+#endif
+}
+
/*
* wait_task_inactive - wait for a thread to unschedule.
*
@@ -2231,7 +2264,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
*/
unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state)
{
- int running, queued;
+ int running, queued, match;
struct rq_flags rf;
unsigned long ncsw;
struct rq *rq;
@@ -2257,7 +2290,7 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
* is actually now running somewhere else!
*/
while (task_on_cpu(rq, p)) {
- if (!(READ_ONCE(p->__state) & match_state))
+ if (!task_state_match(p, match_state))
return 0;
cpu_relax();
}
@@ -2272,8 +2305,15 @@ unsigned long wait_task_inactive(struct task_struct *p, unsigned int match_state
running = task_on_cpu(rq, p);
queued = task_on_rq_queued(p);
ncsw = 0;
- if (READ_ONCE(p->__state) & match_state)
+ if ((match = __task_state_match(p, match_state))) {
+ /*
+ * When matching on p->saved_state, consider this task
+ * still queued so it will wait.
+ */
+ if (match < 0)
+ queued = 1;
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
+ }
task_rq_unlock(rq, p, &rf);

/*
@@ -4003,15 +4043,14 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
static __always_inline
bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
{
+ int match;
+
if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)) {
WARN_ON_ONCE((state & TASK_RTLOCK_WAIT) &&
state != TASK_RTLOCK_WAIT);
}

- if (READ_ONCE(p->__state) & state) {
- *success = 1;
- return true;
- }
+ *success = !!(match = __task_state_match(p, state));

#ifdef CONFIG_PREEMPT_RT
/*
@@ -4027,12 +4066,10 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
* p::saved_state to TASK_RUNNING so any further tests will
* not result in false positives vs. @success
*/
- if (p->saved_state & state) {
+ if (match < 0)
p->saved_state = TASK_RUNNING;
- *success = 1;
- }
#endif
- return false;
+ return match > 0;
}

/*