[PATCH v3 12/14] sched/rt: Fix proxy/current (push,pull)ability

From: John Stultz
Date: Tue Apr 11 2023 - 00:27:00 EST


From: Valentin Schneider <valentin.schneider@xxxxxxx>

Proxy execution forms atomic pairs of tasks: a proxy (scheduling context)
and an owner (execution context). The proxy, along with the rest of the
blocked chain, follows the owner wrt CPU placement.

They can be the same task, in which case push/pull doesn't need any
modification. When they are different, however,
FIFO1 & FIFO42:

,-> RT42
| | blocked-on
| v
blocked_proxy | mutex
| | owner
| v
`-- RT1

RT1
RT42

CPU0 CPU1
^ ^
| |
overloaded !overloaded
rq prio = 42 rq prio = 0

RT1 is eligible to be pushed to CPU1, but should that happen it will
"carry" RT42 along. Clearly here neither RT1 nor RT42 must be seen as
push/pullable.

Furthermore, tasks becoming blocked on a mutex don't need an explicit
dequeue/enqueue cycle to be made (push/pull)able: they have to be running
to block on a mutex, thus they will eventually hit put_prev_task().

XXX: pinned tasks becoming unblocked should be removed from the push/pull
lists, but those don't get to see __schedule() straight away.

Cc: Joel Fernandes <joelaf@xxxxxxxxxx>
Cc: Qais Yousef <qyousef@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Dietmar Eggemann <dietmar.eggemann@xxxxxxx>
Cc: Valentin Schneider <vschneid@xxxxxxxxxx>
Cc: Steven Rostedt <rostedt@xxxxxxxxxxx>
Cc: Ben Segall <bsegall@xxxxxxxxxx>
Cc: Zimuzo Ezeozue <zezeozue@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxx>
Cc: Daniel Bristot de Oliveira <bristot@xxxxxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Waiman Long <longman@xxxxxxxxxx>
Cc: Boqun Feng <boqun.feng@xxxxxxxxx>
Cc: "Paul E . McKenney" <paulmck@xxxxxxxxxx>
Signed-off-by: Valentin Schneider <valentin.schneider@xxxxxxx>
Signed-off-by: Connor O'Brien <connoro@xxxxxxxxxx>
Signed-off-by: John Stultz <jstultz@xxxxxxxxxx>
---
v3:
* Tweaked comments & commit message

TODO: Rework the wording of the commit message to match the rq_selected
renaming. (XXX Maybe "Delegator" for the task being proxied for?)
---
kernel/sched/core.c | 37 +++++++++++++++++++++++++++----------
kernel/sched/rt.c | 22 +++++++++++++++++-----
2 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1d92f1a304b8..033856bae002 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7072,12 +7072,29 @@ proxy(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
WARN_ON_ONCE(!owner->on_rq);
return owner;
}
+
+static inline void proxy_tag_curr(struct rq *rq, struct task_struct *next)
+{
+ /*
+ * pick_next_task() calls set_next_task() on the selected task
+ * at some point, which ensures it is not push/pullable.
+ * However, the selected task *and* the ,mutex owner form an
+ * atomic pair wrt push/pull.
+ *
+ * Make sure owner is not pushable. Unfortunately we can only
+ * deal with that by means of a dequeue/enqueue cycle. :-/
+ */
+ dequeue_task(rq, next, DEQUEUE_NOCLOCK | DEQUEUE_SAVE);
+ enqueue_task(rq, next, ENQUEUE_NOCLOCK | ENQUEUE_RESTORE);
+}
#else /* PROXY_EXEC */
static struct task_struct *
proxy(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
{
return next;
}
+
+static inline void proxy_tag_curr(struct rq *rq, struct task_struct *next) { }
#endif /* PROXY_EXEC */

/*
@@ -7126,6 +7143,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
unsigned long prev_state;
struct rq_flags rf;
struct rq *rq;
+ bool proxied;
int cpu;
bool preserve_need_resched = false;

@@ -7199,20 +7217,11 @@ static void __sched notrace __schedule(unsigned int sched_mode)
atomic_inc(&rq->nr_iowait);
delayacct_blkio_start();
}
- } else {
- /*
- * XXX
- * Let's make this task, which is blocked on
- * a mutex, (push/pull)able (RT/DL).
- * Unfortunately we can only deal with that by
- * means of a dequeue/enqueue cycle. :-/
- */
- dequeue_task(rq, prev, 0);
- enqueue_task(rq, prev, 0);
}
switch_count = &prev->nvcsw;
}

+ proxied = !!prev->blocked_proxy;
pick_again:
/*
* If picked task is actually blocked it means that it can act as a
@@ -7261,6 +7270,10 @@ static void __sched notrace __schedule(unsigned int sched_mode)
* changes to task_struct made by pick_next_task().
*/
rq_set_curr_rcu_init(rq, next);
+
+ if (unlikely(!task_current_proxy(rq, next)))
+ proxy_tag_curr(rq, next);
+
/*
* The membarrier system call requires each architecture
* to have a full memory barrier after updating
@@ -7285,6 +7298,10 @@ static void __sched notrace __schedule(unsigned int sched_mode)
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
} else {
+ /* In case next was already curr but just got blocked_proxy */
+ if (unlikely(!proxied && next->blocked_proxy))
+ proxy_tag_curr(rq, next);
+
rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);

rq_unpin_lock(rq, &rf);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 5ce48eb8f5b6..af92e4147703 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1537,9 +1537,21 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)

enqueue_rt_entity(rt_se, flags);

- if (!task_current(rq, p) && p->nr_cpus_allowed > 1 &&
- !task_is_blocked(p))
- enqueue_pushable_task(rq, p);
+ /*
+ * Current can't be pushed away. Proxy is tied to current, so don't
+ * push it either.
+ */
+ if (task_current(rq, p) || task_current_proxy(rq, p))
+ return;
+
+ /*
+ * Pinned tasks can't be pushed.
+ * Affinity of blocked tasks doesn't matter.
+ */
+ if (!task_is_blocked(p) && p->nr_cpus_allowed == 1)
+ return;
+
+ enqueue_pushable_task(rq, p);
}

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
@@ -1832,9 +1844,9 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)

/*
* The previous task needs to be made eligible for pushing
- * if it is still active
+ * if it is still active. Affinity of blocked task doesn't matter.
*/
- if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
+ if (on_rt_rq(&p->rt) && (p->nr_cpus_allowed > 1 || task_is_blocked(p)))
enqueue_pushable_task(rq, p);
}

--
2.40.0.577.gac1e443424-goog