[PATCH v3 09/57] sched: Simplify ttwu()

From: Peter Zijlstra
Date: Mon Jun 12 2023 - 05:55:59 EST


Use guards to reduce gotos and simplify control flow.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
---
kernel/sched/core.c | 220 +++++++++++++++++++++++++---------------------------
1 file changed, 108 insertions(+), 112 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3664,16 +3664,15 @@ ttwu_stat(struct task_struct *p, int cpu
__schedstat_inc(p->stats.nr_wakeups_local);
} else {
struct sched_domain *sd;
+ guard(rcu)();

__schedstat_inc(p->stats.nr_wakeups_remote);
- rcu_read_lock();
for_each_domain(rq->cpu, sd) {
if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
__schedstat_inc(sd->ttwu_wake_remote);
break;
}
}
- rcu_read_unlock();
}

if (wake_flags & WF_MIGRATED)
@@ -4135,10 +4134,9 @@ bool ttwu_state_match(struct task_struct
static int
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{
- unsigned long flags;
+ guard(preempt)();
int cpu, success = 0;

- preempt_disable();
if (p == current) {
/*
* We're waking current, this means 'p->on_rq' and 'task_cpu(p)
@@ -4165,129 +4163,127 @@ try_to_wake_up(struct task_struct *p, un
* reordered with p->state check below. This pairs with smp_store_mb()
* in set_current_state() that the waiting thread does.
*/
- raw_spin_lock_irqsave(&p->pi_lock, flags);
- smp_mb__after_spinlock();
- if (!ttwu_state_match(p, state, &success))
- goto unlock;
+ scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
+ smp_mb__after_spinlock();
+ if (!ttwu_state_match(p, state, &success))
+ break;

- trace_sched_waking(p);
+ trace_sched_waking(p);

- /*
- * Ensure we load p->on_rq _after_ p->state, otherwise it would
- * be possible to, falsely, observe p->on_rq == 0 and get stuck
- * in smp_cond_load_acquire() below.
- *
- * sched_ttwu_pending() try_to_wake_up()
- * STORE p->on_rq = 1 LOAD p->state
- * UNLOCK rq->lock
- *
- * __schedule() (switch to task 'p')
- * LOCK rq->lock smp_rmb();
- * smp_mb__after_spinlock();
- * UNLOCK rq->lock
- *
- * [task p]
- * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq
- *
- * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
- * __schedule(). See the comment for smp_mb__after_spinlock().
- *
- * A similar smb_rmb() lives in try_invoke_on_locked_down_task().
- */
- smp_rmb();
- if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
- goto unlock;
+ /*
+ * Ensure we load p->on_rq _after_ p->state, otherwise it would
+ * be possible to, falsely, observe p->on_rq == 0 and get stuck
+ * in smp_cond_load_acquire() below.
+ *
+ * sched_ttwu_pending() try_to_wake_up()
+ * STORE p->on_rq = 1 LOAD p->state
+ * UNLOCK rq->lock
+ *
+ * __schedule() (switch to task 'p')
+ * LOCK rq->lock smp_rmb();
+ * smp_mb__after_spinlock();
+ * UNLOCK rq->lock
+ *
+ * [task p]
+ * STORE p->state = UNINTERRUPTIBLE LOAD p->on_rq
+ *
+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+ * __schedule(). See the comment for smp_mb__after_spinlock().
+ *
+ * A similar smb_rmb() lives in try_invoke_on_locked_down_task().
+ */
+ smp_rmb();
+ if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
+ break;

#ifdef CONFIG_SMP
- /*
- * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
- * possible to, falsely, observe p->on_cpu == 0.
- *
- * One must be running (->on_cpu == 1) in order to remove oneself
- * from the runqueue.
- *
- * __schedule() (switch to task 'p') try_to_wake_up()
- * STORE p->on_cpu = 1 LOAD p->on_rq
- * UNLOCK rq->lock
- *
- * __schedule() (put 'p' to sleep)
- * LOCK rq->lock smp_rmb();
- * smp_mb__after_spinlock();
- * STORE p->on_rq = 0 LOAD p->on_cpu
- *
- * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
- * __schedule(). See the comment for smp_mb__after_spinlock().
- *
- * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
- * schedule()'s deactivate_task() has 'happened' and p will no longer
- * care about it's own p->state. See the comment in __schedule().
- */
- smp_acquire__after_ctrl_dep();
+ /*
+ * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+ * possible to, falsely, observe p->on_cpu == 0.
+ *
+ * One must be running (->on_cpu == 1) in order to remove oneself
+ * from the runqueue.
+ *
+ * __schedule() (switch to task 'p') try_to_wake_up()
+ * STORE p->on_cpu = 1 LOAD p->on_rq
+ * UNLOCK rq->lock
+ *
+ * __schedule() (put 'p' to sleep)
+ * LOCK rq->lock smp_rmb();
+ * smp_mb__after_spinlock();
+ * STORE p->on_rq = 0 LOAD p->on_cpu
+ *
+ * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+ * __schedule(). See the comment for smp_mb__after_spinlock().
+ *
+ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
+ * schedule()'s deactivate_task() has 'happened' and p will no longer
+ * care about it's own p->state. See the comment in __schedule().
+ */
+ smp_acquire__after_ctrl_dep();

- /*
- * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
- * == 0), which means we need to do an enqueue, change p->state to
- * TASK_WAKING such that we can unlock p->pi_lock before doing the
- * enqueue, such as ttwu_queue_wakelist().
- */
- WRITE_ONCE(p->__state, TASK_WAKING);
+ /*
+ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
+ * == 0), which means we need to do an enqueue, change p->state to
+ * TASK_WAKING such that we can unlock p->pi_lock before doing the
+ * enqueue, such as ttwu_queue_wakelist().
+ */
+ WRITE_ONCE(p->__state, TASK_WAKING);

- /*
- * If the owning (remote) CPU is still in the middle of schedule() with
- * this task as prev, considering queueing p on the remote CPUs wake_list
- * which potentially sends an IPI instead of spinning on p->on_cpu to
- * let the waker make forward progress. This is safe because IRQs are
- * disabled and the IPI will deliver after on_cpu is cleared.
- *
- * Ensure we load task_cpu(p) after p->on_cpu:
- *
- * set_task_cpu(p, cpu);
- * STORE p->cpu = @cpu
- * __schedule() (switch to task 'p')
- * LOCK rq->lock
- * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu)
- * STORE p->on_cpu = 1 LOAD p->cpu
- *
- * to ensure we observe the correct CPU on which the task is currently
- * scheduling.
- */
- if (smp_load_acquire(&p->on_cpu) &&
- ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
- goto unlock;
+ /*
+ * If the owning (remote) CPU is still in the middle of schedule() with
+ * this task as prev, considering queueing p on the remote CPUs wake_list
+ * which potentially sends an IPI instead of spinning on p->on_cpu to
+ * let the waker make forward progress. This is safe because IRQs are
+ * disabled and the IPI will deliver after on_cpu is cleared.
+ *
+ * Ensure we load task_cpu(p) after p->on_cpu:
+ *
+ * set_task_cpu(p, cpu);
+ * STORE p->cpu = @cpu
+ * __schedule() (switch to task 'p')
+ * LOCK rq->lock
+ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu)
+ * STORE p->on_cpu = 1 LOAD p->cpu
+ *
+ * to ensure we observe the correct CPU on which the task is currently
+ * scheduling.
+ */
+ if (smp_load_acquire(&p->on_cpu) &&
+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
+ break;

- /*
- * If the owning (remote) CPU is still in the middle of schedule() with
- * this task as prev, wait until it's done referencing the task.
- *
- * Pairs with the smp_store_release() in finish_task().
- *
- * This ensures that tasks getting woken will be fully ordered against
- * their previous state and preserve Program Order.
- */
- smp_cond_load_acquire(&p->on_cpu, !VAL);
+ /*
+ * If the owning (remote) CPU is still in the middle of schedule() with
+ * this task as prev, wait until it's done referencing the task.
+ *
+ * Pairs with the smp_store_release() in finish_task().
+ *
+ * This ensures that tasks getting woken will be fully ordered against
+ * their previous state and preserve Program Order.
+ */
+ smp_cond_load_acquire(&p->on_cpu, !VAL);

- cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
- if (task_cpu(p) != cpu) {
- if (p->in_iowait) {
- delayacct_blkio_end(p);
- atomic_dec(&task_rq(p)->nr_iowait);
- }
+ cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
+ if (task_cpu(p) != cpu) {
+ if (p->in_iowait) {
+ delayacct_blkio_end(p);
+ atomic_dec(&task_rq(p)->nr_iowait);
+ }

- wake_flags |= WF_MIGRATED;
- psi_ttwu_dequeue(p);
- set_task_cpu(p, cpu);
- }
+ wake_flags |= WF_MIGRATED;
+ psi_ttwu_dequeue(p);
+ set_task_cpu(p, cpu);
+ }
#else
- cpu = task_cpu(p);
+ cpu = task_cpu(p);
#endif /* CONFIG_SMP */

- ttwu_queue(p, cpu, wake_flags);
-unlock:
- raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ ttwu_queue(p, cpu, wake_flags);
+ }
out:
if (success)
ttwu_stat(p, task_cpu(p), wake_flags);
- preempt_enable();

return success;
}