Re: [PATCH v5 16/19] sched: Fix proxy/current (push,pull)ability

From: Dietmar Eggemann
Date: Tue Aug 22 2023 - 11:20:28 EST


On 19/08/2023 08:08, John Stultz wrote:
> From: Valentin Schneider <valentin.schneider@xxxxxxx>

[...]

> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index bee7082b294f..e8065fc5c894 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -6656,6 +6656,21 @@ proxy(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
> raw_spin_unlock(&mutex->wait_lock);
> return ret;
> }
> +
> +static inline void proxy_tag_curr(struct rq *rq, struct task_struct *next)
> +{
> + /*
> + * pick_next_task() calls set_next_task() on the selected task
> + * at some point, which ensures it is not push/pullable.
> + * However, the selected task *and* the mutex owner form an
> + * atomic pair wrt push/pull.
> + *
> + * Make sure owner is not pushable. Unfortunately we can only
> + * deal with that by means of a dequeue/enqueue cycle. :-/
> + */
> + dequeue_task(rq, next, DEQUEUE_NOCLOCK | DEQUEUE_SAVE);
> + enqueue_task(rq, next, ENQUEUE_NOCLOCK | ENQUEUE_RESTORE);
> +}
> #else /* PROXY_EXEC */
> static struct task_struct *
> proxy(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
> @@ -6663,6 +6678,8 @@ proxy(struct rq *rq, struct task_struct *next, struct rq_flags *rf)
> BUG(); // This should never be called in the !PROXY case
> return next;
> }
> +
> +static inline void proxy_tag_curr(struct rq *rq, struct task_struct *next) { }
> #endif /* PROXY_EXEC */
>
> /*
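
As an aside, the dequeue/enqueue cycle in proxy_tag_curr() works
because the class enqueue path re-evaluates pushability on every
enqueue. For RT that's (simplified from mainline kernel/sched/rt.c,
only to illustrate where the pushable state gets fixed up):

	static void
	enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
	{
		...
		enqueue_rt_entity(rt_se, flags);

		/* Re-checked on every enqueue: rq->curr is kept off
		 * the pushable list. */
		if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
			enqueue_pushable_task(rq, p);
	}
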
> @@ -6711,6 +6728,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
> unsigned long prev_state;
> struct rq_flags rf;
> struct rq *rq;
> + bool proxied;
> int cpu;
>
> cpu = smp_processor_id();
> @@ -6760,6 +6778,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
> switch_count = &prev->nvcsw;
> }
>
> + proxied = (rq_selected(rq) != prev);

Looks like 'proxied' is only set here but then never used in v5. (*)

> pick_again:
> next = pick_next_task(rq, rq_selected(rq), &rf);
> rq_set_selected(rq, next);
> @@ -6786,6 +6805,10 @@ static void __sched notrace __schedule(unsigned int sched_mode)
> * changes to task_struct made by pick_next_task().
> */
> RCU_INIT_POINTER(rq->curr, next);
> +
> + if (unlikely(!task_current_selected(rq, next)))
> + proxy_tag_curr(rq, next);
> +
> /*
> * The membarrier system call requires each architecture
> * to have a full memory barrier after updating
> @@ -6810,6 +6833,10 @@ static void __sched notrace __schedule(unsigned int sched_mode)
> /* Also unlocks the rq: */
> rq = context_switch(rq, prev, next, &rf);
> } else {
> + /* In case next was already curr but just got blocked_donor */
> + if (unlikely(!task_current_selected(rq, next)))
> + proxy_tag_curr(rq, next);

(*) v4 had:

+ /* In case next was already curr but just got blocked_donor */
+ if (unlikely(!proxied && next->blocked_donor))
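
So either 'proxied' can simply be dropped in v5, or the intention was
to keep it in the condition here, something like (untested sketch,
merging the v4 and v5 checks):

	/* In case next was already curr but just got blocked_donor */
	if (unlikely(!proxied && !task_current_selected(rq, next)))
		proxy_tag_curr(rq, next);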

> +
> rq->clock_update_flags &= ~(RQCF_ACT_SKIP|RQCF_REQ_SKIP);
>
> rq_unpin_lock(rq, &rf);

I'm also missing the changes in enqueue_task_rt() and put_prev_task_rt()
related to 'affinity of blocked tasks doesn't matter' that were in v4.
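
For reference, IIRC those v4 hunks gated the pushable-task bookkeeping
on the series' task_is_blocked() helper, roughly like the following
(reconstructed from memory, so take it as a sketch rather than the
exact v4 code):

	/* kernel/sched/rt.c */
	static void
	enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
	{
		...
		/*
		 * Affinity of blocked tasks doesn't matter, so keep
		 * them off the pushable list.
		 */
		if (!task_current(rq, p) && p->nr_cpus_allowed > 1 &&
		    !task_is_blocked(p))
			enqueue_pushable_task(rq, p);
	}

with the equivalent !task_is_blocked(p) check in put_prev_task_rt().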