[tip: sched/urgent] sched/fair: Fix pick_eevdf()

From: tip-bot2 for Benjamin Segall
Date: Tue Oct 03 2023 - 06:42:50 EST


The following commit has been merged into the sched/urgent branch of tip:

Commit-ID: 561c58efd2394d76a32254d91e4b1de8ecdeb5c8
Gitweb: https://git.kernel.org/tip/561c58efd2394d76a32254d91e4b1de8ecdeb5c8
Author: Benjamin Segall <bsegall@xxxxxxxxxx>
AuthorDate: Fri, 29 Sep 2023 17:09:30 -07:00
Committer: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CommitterDate: Tue, 03 Oct 2023 12:32:30 +02:00

sched/fair: Fix pick_eevdf()

The old pick_eevdf() could fail to find the actual earliest eligible
deadline when it descended to the right looking for min_deadline, but
it turned out that that min_deadline wasn't actually eligible. In that
case we need to go back and search through any left branches we
skipped looking for the actual best _eligible_ min_deadline.

This is more expensive, but still O(log n), and at worst should only
involve descending two branches of the rbtree.

I've run this through a userspace stress test (thank you
tools/lib/rbtree.c), so hopefully this implementation doesn't miss any
corner cases.

Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
Signed-off-by: Ben Segall <bsegall@xxxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Link: https://lkml.kernel.org/r/xm261qego72d.fsf_-_@xxxxxxxxxx
---
kernel/sched/fair.c | 72 +++++++++++++++++++++++++++++++++++---------
1 file changed, 58 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ef7490c..929d21d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -872,14 +872,16 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
*
* Which allows an EDF like search on (sub)trees.
*/
-static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_eevdf(struct cfs_rq *cfs_rq)
{
struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
struct sched_entity *curr = cfs_rq->curr;
struct sched_entity *best = NULL;
+ struct sched_entity *best_left = NULL;

if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
curr = NULL;
+ best = curr;

/*
* Once selected, run a task until it either becomes non-eligible or
@@ -900,33 +902,75 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
}

/*
- * If this entity has an earlier deadline than the previous
- * best, take this one. If it also has the earliest deadline
- * of its subtree, we're done.
+ * Now we heap search eligible trees for the best (min_)deadline
*/
- if (!best || deadline_gt(deadline, best, se)) {
+ if (!best || deadline_gt(deadline, best, se))
best = se;
- if (best->deadline == best->min_deadline)
- break;
- }

/*
- * If the earlest deadline in this subtree is in the fully
- * eligible left half of our space, go there.
+ * Every se in a left branch is eligible, keep track of the
+ * branch with the best min_deadline
*/
+ if (node->rb_left) {
+ struct sched_entity *left = __node_2_se(node->rb_left);
+
+ if (!best_left || deadline_gt(min_deadline, best_left, left))
+ best_left = left;
+
+ /*
+ * min_deadline is in the left branch. rb_left and all
+ * descendants are eligible, so immediately switch to the second
+ * loop.
+ */
+ if (left->min_deadline == se->min_deadline)
+ break;
+ }
+
+ /* min_deadline is at this node, no need to look right */
+ if (se->deadline == se->min_deadline)
+ break;
+
+ /* else min_deadline is in the right branch. */
+ node = node->rb_right;
+ }
+
+ /*
+ * We ran into an eligible node which is itself the best.
+ * (Or nr_running == 0 and both are NULL)
+ */
+ if (!best_left || (s64)(best_left->min_deadline - best->deadline) > 0)
+ return best;
+
+ /*
+ * Now best_left and all of its children are eligible, and we are just
+ * looking for deadline == min_deadline
+ */
+ node = &best_left->run_node;
+ while (node) {
+ struct sched_entity *se = __node_2_se(node);
+
+ /* min_deadline is the current node */
+ if (se->deadline == se->min_deadline)
+ return se;
+
+ /* min_deadline is in the left branch */
if (node->rb_left &&
__node_2_se(node->rb_left)->min_deadline == se->min_deadline) {
node = node->rb_left;
continue;
}

+ /* else min_deadline is in the right branch */
node = node->rb_right;
}
+ return NULL;
+}

- if (!best || (curr && deadline_gt(deadline, best, curr)))
- best = curr;
+static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
+{
+ struct sched_entity *se = __pick_eevdf(cfs_rq);

- if (unlikely(!best)) {
+ if (!se) {
struct sched_entity *left = __pick_first_entity(cfs_rq);
if (left) {
pr_err("EEVDF scheduling fail, picking leftmost\n");
@@ -934,7 +978,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
}
}

- return best;
+ return se;
}

#ifdef CONFIG_SCHED_DEBUG