[RFC PATCH 1/3] sched/fair: Call newidle_balance() from finish_task_switch()

From: Scott Wood
Date: Tue Apr 28 2020 - 01:03:03 EST


Call newidle_balance() from finish_task_switch() once we have switched to the
idle task, rather than from the pick_next_task() path with the rq lock held
and interrupts disabled. Thus, newidle_balance() is entered with interrupts
enabled, which allows (in the next patch) enabling interrupts when the lock
is dropped.
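
Roughly, the new entry/exit sequence in newidle_balance() is the following
(a simplified sketch of the hunks below, not the literal code):

	int newidle_balance(void)
	{
		preempt_disable();		/* pin this CPU so this_rq() is stable */
		this_rq = this_rq();
		local_bh_disable();
		raw_spin_lock_irq(&this_rq->lock);	/* IRQs are still on at entry */

		/* ... the usual idle-balance work, unchanged ... */

		raw_spin_unlock_irq(&this_rq->lock);
		local_bh_enable();
		preempt_enable();
		return pulled_task;
	}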

Signed-off-by: Scott Wood <swood@xxxxxxxxxx>
---
kernel/sched/core.c | 7 ++++---
kernel/sched/fair.c | 46 ++++++++++++++++----------------------------
kernel/sched/sched.h | 6 ++----
3 files changed, 23 insertions(+), 36 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9a2fbf98fd6f..0294beb8d16c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3241,6 +3241,10 @@ static struct rq *finish_task_switch(struct task_struct *prev)
}

tick_nohz_task_switch();
+
+ if (is_idle_task(current))
+ newidle_balance();
+
return rq;
}

@@ -3919,8 +3923,6 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
rq->nr_running == rq->cfs.h_nr_running)) {

p = pick_next_task_fair(rq, prev, rf);
- if (unlikely(p == RETRY_TASK))
- goto restart;

/* Assumes fair_sched_class->next == idle_sched_class */
if (!p) {
@@ -3931,7 +3933,6 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
return p;
}

-restart:
#ifdef CONFIG_SMP
/*
* We must do the balancing pass before put_next_task(), such
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 02f323b85b6d..74c3c5280d6b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6758,8 +6758,7 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
if (rq->nr_running)
return 1;
-
- return newidle_balance(rq, rf) != 0;
+ return 0;
}
#endif /* CONFIG_SMP */

@@ -6934,9 +6932,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
struct cfs_rq *cfs_rq = &rq->cfs;
struct sched_entity *se;
struct task_struct *p;
- int new_tasks;

-again:
if (!sched_fair_runnable(rq))
goto idle;

@@ -7050,19 +7046,6 @@ done: __maybe_unused;
if (!rf)
return NULL;

- new_tasks = newidle_balance(rq, rf);
-
- /*
- * Because newidle_balance() releases (and re-acquires) rq->lock, it is
- * possible for any higher priority task to appear. In that case we
- * must re-start the pick_next_entity() loop.
- */
- if (new_tasks < 0)
- return RETRY_TASK;
-
- if (new_tasks > 0)
- goto again;
-
/*
* rq is about to be idle, check if we need to update the
* lost_idle_time of clock_pelt
@@ -10425,14 +10408,23 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { }
* 0 - failed, no new tasks
* > 0 - success, new (fair) tasks present
*/
-int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
+int newidle_balance(void)
{
unsigned long next_balance = jiffies + HZ;
- int this_cpu = this_rq->cpu;
+ int this_cpu;
struct sched_domain *sd;
+ struct rq *this_rq;
int pulled_task = 0;
u64 curr_cost = 0;

+ preempt_disable();
+ this_rq = this_rq();
+ this_cpu = this_rq->cpu;
+ local_bh_disable();
+ raw_spin_lock_irq(&this_rq->lock);
+
+ update_rq_clock(this_rq);
+
update_misfit_status(NULL, this_rq);
/*
* We must set idle_stamp _before_ calling idle_balance(), such that we
@@ -10444,15 +10436,7 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
* Do not pull tasks towards !active CPUs...
*/
if (!cpu_active(this_cpu))
- return 0;
-
- /*
- * This is OK, because current is on_cpu, which avoids it being picked
- * for load-balance and preemption/IRQs are still disabled avoiding
- * further scheduler activity on it and we're being very careful to
- * re-start the picking loop.
- */
- rq_unpin_lock(this_rq, rf);
+ goto out_unlock;

if (this_rq->avg_idle < sysctl_sched_migration_cost ||
!READ_ONCE(this_rq->rd->overload)) {
@@ -10534,7 +10518,10 @@ int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
if (pulled_task)
this_rq->idle_stamp = 0;

- rq_repin_lock(this_rq, rf);
+out_unlock:
+ raw_spin_unlock_irq(&this_rq->lock);
+ local_bh_enable();
+ preempt_enable();

return pulled_task;
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index db3a57675ccf..3d97c51544d7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1504,13 +1504,13 @@ static inline void unregister_sched_domain_sysctl(void)
}
#endif

-extern int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
+extern int newidle_balance(void);

#else

static inline void sched_ttwu_pending(void) { }

-static inline int newidle_balance(struct rq *this_rq, struct rq_flags *rf) { return 0; }
+static inline int newidle_balance(void) { return 0; }

#endif /* CONFIG_SMP */

@@ -1742,8 +1742,6 @@ extern const u32 sched_prio_to_wmult[40];
#define ENQUEUE_MIGRATED 0x00
#endif

-#define RETRY_TASK ((void *)-1UL)
-
struct sched_class {
const struct sched_class *next;

--
2.18.2