[PATCH RFC 10/12] sched/coresched: Make core_pick_seq per run-queue

From: Joel Fernandes (Google)
Date: Sat Aug 15 2020 - 18:01:18 EST

From: Vineeth Pillai <viremana@xxxxxxxxxxxxxxxxxxx>

core_pick_seq is a core-wide counter that records whether a task pick
has been made for a CPU by one of its siblings. But during hotplug,
no pick can be made for an offline CPU, and when that CPU comes back
online it is misled into consuming a stale pick, because the core-wide
counter was incremented while it was down.

So, make core_pick_seq per run-queue.
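
A minimal toy model of the handshake, to illustrate why the counter
must be per-rq (a standalone userspace sketch with hypothetical names;
the real fields live in struct rq and the real check is in
pick_next_task()):

#include <stdbool.h>
#include <stdio.h>

/* Toy stand-in for the two per-rq counters; names are illustrative. */
struct toy_rq {
        unsigned int core_pick_seq;   /* per-rq: written by the sibling that picked for us */
        unsigned int core_sched_seq;  /* per-rq: last pick this CPU actually scheduled */
};

/* True only if a sibling published a pick we have not scheduled yet. */
static bool have_unscheduled_pick(struct toy_rq *rq, unsigned int core_task_seq)
{
        return rq->core_pick_seq == core_task_seq &&
               rq->core_pick_seq != rq->core_sched_seq;
}

int main(void)
{
        struct toy_rq cpu = { 0, 0 };

        /* The core-wide task_seq advanced while this CPU was offline. */
        unsigned int core_task_seq = 5;

        /*
         * With a core-wide pick_seq the onlining CPU would see
         * pick_seq == task_seq and consume a stale rq->core_pick.
         * The per-rq counter stays at 0 until a sibling writes it,
         * so the check correctly fails here and prints 0.
         */
        printf("%d\n", have_unscheduled_pick(&cpu, core_task_seq));
        return 0;
}

With a single core-wide pick_seq, the counter would already equal
core_task_seq when the CPU comes online, while its core_sched_seq is
still stale, so the same check would pass against an rq->core_pick that
was never written for that CPU.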

Signed-off-by: Vineeth Pillai <viremana@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: Joel Fernandes (Google) <joel@xxxxxxxxxxxxxxxxx>
---
kernel/sched/core.c | 19 ++++++++++---------
kernel/sched/sched.h | 2 +-
2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3e9df8221c62..48a49168e57f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4623,9 +4623,9 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
* pointers are all still valid), and we haven't scheduled the last
* pick yet, do so now.
*/
- if (rq->core->core_pick_seq == rq->core->core_task_seq &&
- rq->core->core_pick_seq != rq->core_sched_seq) {
- WRITE_ONCE(rq->core_sched_seq, rq->core->core_pick_seq);
+ if (rq->core_pick_seq == rq->core->core_task_seq &&
+ rq->core_pick_seq != rq->core_sched_seq) {
+ WRITE_ONCE(rq->core_sched_seq, rq->core_pick_seq);

next = rq->core_pick;
if (next != prev) {
@@ -4635,7 +4635,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)

trace_printk("pick pre selected (%u %u %u): %s/%d %lx\n",
rq->core->core_task_seq,
- rq->core->core_pick_seq,
+ rq->core_pick_seq,
rq->core_sched_seq,
next->comm, next->pid,
next->core_cookie);
@@ -4649,7 +4649,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
smt_mask = cpu_smt_mask(cpu);

/*
- * core->core_task_seq, core->core_pick_seq, rq->core_sched_seq
+ * core->core_task_seq, rq->core_pick_seq, rq->core_sched_seq
*
* @task_seq guards the task state ({en,de}queues)
* @pick_seq is the @task_seq we did a selection on
@@ -4667,8 +4667,10 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
struct rq *rq_i = cpu_rq(i);

trace_printk("CPU %d is in smt_mask, resetting\n", i);
-
- rq_i->core_pick = NULL;
+ if (rq_i->core_pick) {
+ WRITE_ONCE(rq_i->core_sched_seq, rq_i->core_pick_seq);
+ rq_i->core_pick = NULL;
+ }

if (rq_i->core_forceidle) {
need_sync = true;
@@ -4771,9 +4773,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
next_class:;
}

- rq->core->core_pick_seq = rq->core->core_task_seq;
next = rq->core_pick;
- rq->core_sched_seq = rq->core->core_pick_seq;

/* Something should have been selected for current CPU */
WARN_ON_ONCE(!next);
@@ -4801,6 +4801,7 @@ next_class:;
continue;

if (rq_i->curr != rq_i->core_pick) {
+ WRITE_ONCE(rq_i->core_pick_seq, rq->core->core_task_seq);
trace_printk("IPI(%d)\n", i);
resched_curr(rq_i);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2922e171a1f0..c7caece2df6e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1036,6 +1036,7 @@ struct rq {
/* per rq */
struct rq *core;
struct task_struct *core_pick;
+ unsigned int core_pick_seq;
unsigned int core_enabled;
unsigned int core_sched_seq;
struct rb_root core_tree;
@@ -1045,7 +1046,6 @@ struct rq {

/* shared state */
unsigned int core_task_seq;
- unsigned int core_pick_seq;
unsigned long core_cookie;
unsigned int core_unsafe_nest;
#endif
--
2.28.0.220.ged08abb693-goog