[RFC PATCH 1/3] sched: Make migrate_task_to() take any task

From: David Vernet
Date: Tue Jun 13 2023 - 01:20:25 EST


The migrate_task_to() function exposed from kernel/sched/core.c migrates
the current task, which is silently assumed to also be its first
argument, to the specified CPU. The function uses stop_one_cpu() to
migrate the task to the target CPU, which won't work if @p is not the
current task as the stop_one_cpu() callback isn't invoked on remote
CPUs.

While this operation is useful for task_numa_migrate() in fair.c, it
would be useful if __migrate_task() in core.c was given external
linkage, as it actually can be used to migrate any task to a CPU.

This patch therefore:
1. Renames the existing migrate_task_to() be called
numa_migrate_current_task_to().
2. Renames __migrate_task() to migrate_task_to(), gives it global
linkage, and updates all callers accordingly.

A follow-on patch will call the new migrate_task_to() from fair.c when
migrating a task in a shared wakequeue to a remote CPU.

Signed-off-by: David Vernet <void@xxxxxxxxxxxxx>
---
kernel/sched/core.c | 16 ++++++++--------
kernel/sched/fair.c | 2 +-
kernel/sched/sched.h | 4 +++-
3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ac38225e6d09..d911b0631e7b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2539,8 +2539,8 @@ struct set_affinity_pending {
* So we race with normal scheduler movements, but that's OK, as long
* as the task is no longer on this CPU.
*/
-static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
- struct task_struct *p, int dest_cpu)
+struct rq *migrate_task_to(struct rq *rq, struct rq_flags *rf,
+ struct task_struct *p, int dest_cpu)
{
/* Affinity changed (again). */
if (!is_cpu_allowed(p, dest_cpu))
@@ -2573,7 +2573,7 @@ static int migration_cpu_stop(void *data)
local_irq_save(rf.flags);
/*
* We need to explicitly wake pending tasks before running
- * __migrate_task() such that we will not miss enforcing cpus_ptr
+ * migrate_task_to() such that we will not miss enforcing cpus_ptr
* during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test.
*/
flush_smp_call_function_queue();
@@ -2605,12 +2605,12 @@ static int migration_cpu_stop(void *data)
}

if (task_on_rq_queued(p))
- rq = __migrate_task(rq, &rf, p, arg->dest_cpu);
+ rq = migrate_task_to(rq, &rf, p, arg->dest_cpu);
else
p->wake_cpu = arg->dest_cpu;

/*
- * XXX __migrate_task() can fail, at which point we might end
+ * XXX migrate_task_to() can fail, at which point we might end
* up running on a dodgy CPU, AFAICT this can only happen
* during CPU hotplug, at which point we'll get pushed out
* anyway, so it's probably not a big deal.
@@ -3259,7 +3259,7 @@ void force_compatible_cpus_allowed_ptr(struct task_struct *p)
alloc_cpumask_var(&new_mask, GFP_KERNEL);

/*
- * __migrate_task() can fail silently in the face of concurrent
+ * migrate_task_to() can fail silently in the face of concurrent
* offlining of the chosen destination CPU, so take the hotplug
* lock to ensure that the migration succeeds.
*/
@@ -9359,7 +9359,7 @@ bool sched_smp_initialized __read_mostly;

#ifdef CONFIG_NUMA_BALANCING
/* Migrate current task p to target_cpu */
-int migrate_task_to(struct task_struct *p, int target_cpu)
+int numa_migrate_current_task_to(struct task_struct *p, int target_cpu)
{
struct migration_arg arg = { p, target_cpu };
int curr_cpu = task_cpu(p);
@@ -9439,7 +9439,7 @@ static int __balance_push_cpu_stop(void *arg)

if (task_rq(p) == rq && task_on_rq_queued(p)) {
cpu = select_fallback_rq(rq->cpu, p);
- rq = __migrate_task(rq, &rf, p, cpu);
+ rq = migrate_task_to(rq, &rf, p, cpu);
}

rq_unlock(rq, &rf);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 6189d1a45635..292c593fc84f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2278,7 +2278,7 @@ static int task_numa_migrate(struct task_struct *p)

best_rq = cpu_rq(env.best_cpu);
if (env.best_task == NULL) {
- ret = migrate_task_to(p, env.best_cpu);
+ ret = numa_migrate_current_task_to(p, env.best_cpu);
WRITE_ONCE(best_rq->numa_migrate_on, 0);
if (ret != 0)
trace_sched_stick_numa(p, env.src_cpu, NULL, env.best_cpu);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 556496c77dc2..5a86e9795731 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1718,7 +1718,7 @@ enum numa_faults_stats {
NUMA_CPUBUF
};
extern void sched_setnuma(struct task_struct *p, int node);
-extern int migrate_task_to(struct task_struct *p, int cpu);
+extern int numa_migrate_current_task_to(struct task_struct *p, int target_cpu);
extern int migrate_swap(struct task_struct *p, struct task_struct *t,
int cpu, int scpu);
extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
@@ -1731,6 +1731,8 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p)

#ifdef CONFIG_SMP

+extern struct rq *migrate_task_to(struct rq *rq, struct rq_flags *rf,
+ struct task_struct *p, int dest_cpu);
static inline void
queue_balance_callback(struct rq *rq,
struct balance_callback *head,
--
2.40.1