[PATCH v5 1/2] sched/rt: Check to push the task away after its affinity was changed

From: Xunlei Pang
Date: Wed Jul 08 2015 - 13:00:44 EST


From: Xunlei Pang <pang.xunlei@xxxxxxxxxx>

We may suffer from extra rt overload rq due to the affinity,
so when the affinity of any runnable rt task is changed, we
should check to trigger balancing, otherwise it will cause
some unnecessary delayed real-time response. Unfortunately,
current RT global scheduler does nothing about this.

For example: a 2-cpu system with two runnable FIFO tasks(same
rt_priority) bound on CPU0, let's name them rt1(running) and
rt2(runnable) respectively; CPU1 has no RTs. Then, someone sets
the affinity of rt2 to 0x3(i.e. CPU0 and CPU1), but after this,
rt2 still can't be scheduled until rt1 enters schedule(), which
definitely causes some/big response latency for rt2.

The patch tries to push the task away once it becomes migratable.

The patch also solves a problem about move_queued_task() called
in set_cpus_allowed_ptr():
When a lower priority rt task gets migrated because its current cpu
isn't in the new affinity mask, after move_queued_task() it will
miss the chance of being pushed away, because check_preempt_curr()
called by move_queued_task() doesn't set the "need resched flag"
for lower priority tasks.

Signed-off-by: Xunlei Pang <pang.xunlei@xxxxxxxxxx>
---
v4->v5:
Avoid triggering the lockdep_pin stuff that Peter pointed out:
lockdep_unpin_lock(&rq->lock);
__balance_callback(rq);
lockdep_pin_lock(&rq->lock);


kernel/sched/core.c | 87 ++++++++++++++++++++++++++++++-----------------------
kernel/sched/rt.c | 18 ++++++++---
2 files changed, 63 insertions(+), 42 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b803e1b..bb8f3ad 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1045,6 +1045,46 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
}

#ifdef CONFIG_SMP
+
+/* rq->lock is held */
+static void __balance_callback(struct rq *rq)
+{
+ struct callback_head *head, *next;
+ void (*func)(struct rq *rq);
+
+ head = rq->balance_callback;
+ rq->balance_callback = NULL;
+ while (head) {
+ func = (void (*)(struct rq *))head->func;
+ next = head->next;
+ head->next = NULL;
+ head = next;
+
+ func(rq);
+ }
+}
+
+/* preemption is disabled */
+static inline void balance_callback(struct rq *rq)
+{
+ if (unlikely(rq->balance_callback)) {
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ __balance_callback(rq);
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ }
+}
+
+#else
+
+static inline void balance_callback(struct rq *rq)
+{
+}
+
+#endif
+
+#ifdef CONFIG_SMP
/*
* This is how migration works:
*
@@ -1187,6 +1227,16 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
}

do_set_cpus_allowed(p, new_mask);
+ /*
+ * rq->lock might get released during __balance_callback(),
+ * but if there's any successful migrating of @p, task_cpu(p)
+ * will obviously be in the new_mask, as p->pi_lock is never
+ * released; Thus, subsequent cpumask_test_cpu() is true and
+ * will make it return safely in such case.
+ */
+ lockdep_unpin_lock(&rq->lock);
+ __balance_callback(rq);
+ lockdep_pin_lock(&rq->lock);

/* Can the task run on the task's current CPU? If so, we're done */
if (cpumask_test_cpu(task_cpu(p), new_mask))
@@ -2480,43 +2530,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
return rq;
}

-#ifdef CONFIG_SMP
-
-/* rq->lock is NOT held, but preemption is disabled */
-static void __balance_callback(struct rq *rq)
-{
- struct callback_head *head, *next;
- void (*func)(struct rq *rq);
- unsigned long flags;
-
- raw_spin_lock_irqsave(&rq->lock, flags);
- head = rq->balance_callback;
- rq->balance_callback = NULL;
- while (head) {
- func = (void (*)(struct rq *))head->func;
- next = head->next;
- head->next = NULL;
- head = next;
-
- func(rq);
- }
- raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-
-static inline void balance_callback(struct rq *rq)
-{
- if (unlikely(rq->balance_callback))
- __balance_callback(rq);
-}
-
-#else
-
-static inline void balance_callback(struct rq *rq)
-{
-}
-
-#endif
-
/**
* schedule_tail - first thing a freshly forked thread must call.
* @prev: the thread we just switched away from.
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 00816ee..d27061f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2089,14 +2089,15 @@ static void set_cpus_allowed_rt(struct task_struct *p,

weight = cpumask_weight(new_mask);

+ rq = task_rq(p);
+
/*
- * Only update if the process changes its state from whether it
- * can migrate or not.
+ * Skip updating the migration stuff if the process doesn't change
+ * its migrate state, but still need to check if it can be pushed
+ * away due to its new affinity.
*/
if ((p->nr_cpus_allowed > 1) == (weight > 1))
- return;
-
- rq = task_rq(p);
+ goto queue_push;

/*
* The process used to be able to migrate OR it can now migrate
@@ -2113,6 +2114,13 @@ static void set_cpus_allowed_rt(struct task_struct *p,
}

update_rt_migration(&rq->rt);
+
+queue_push:
+ if (weight > 1 &&
+ !task_running(rq, p) &&
+ !test_tsk_need_resched(rq->curr) &&
+ !cpumask_subset(new_mask, &p->cpus_allowed))
+ queue_push_tasks(rq);
}

/* Assumes rq->lock is held */
--
1.9.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/