Re: [PATCH] sched/deadline: No need to check NULL later_mask

From: Xunlei Pang
Date: Sat Apr 02 2016 - 06:14:50 EST


On 2016/04/02 at 00:21, Peter Zijlstra wrote:
> On Fri, Apr 01, 2016 at 08:16:37PM +0800, Xunlei Pang wrote:
>> On 2016/04/01 at 19:29, Peter Zijlstra wrote:
>>> On Fri, Apr 01, 2016 at 07:13:18PM +0800, Xunlei Pang wrote:
>>>> Unlike rt tasks, we (should) have no deadline tasks in
>>>> booting phase before the mask is allocated, so we can
>>>> safely remove the check.
>>> Why? And have the kernel explode once it grows an early deadline task?
>>>
>>> Is there _any_ benefit to this?
>> This is a performance-critical path, it'd be better to avoid such a check.
> And the changelog didn't say.. and if its an optimization you should at
> least attempt numbers or instruction counts or whatnot.
>
>> I think in the early boot stage before sched_init_smp(), it's weird to
>> use a deadline task, relying on rt tasks should be enough for us.
> You never know.
>
> Something like the below should completely avoid the problem though.
>
> It uses __initdata storage when coming up and switched to allocated data
> before we bring up smp.
>
> A similar thing could be done to RT..
>
> In fact, both could share the mask, its a temporary mask anyhow, and I'm
> pretty sure there's more such cpumasks lying about that we can merge.
>
> Completely untested...
>
> ---
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 11594230ef4d..acdc291577a0 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -7243,8 +7244,6 @@ void __init sched_init_smp(void)
> sched_init_granularity();
> free_cpumask_var(non_isolated_cpus);
>
> - init_sched_rt_class();
> - init_sched_dl_class();
> }
> #else
> void __init sched_init_smp(void)
> @@ -7444,6 +7443,9 @@ void __init sched_init(void)
> zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
> idle_thread_set_boot_cpu();
> set_cpu_rq_start_time();
> +
> + init_sched_rt_class();
> + init_sched_dl_class();
> #endif
> init_sched_fair_class();
>
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index affd97ec9f65..24d7dbede99e 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1273,7 +1273,8 @@ static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu
> return NULL;
> }
>
> -static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
> +static __initdata struct cpumask __local_mask;
> +static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl) = &__local_mask;
>
> static int find_later_rq(struct task_struct *task)
> {
> @@ -1282,10 +1283,6 @@ static int find_later_rq(struct task_struct *task)
> int this_cpu = smp_processor_id();
> int best_cpu, cpu = task_cpu(task);
>
> - /* Make sure the mask is initialized first */
> - if (unlikely(!later_mask))
> - return -1;
> -
> if (task->nr_cpus_allowed == 1)
> return -1;
>

Your proposal is very nice!

At the sched_init() stage we only have one (to be "idle") task and with irq disabled,
no scheduling will happen, and the cpu_possible_mask was already initiated, so it's
safe to simply move them there.

Also, how about rt&deadline sharing a percpu mask? Because only one of them can
use the mask at a moment, operations are always under some spin_lock_irqsave().

I made a new patch below, slightly tested by running tens of rt&dl tasks for a while,
are you fine with it?

---

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0b21e7a..94ae69a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -93,6 +93,11 @@
DEFINE_MUTEX(sched_domains_mutex);
DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

+#ifdef CONFIG_SMP
+/* Used by Push/Pull scheduling shared by rt and deadline */
+DEFINE_PER_CPU_ALIGNED(cpumask_var_t, sched_pp_mask);
+#endif
+
static void update_rq_clock_task(struct rq *rq, s64 delta);

void update_rq_clock(struct rq *rq)
@@ -7172,9 +7177,6 @@ void __init sched_init_smp(void)
BUG();
sched_init_granularity();
free_cpumask_var(non_isolated_cpus);
-
- init_sched_rt_class();
- init_sched_dl_class();
}
#else
void __init sched_init_smp(void)
@@ -7374,6 +7376,11 @@ void __init sched_init(void)
zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
idle_thread_set_boot_cpu();
set_cpu_rq_start_time();
+
+ for_each_possible_cpu(i) {
+ zalloc_cpumask_var_node(&per_cpu(sched_pp_mask, i),
+ GFP_KERNEL, cpu_to_node(i));
+ }
#endif
init_sched_fair_class();

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index aeca716..a3557f8 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1288,19 +1288,13 @@ next_node:
return NULL;
}

-static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
-
static int find_later_rq(struct task_struct *task)
{
struct sched_domain *sd;
- struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
+ struct cpumask *later_mask;
int this_cpu = smp_processor_id();
int best_cpu, cpu = task_cpu(task);

- /* Make sure the mask is initialized first */
- if (unlikely(!later_mask))
- return -1;
-
if (task->nr_cpus_allowed == 1)
return -1;

@@ -1308,6 +1302,7 @@ static int find_later_rq(struct task_struct *task)
* We have to consider system topology and task affinity
* first, then we can look for a suitable cpu.
*/
+ later_mask = this_cpu_cpumask_var_ptr(sched_pp_mask);
best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
task, later_mask);
if (best_cpu == -1)
@@ -1694,15 +1689,6 @@ static void rq_offline_dl(struct rq *rq)
cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
}

-void __init init_sched_dl_class(void)
-{
- unsigned int i;
-
- for_each_possible_cpu(i)
- zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
- GFP_KERNEL, cpu_to_node(i));
-}
-
#endif /* CONFIG_SMP */

static void switched_from_dl(struct rq *rq, struct task_struct *p)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 5624713..3491809 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1612,22 +1612,17 @@ static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
return NULL;
}

-static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
-
static int find_lowest_rq(struct task_struct *task)
{
struct sched_domain *sd;
- struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
+ struct cpumask *lowest_mask;
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);

- /* Make sure the mask is initialized first */
- if (unlikely(!lowest_mask))
- return -1;
-
if (task->nr_cpus_allowed == 1)
return -1; /* No other targets possible */

+ lowest_mask = this_cpu_cpumask_var_ptr(sched_pp_mask);
if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
return -1; /* No targets found */

@@ -2164,16 +2159,6 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)

queue_pull_task(rq);
}
-
-void __init init_sched_rt_class(void)
-{
- unsigned int i;
-
- for_each_possible_cpu(i) {
- zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
- GFP_KERNEL, cpu_to_node(i));
- }
-}
#endif /* CONFIG_SMP */

/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e6d4a3f..b892954 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -715,6 +715,10 @@ static inline int cpu_of(struct rq *rq)

DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

+#ifdef CONFIG_SMP
+DECLARE_PER_CPU_ALIGNED(cpumask_var_t, sched_pp_mask);
+#endif
+
#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
#define this_rq() this_cpu_ptr(&runqueues)
#define task_rq(p) cpu_rq(task_cpu(p))
@@ -1296,8 +1300,6 @@ extern void sysrq_sched_debug_show(void);
extern void sched_init_granularity(void);
extern void update_max_interval(void);

-extern void init_sched_dl_class(void);
-extern void init_sched_rt_class(void);
extern void init_sched_fair_class(void);

extern void resched_curr(struct rq *rq);
--
1.8.3.1