Re: [PATCH 4/6] sched/deadline: Block DL tasks on non-exclusive cpuset if bandwidth control is enabled

From: Dietmar Eggemann
Date: Thu Jan 14 2021 - 10:52:35 EST


On 12/01/2021 16:53, Daniel Bristot de Oliveira wrote:
> The current SCHED_DEADLINE design supports only the global scheduler,
> or variants of it, i.e., clustered and partitioned, via cpuset config.
> To enable the partitioning of a system with clusters of CPUs, the
> documentation advises the usage of exclusive cpusets, creating an
> exclusive root_domain for the cpuset.
>
> Attempts to change the cpu affinity of a thread to a cpu mask different
> from the root domain result in an error. For instance:

[...]

> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 788a391657a5..c221e14d5b86 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -2878,6 +2878,13 @@ int dl_task_can_attach(struct task_struct *p,
>  	if (cpumask_empty(cs_cpus_allowed))
>  		return 0;
>
> +	/*
> +	 * Do not allow moving tasks to non-exclusive cpusets
> +	 * if bandwidth control is enabled.
> +	 */
> +	if (dl_bandwidth_enabled() && !exclusive)
> +		return -EBUSY;
> +
>  	/*
>  	 * The task is not moving to another root domain, so it is
>  	 * already accounted.
>
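
For context, dl_bandwidth_enabled() just mirrors the RT runtime sysctl,
so the new -EBUSY exit only triggers when bandwidth control is active.
The helper boils down to this (paraphrasing kernel/sched/sched.h, modulo
version drift):

static inline int dl_bandwidth_enabled(void)
{
	/* disabled by writing -1 to /proc/sys/kernel/sched_rt_runtime_us */
	return sysctl_sched_rt_runtime >= 0;
}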

But doesn't this mean you only have to mark the cpuset as exclusive
(cgroup v1) or as a root partition (cgroup v2) to run into the same
issue:

with this patch:

cgroupv1:

root@juno:/sys/fs/cgroup/cpuset# chrt -d --sched-period 1000000000 --sched-runtime 100000000 0 sleep 500 &
[1] 1668
root@juno:/sys/fs/cgroup/cpuset# PID1=$!

root@juno:/sys/fs/cgroup/cpuset# chrt -d --sched-period 1000000000 --sched-runtime 100000000 0 sleep 500 &
[2] 1669
root@juno:/sys/fs/cgroup/cpuset# PID2=$!

root@juno:/sys/fs/cgroup/cpuset# mkdir A

root@juno:/sys/fs/cgroup/cpuset# echo 0 > ./A/cpuset.mems
root@juno:/sys/fs/cgroup/cpuset# echo 0 > ./A/cpuset.cpus

root@juno:/sys/fs/cgroup/cpuset# echo $PID2 > ./A/cgroup.procs
-bash: echo: write error: Device or resource busy

root@juno:/sys/fs/cgroup/cpuset# echo 1 > ./A/cpuset.cpu_exclusive

root@juno:/sys/fs/cgroup/cpuset# echo $PID2 > ./A/cgroup.procs

root@juno:/sys/fs/cgroup/cpuset# cat /proc/$PID1/status | grep Cpus_allowed_list | awk '{print $2}'
0-5
root@juno:/sys/fs/cgroup/cpuset# cat /proc/$PID2/status | grep Cpus_allowed_list | awk '{print $2}'
0
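
For reference, the chrt -d calls above boil down to a sched_setattr()
syscall asking for runtime/period = 100ms/1s, i.e. 10% DL bandwidth per
task. A minimal sketch following the sched_setattr(2) man page (glibc
has no wrapper, so the struct and syscall are spelled out by hand; the
helper name is mine, not from the series):

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE 6
#endif

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
};

static int set_dl(pid_t pid)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy   = SCHED_DEADLINE;
	attr.sched_runtime  = 100000000ULL;	/* 100 ms */
	attr.sched_deadline = 1000000000ULL;	/* == period here */
	attr.sched_period   = 1000000000ULL;	/* 1 s */

	return syscall(SYS_sched_setattr, pid, &attr, 0);
}

The cgroup.procs writes above then exercise moving such a task between
cpusets.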

cgroupv2:

root@juno:/sys/fs/cgroup# echo +cpuset > cgroup.subtree_control

root@juno:/sys/fs/cgroup# chrt -d --sched-period 1000000000 --sched-runtime 100000000 0 sleep 500 &
[1] 1687
root@juno:/sys/fs/cgroup# PID1=$!

root@juno:/sys/fs/cgroup# chrt -d --sched-period 1000000000 --sched-runtime 100000000 0 sleep 500 &
[2] 1688
root@juno:/sys/fs/cgroup# PID2=$!

root@juno:/sys/fs/cgroup# mkdir A

root@juno:/sys/fs/cgroup# echo 0 > ./A/cpuset.mems
root@juno:/sys/fs/cgroup# echo 0 > ./A/cpuset.cpus

root@juno:/sys/fs/cgroup# echo $PID2 > ./A/cgroup.procs
-bash: echo: write error: Device or resource busy

root@juno:/sys/fs/cgroup# echo root > ./A/cpuset.cpus.partition

root@juno:/sys/fs/cgroup# echo $PID2 > ./A/cgroup.procs

root@juno:/sys/fs/cgroup# cat /proc/$PID1/status | grep Cpus_allowed_list | awk '{print $2}'
0-5
root@juno:/sys/fs/cgroup# cat /proc/$PID2/status | grep Cpus_allowed_list | awk '{print $2}'
0