[PATCH v9 11/20] sched: Split the guts of sched_setaffinity() into a helper function

From: Will Deacon
Date: Tue Jun 08 2021 - 14:04:31 EST


In preparation for replaying user affinity requests using a saved mask,
split sched_setaffinity() up so that the initial task lookup and
security checks are only performed when the request comes directly
from userspace.

Reviewed-by: Valentin Schneider <Valentin.Schneider@xxxxxxx>
Signed-off-by: Will Deacon <will@xxxxxxxxxx>
---
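A quick userspace sanity check of the behaviour this split must
preserve (an illustrative sketch only, not part of the series; the
choice of checks is an assumption about what is worth re-testing, and
it builds as an ordinary C program against glibc's sched.h):

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>

/*
 * Illustrative check that the error semantics survive the refactoring:
 * success for a plain single-CPU mask on the calling thread, ESRCH for
 * a pid that cannot exist, EINVAL for an empty mask.
 */
int main(void)
{
	cpu_set_t mask;

	CPU_ZERO(&mask);
	CPU_SET(0, &mask);

	/* pid 0 means the calling thread; this should succeed. */
	if (sched_setaffinity(0, sizeof(mask), &mask))
		printf("self: unexpected error: %s\n", strerror(errno));

	/* A pid that cannot exist must still fail with ESRCH. */
	if (sched_setaffinity(-1, sizeof(mask), &mask) == 0 || errno != ESRCH)
		printf("bad pid: expected ESRCH, got %s\n", strerror(errno));

	/* An empty mask must still fail with EINVAL. */
	CPU_ZERO(&mask);
	if (sched_setaffinity(0, sizeof(mask), &mask) == 0 || errno != EINVAL)
		printf("empty mask: expected EINVAL, got %s\n", strerror(errno));

	return 0;
}

The bad-pid case exercises the lookup that stays in sched_setaffinity(),
while the empty-mask case has to reach __set_cpus_allowed_ptr() inside
the new helper, so together they touch both halves of the split.
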
 kernel/sched/core.c | 105 ++++++++++++++++++++++++--------------------
 1 file changed, 57 insertions(+), 48 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f6ea3d7a07f2..d151446d5987 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6787,53 +6787,22 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
 	return retval;
 }
 
-long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+static int
+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
 {
-	cpumask_var_t cpus_allowed, new_mask;
-	struct task_struct *p;
 	int retval;
+	cpumask_var_t cpus_allowed, new_mask;
 
-	rcu_read_lock();
-
-	p = find_process_by_pid(pid);
-	if (!p) {
-		rcu_read_unlock();
-		return -ESRCH;
-	}
-
-	/* Prevent p going away */
-	get_task_struct(p);
-	rcu_read_unlock();
+	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL))
+		return -ENOMEM;
 
-	if (p->flags & PF_NO_SETAFFINITY) {
-		retval = -EINVAL;
-		goto out_put_task;
-	}
-	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
-		retval = -ENOMEM;
-		goto out_put_task;
-	}
 	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
 		retval = -ENOMEM;
 		goto out_free_cpus_allowed;
 	}
-	retval = -EPERM;
-	if (!check_same_owner(p)) {
-		rcu_read_lock();
-		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
-			rcu_read_unlock();
-			goto out_free_new_mask;
-		}
-		rcu_read_unlock();
-	}
-
-	retval = security_task_setscheduler(p);
-	if (retval)
-		goto out_free_new_mask;
-
 
 	cpuset_cpus_allowed(p, cpus_allowed);
-	cpumask_and(new_mask, in_mask, cpus_allowed);
+	cpumask_and(new_mask, mask, cpus_allowed);
 
 	/*
 	 * Since bandwidth control happens on root_domain basis,
@@ -6854,23 +6823,63 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 #endif
 again:
 	retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
+	if (retval)
+		goto out_free_new_mask;
 
-	if (!retval) {
-		cpuset_cpus_allowed(p, cpus_allowed);
-		if (!cpumask_subset(new_mask, cpus_allowed)) {
-			/*
-			 * We must have raced with a concurrent cpuset
-			 * update. Just reset the cpus_allowed to the
-			 * cpuset's cpus_allowed
-			 */
-			cpumask_copy(new_mask, cpus_allowed);
-			goto again;
-		}
+	cpuset_cpus_allowed(p, cpus_allowed);
+	if (!cpumask_subset(new_mask, cpus_allowed)) {
+		/*
+		 * We must have raced with a concurrent cpuset update.
+		 * Just reset the cpumask to the cpuset's cpus_allowed.
+		 */
+		cpumask_copy(new_mask, cpus_allowed);
+		goto again;
 	}
+
 out_free_new_mask:
 	free_cpumask_var(new_mask);
 out_free_cpus_allowed:
 	free_cpumask_var(cpus_allowed);
+	return retval;
+}
+
+long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+{
+	struct task_struct *p;
+	int retval;
+
+	rcu_read_lock();
+
+	p = find_process_by_pid(pid);
+	if (!p) {
+		rcu_read_unlock();
+		return -ESRCH;
+	}
+
+	/* Prevent p going away */
+	get_task_struct(p);
+	rcu_read_unlock();
+
+	if (p->flags & PF_NO_SETAFFINITY) {
+		retval = -EINVAL;
+		goto out_put_task;
+	}
+
+	if (!check_same_owner(p)) {
+		rcu_read_lock();
+		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+			rcu_read_unlock();
+			retval = -EPERM;
+			goto out_put_task;
+		}
+		rcu_read_unlock();
+	}
+
+	retval = security_task_setscheduler(p);
+	if (retval)
+		goto out_put_task;
+
+	retval = __sched_setaffinity(p, in_mask);
 out_put_task:
 	put_task_struct(p);
 	return retval;
--
2.32.0.rc1.229.g3e70b5a671-goog