[RFC PATCH 2/6] set*uid: Check RLIMIT_PROC against new credentials

From: Michal Koutný
Date: Mon Feb 07 2022 - 08:13:02 EST


The generic idea is that not even root or capable user can force an
unprivileged user's limit breach. (For historical and security reasons
this check is postponed from set*uid to execve.) During the switch the
resource consumption of target the user has to be checked. The commits
905ae01c4ae2 ("Add a reference to ucounts for each cred") and
21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") made the
check in set_user() look at the old user's consumption.

This version of the fix simply moves the check to the place where the
actual switch of the accounting structure happens -- set_cred_ucounts().

The other callers are kept without the check but with the per-userns
accounting they may be newly subject to the check too.
The set_cred_ucounts() becomes inconsistent since task->flags are
passed by the caller but task_rlimit() is implicitly `current`'s, this
patch is meant to illustrate the issue, nicer implementation is
possible.

Fixes: 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts")
Signed-off-by: Michal Koutný <mkoutny@xxxxxxxx>
---
fs/exec.c | 2 +-
include/linux/cred.h | 2 +-
kernel/cred.c | 24 +++++++++++++++++++++---
kernel/fork.c | 2 +-
kernel/sys.c | 21 +++------------------
kernel/user_namespace.c | 2 +-
6 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index fc598c2652b2..e759e42c61da 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1363,7 +1363,7 @@ int begin_new_exec(struct linux_binprm * bprm)
WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
flush_signal_handlers(me, 0);

- retval = set_cred_ucounts(bprm->cred);
+ retval = set_cred_ucounts(bprm->cred, NULL);
if (retval < 0)
goto out_unlock;

diff --git a/include/linux/cred.h b/include/linux/cred.h
index fcbc6885cc09..455525ab380d 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -170,7 +170,7 @@ extern int set_security_override_from_ctx(struct cred *, const char *);
extern int set_create_files_as(struct cred *, struct inode *);
extern int cred_fscmp(const struct cred *, const struct cred *);
extern void __init cred_init(void);
-extern int set_cred_ucounts(struct cred *);
+extern int set_cred_ucounts(struct cred *, unsigned int *);

/*
* check for validity of credentials
diff --git a/kernel/cred.c b/kernel/cred.c
index 473d17c431f3..791cab70b764 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -370,7 +370,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
ret = create_user_ns(new);
if (ret < 0)
goto error_put;
- ret = set_cred_ucounts(new);
+ ret = set_cred_ucounts(new, NULL);
if (ret < 0)
goto error_put;
}
@@ -492,7 +492,7 @@ int commit_creds(struct cred *new)

/* do it
* RLIMIT_NPROC limits on user->processes have already been checked
- * in set_user().
+ * in set_cred_ucounts().
*/
alter_cred_subscribers(new, 2);
if (new->user != old->user || new->user_ns != old->user_ns)
@@ -663,7 +663,7 @@ int cred_fscmp(const struct cred *a, const struct cred *b)
}
EXPORT_SYMBOL(cred_fscmp);

-int set_cred_ucounts(struct cred *new)
+int set_cred_ucounts(struct cred *new, unsigned int *nproc_flags)
{
struct task_struct *task = current;
const struct cred *old = task->real_cred;
@@ -685,6 +685,24 @@ int set_cred_ucounts(struct cred *new)
new->ucounts = new_ucounts;
put_ucounts(old_ucounts);

+ if (!nproc_flags)
+ return 0;
+
+ /*
+ * We don't fail in case of NPROC limit excess here because too many
+ * poorly written programs don't check set*uid() return code, assuming
+ * it never fails if called by root. We may still enforce NPROC limit
+ * for programs doing set*uid()+execve() by harmlessly deferring the
+ * failure to the execve() stage.
+ */
+ if (ucounts_limit_cmp(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) >= 0 &&
+ new->user != INIT_USER &&
+ !security_capable(new, &init_user_ns, CAP_SYS_RESOURCE, CAP_OPT_NONE) &&
+ !security_capable(new, &init_user_ns, CAP_SYS_ADMIN, CAP_OPT_NONE))
+ *nproc_flags |= PF_NPROC_EXCEEDED;
+ else
+ *nproc_flags &= ~PF_NPROC_EXCEEDED;
+
return 0;
}

diff --git a/kernel/fork.c b/kernel/fork.c
index 7cb21a70737d..a4005c679d29 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -3051,7 +3051,7 @@ int ksys_unshare(unsigned long unshare_flags)
goto bad_unshare_cleanup_cred;

if (new_cred) {
- err = set_cred_ucounts(new_cred);
+ err = set_cred_ucounts(new_cred, NULL);
if (err)
goto bad_unshare_cleanup_cred;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 48c90dcceff3..4e4eea30e235 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -472,21 +472,6 @@ static int set_user(struct cred *new)
if (!new_user)
return -EAGAIN;

- /*
- * We don't fail in case of NPROC limit excess here because too many
- * poorly written programs don't check set*uid() return code, assuming
- * it never fails if called by root. We may still enforce NPROC limit
- * for programs doing set*uid()+execve() by harmlessly deferring the
- * failure to the execve() stage.
- */
- if (ucounts_limit_cmp(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) >= 0 &&
- new_user != INIT_USER &&
- !security_capable(new, &init_user_ns, CAP_SYS_RESOURCE, CAP_OPT_NONE) &&
- !security_capable(new, &init_user_ns, CAP_SYS_ADMIN, CAP_OPT_NONE))
- current->flags |= PF_NPROC_EXCEEDED;
- else
- current->flags &= ~PF_NPROC_EXCEEDED;
-
free_uid(new->user);
new->user = new_user;
return 0;
@@ -560,7 +545,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
if (retval < 0)
goto error;

- retval = set_cred_ucounts(new);
+ retval = set_cred_ucounts(new, &current->flags);
if (retval < 0)
goto error;

@@ -622,7 +607,7 @@ long __sys_setuid(uid_t uid)
if (retval < 0)
goto error;

- retval = set_cred_ucounts(new);
+ retval = set_cred_ucounts(new, &current->flags);
if (retval < 0)
goto error;

@@ -701,7 +686,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
if (retval < 0)
goto error;

- retval = set_cred_ucounts(new);
+ retval = set_cred_ucounts(new, &current->flags);
if (retval < 0)
goto error;

diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 6b2e3ca7ee99..f7eec0b0233b 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -1344,7 +1344,7 @@ static int userns_install(struct nsset *nsset, struct ns_common *ns)
put_user_ns(cred->user_ns);
set_cred_user_ns(cred, get_user_ns(user_ns));

- if (set_cred_ucounts(cred) < 0)
+ if (set_cred_ucounts(cred, NULL) < 0)
return -EINVAL;

return 0;
--
2.34.1