[PATCH] pidfd: cleanup the usage of __pidfd_prepare's flags

From: Oleg Nesterov
Date: Thu Jan 25 2024 - 11:19:10 EST


- make pidfd_create() static.

- Don't pass O_RDWR | O_CLOEXEC to __pidfd_prepare() in copy_process(),
__pidfd_prepare() adds these flags unconditionally.

- Kill the flags check in __pidfd_prepare(). sys_pidfd_open() checks the
flags itself, all other users of pidfd_prepare() pass flags = 0.

If we need a sanity check for those other in kernel users then
WARN_ON_ONCE(flags & ~PIDFD_NONBLOCK) makes more sense.

- Don't pass O_RDWR to get_unused_fd_flags(), it ignores everything except
O_CLOEXEC.

- Don't pass O_CLOEXEC to anon_inode_getfile(), it ignores everything except
O_ACCMODE | O_NONBLOCK.

Signed-off-by: Oleg Nesterov <oleg@xxxxxxxxxx>
---
include/linux/pid.h | 1 -
kernel/fork.c | 9 +++------
kernel/pid.c | 2 +-
3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 395cacce1179..e6a041cb8bac 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -73,7 +73,6 @@ struct file;
extern struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
-int pidfd_create(struct pid *pid, unsigned int flags);
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);

static inline struct pid *get_pid(struct pid *pid)
diff --git a/kernel/fork.c b/kernel/fork.c
index c981fa6171c1..347641398f9d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2130,15 +2130,12 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
int pidfd;
struct file *pidfd_file;

- if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
- return -EINVAL;
-
- pidfd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+ pidfd = get_unused_fd_flags(O_CLOEXEC);
if (pidfd < 0)
return pidfd;

pidfd_file = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
- flags | O_RDWR | O_CLOEXEC);
+ flags | O_RDWR);
if (IS_ERR(pidfd_file)) {
put_unused_fd(pidfd);
return PTR_ERR(pidfd_file);
@@ -2524,7 +2521,7 @@ __latent_entropy struct task_struct *copy_process(
*/
if (clone_flags & CLONE_PIDFD) {
/* Note that no task has been attached to @pid yet. */
- retval = __pidfd_prepare(pid, O_RDWR | O_CLOEXEC, &pidfile);
+ retval = __pidfd_prepare(pid, 0, &pidfile);
if (retval < 0)
goto bad_fork_free_pid;
pidfd = retval;
diff --git a/kernel/pid.c b/kernel/pid.c
index b52b10865454..c7a3e359f8f5 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -595,7 +595,7 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
* Return: On success, a cloexec pidfd is returned.
* On error, a negative errno number will be returned.
*/
-int pidfd_create(struct pid *pid, unsigned int flags)
+static int pidfd_create(struct pid *pid, unsigned int flags)
{
int pidfd;
struct file *pidfd_file;
--
2.25.1.362.g51ebf55