Re: [PATCH] pidfd: change pidfd_send_signal() to respect PIDFD_THREAD

From: Christian Brauner
Date: Thu Feb 08 2024 - 08:20:18 EST


On Wed, Feb 07, 2024 at 12:45:49PM +0100, Oleg Nesterov wrote:
> Turn kill_pid_info() into kill_pid_info_type(), this allows to pass any
> pid_type to group_send_sig_info(), despite its name it should work fine
> even if type = PIDTYPE_PID.
>
> Change pidfd_send_signal() to use PIDTYPE_PID or PIDTYPE_TGID depending
> on PIDFD_THREAD.
>
> While at it kill another TODO comment in pidfd_show_fdinfo(). As Christian
> expains fdinfo reports f_flags, userspace can already detect PIDFD_THREAD.
>
> Signed-off-by: Oleg Nesterov <oleg@xxxxxxxxxx>
> ---
> kernel/fork.c | 2 --
> kernel/signal.c | 18 ++++++++++++------
> 2 files changed, 12 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index cd61ca87d0e6..47b565598063 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -2051,8 +2051,6 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
>
> seq_put_decimal_ll(m, "Pid:\t", nr);
>
> - /* TODO: report PIDFD_THREAD */
> -
> #ifdef CONFIG_PID_NS
> seq_put_decimal_ll(m, "\nNSpid:\t", nr);
> if (nr > 0) {
> diff --git a/kernel/signal.c b/kernel/signal.c
> index c3fac06937e2..e3edcd784e45 100644
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -47,6 +47,7 @@
> #include <linux/cgroup.h>
> #include <linux/audit.h>
> #include <linux/sysctl.h>
> +#include <uapi/linux/pidfd.h>
>
> #define CREATE_TRACE_POINTS
> #include <trace/events/signal.h>
> @@ -1478,7 +1479,8 @@ int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp)
> return ret;
> }
>
> -int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
> +static int kill_pid_info_type(int sig, struct kernel_siginfo *info,
> + struct pid *pid, enum pid_type type)
> {
> int error = -ESRCH;
> struct task_struct *p;
> @@ -1487,11 +1489,10 @@ int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
> rcu_read_lock();
> p = pid_task(pid, PIDTYPE_PID);
> if (p)
> - error = group_send_sig_info(sig, info, p, PIDTYPE_TGID);
> + error = group_send_sig_info(sig, info, p, type);
> rcu_read_unlock();
> if (likely(!p || error != -ESRCH))
> return error;
> -
> /*
> * The task was unhashed in between, try again. If it
> * is dead, pid_task() will return NULL, if we race with
> @@ -1500,6 +1501,11 @@ int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
> }
> }
>
> +int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
> +{
> + return kill_pid_info_type(sig, info, pid, PIDTYPE_TGID);
> +}
> +
> static int kill_proc_info(int sig, struct kernel_siginfo *info, pid_t pid)
> {
> int error;
> @@ -3890,6 +3896,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
> struct fd f;
> struct pid *pid;
> kernel_siginfo_t kinfo;
> + enum pid_type type;
>
> /* Enforce flags be set to 0 until we add an extension. */
> if (flags)
> @@ -3928,9 +3935,8 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
> prepare_kill_siginfo(sig, &kinfo);
> }
>
> - /* TODO: respect PIDFD_THREAD */
> - ret = kill_pid_info(sig, &kinfo, pid);
> -
> + type = (f.file->f_flags & PIDFD_THREAD) ? PIDTYPE_PID : PIDTYPE_TGID;
> + ret = kill_pid_info_type(sig, &kinfo, pid, type);

If the user doesn't provide siginfo then the kernel fills in the info in
prepare_kill_siginfo() a few lines above. That sets info->si_code to
SI_USER even for the PIDFD_THREAD case. Whenever the info is filled in
by the kernel it's not exactly userspace impersonating anything plus we
know that what we're sending to is a pidfd by the type of the pidfd. So
it feels like we should fill in SI_TKILL here as well?

I would also suggest we update the obsolete comment on top of
pidfd_send_signal() along the lines of:

diff --git a/kernel/signal.c b/kernel/signal.c
index e3edcd784e45..40df0c17abd7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3878,14 +3878,10 @@ static struct pid *pidfd_to_pid(const struct file *file)
* @info: signal info
* @flags: future flags
*
- * The syscall currently only signals via PIDTYPE_PID which covers
- * kill(<positive-pid>, <signal>. It does not signal threads or process
- * groups.
- * In order to extend the syscall to threads and process groups the @flags
- * argument should be used. In essence, the @flags argument will determine
- * what is signaled and not the file descriptor itself. Put in other words,
- * grouping is a property of the flags argument not a property of the file
- * descriptor.
+ * If the @pidfd refers to a thread-group leader the signal is thread-group
+ * directed. If @pidfd referes to a thread then the signal is thread directed.
+ * In the future extension to @flags may be used to override the default scope
+ * of @pidfd.
*
* Return: 0 on success, negative errno on failure
*/