Re: [GIT PULL] pidfd fixes

From: Oleg Nesterov
Date: Tue Jul 23 2019 - 06:12:53 EST


On 07/22, Linus Torvalds wrote:
>
> So if we set EXIT_ZOMBIE early, then I think we should change the
> EXIT_DEAD case too. IOW, do something like this on top:
>
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -734,9 +734,10 @@ static void exit_notify(struct task_struct
> *tsk, int group_dead)
> autoreap = true;
> }
>
> - tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
> - if (tsk->exit_state == EXIT_DEAD)
> + if (autoreap) {
> + tsk->exit_state = EXIT_DEAD;
> list_add(&tsk->ptrace_entry, &dead);
> + }

Yes, this needs cleanups. Actually I was going to suggest another change
below, this way do_notify_pidfd() is only called when it is really needed.
But then I decided a trivial one-liner makes more sense for the start.

I'll try to think. Perhaps we should also change do_notify_parent() to set
p->exit_state, at least if autoreap. Then the early exit_state = EXIT_ZOMBIE
won't look so confusing and we can do more (minor) cleanups.

Oleg.

--- x/kernel/exit.c
+++ x/kernel/exit.c
@@ -182,6 +182,13 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
put_task_struct(tsk);
}

+static void do_notify_pidfd(struct task_struct *task)
+{
+ struct pid *pid;
+
+ pid = task_pid(task);
+ wake_up_all(&pid->wait_pidfd);
+}

void release_task(struct task_struct *p)
{
@@ -218,6 +225,8 @@ void release_task(struct task_struct *p)
zap_leader = do_notify_parent(leader, leader->exit_signal);
if (zap_leader)
leader->exit_state = EXIT_DEAD;
+
+ do_notify_pidfd(leader);
}

write_unlock_irq(&tasklist_lock);
@@ -710,7 +719,7 @@ static void forget_original_parent(struct task_struct *father,
*/
static void exit_notify(struct task_struct *tsk, int group_dead)
{
- bool autoreap;
+ bool autoreap, xxx;
struct task_struct *p, *n;
LIST_HEAD(dead);

@@ -720,23 +729,22 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
if (group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);

- if (unlikely(tsk->ptrace)) {
- int sig = thread_group_leader(tsk) &&
- thread_group_empty(tsk) &&
- !ptrace_reparented(tsk) ?
- tsk->exit_signal : SIGCHLD;
+ autoreap = true;
+ xxx = thread_group_leader(tsk) && thread_group_empty(tsk);
+
+ if (xxx || unlikely(tsk->ptrace)) {
+ int sig = xxx && !ptrace_reparented(tsk)
+ ? tsk->exit_signal : SIGCHLD;
autoreap = do_notify_parent(tsk, sig);
- } else if (thread_group_leader(tsk)) {
- autoreap = thread_group_empty(tsk) &&
- do_notify_parent(tsk, tsk->exit_signal);
- } else {
- autoreap = true;
}

tsk->exit_state = autoreap ? EXIT_DEAD : EXIT_ZOMBIE;
if (tsk->exit_state == EXIT_DEAD)
list_add(&tsk->ptrace_entry, &dead);

+ if (xxx)
+ do_notify_pidfd(tsk);
+
/* mt-exec, de_thread() is waiting for group leader */
if (unlikely(tsk->signal->notify_count < 0))
wake_up_process(tsk->signal->group_exit_task);
--- x/kernel/signal.c
+++ x/kernel/signal.c
@@ -1881,14 +1881,6 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
return ret;
}

-static void do_notify_pidfd(struct task_struct *task)
-{
- struct pid *pid;
-
- pid = task_pid(task);
- wake_up_all(&pid->wait_pidfd);
-}
-
/*
* Let a parent know about the death of a child.
* For a stopped/continued status change, use do_notify_parent_cldstop instead.
@@ -1912,9 +1904,6 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
BUG_ON(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));

- /* Wake up all pidfd waiters */
- do_notify_pidfd(tsk);
-
if (sig != SIGCHLD) {
/*
* This is only possible if parent == real_parent.