Re: [PATCH] prctl: add PR_{SET,GET}_CHILD_REAPER to allow simple process supervision

From: Kay Sievers
Date: Tue Aug 16 2011 - 09:43:31 EST


Andrew, mind picking this up?

Thanks,
Kay

On Fri, Jul 29, 2011 at 02:01, Kay Sievers <kay.sievers@xxxxxxxx> wrote:
> From: Lennart Poettering <lennart@xxxxxxxxxxxxxx>
> Subject: prctl: add PR_{SET,GET}_CHILD_REAPER to allow simple process supervision
>
> Userspace service managers/supervisors need to track their started
> services. Many services daemonize by double-forking and get implicitly
> re-parented to PID 1. The process manager will no longer be able to
> receive the SIGCHLD signals for them.
>
> With this prctl, a service manager can mark itself as a sort of
> 'sub-init' process, able to stay as the parent process for all processes
> created by the started services. All SIGCHLD signals will be delivered
> to the service manager.
>
> As a side effect, the relevant parent PID information does not get lost
> by a double-fork, which results in a more elaborate process tree and 'ps'
> output.
>
> This is orthogonal to PID namespaces. PID namespaces are isolated
> from each other, while a service management process usually requires
> the services to live in the same namespace, to be able to talk to each
> other.
>
> Users of this will be the systemd per-user instance, which provides
> init-like functionality for the user's login session and D-Bus, which
> activates bus services on demand. Both will need init-like capabilities
> to be able to properly keep track of the services they start.
>
> Signed-off-by: Lennart Poettering <lennart@xxxxxxxxxxxxxx>
> Signed-off-by: Kay Sievers <kay.sievers@xxxxxxxx>
> ---
>
>  include/linux/prctl.h |    3 +++
>  include/linux/sched.h |    2 ++
>  kernel/exit.c         |    9 ++++++++-
>  kernel/fork.c         |    2 ++
>  kernel/sys.c          |    7 +++++++
>  5 files changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/prctl.h b/include/linux/prctl.h
> index a3baeb2..716b7d3 100644
> --- a/include/linux/prctl.h
> +++ b/include/linux/prctl.h
> @@ -102,4 +102,7 @@
>
> Â#define PR_MCE_KILL_GET 34
>
> +#define PR_SET_CHILD_REAPER 35
> +#define PR_GET_CHILD_REAPER 36
> +
> Â#endif /* _LINUX_PRCTL_H */
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 20b03bf..2dba23b 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1300,6 +1300,8 @@ struct task_struct {
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â * execve */
> Â Â Â Âunsigned in_iowait:1;
>
> + Â Â Â /* Reparent child processes to this process instead of pid 1. */
> + Â Â Â unsigned child_reaper:1;
>
> Â Â Â Â/* Revert to default priority/policy when forking */
> Â Â Â Âunsigned sched_reset_on_fork:1;
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 2913b35..61a80a4 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -700,7 +700,7 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
> Â Â Â Â__acquires(&tasklist_lock)
> Â{
> Â Â Â Âstruct pid_namespace *pid_ns = task_active_pid_ns(father);
> - Â Â Â struct task_struct *thread;
> + Â Â Â struct task_struct *thread, *reaper;
>
> Â Â Â Âthread = father;
> Â Â Â Âwhile_each_thread(father, thread) {
> @@ -711,6 +711,13 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
> Â Â Â Â Â Â Â Âreturn thread;
> Â Â Â Â}
>
> + Â Â Â /* find the first ancestor which is marked as child_reaper */
> + Â Â Â for (reaper = father->parent;
> + Â Â Â Â Â Âreaper != &init_task && reaper != pid_ns->child_reaper;
> + Â Â Â Â Â Âreaper = reaper->parent)
> + Â Â Â Â Â Â Â if (reaper->child_reaper)
> + Â Â Â Â Â Â Â Â Â Â Â return reaper;
> +
> Â Â Â Âif (unlikely(pid_ns->child_reaper == father)) {
> Â Â Â Â Â Â Â Âwrite_unlock_irq(&tasklist_lock);
> Â Â Â Â Â Â Â Âif (unlikely(pid_ns == &init_pid_ns))
> diff --git a/kernel/fork.c b/kernel/fork.c
> index e7ceaca..863c5c7 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1326,6 +1326,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
> Â Â Â Â Â Â Â Âp->parent_exec_id = current->self_exec_id;
> Â Â Â Â}
>
> + Â Â Â p->child_reaper = 0;
> +
> Â Â Â Âspin_lock(&current->sighand->siglock);
>
> Â Â Â Â/*
> diff --git a/kernel/sys.c b/kernel/sys.c
> index a101ba3..9b41498 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -1792,6 +1792,13 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
> Â Â Â Â Â Â Â Â Â Â Â Âelse
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âerror = PR_MCE_KILL_DEFAULT;
> Â Â Â Â Â Â Â Â Â Â Â Âbreak;
> + Â Â Â Â Â Â Â case PR_SET_CHILD_REAPER:
> + Â Â Â Â Â Â Â Â Â Â Â me->child_reaper = !!arg2;
> + Â Â Â Â Â Â Â Â Â Â Â error = 0;
> + Â Â Â Â Â Â Â Â Â Â Â break;
> + Â Â Â Â Â Â Â case PR_GET_CHILD_REAPER:
> + Â Â Â Â Â Â Â Â Â Â Â error = put_user(me->child_reaper, (int __user *) arg2);
> + Â Â Â Â Â Â Â Â Â Â Â break;
> Â Â Â Â Â Â Â Âdefault:
> Â Â Â Â Â Â Â Â Â Â Â Âerror = -EINVAL;
> Â Â Â Â Â Â Â Â Â Â Â Âbreak;
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/