Re: [RFC v16][PATCH 19/43] c/r: external checkpoint of a task other than ourself

From: Oren Laadan
Date: Wed May 27 2009 - 18:35:52 EST


On Thu, 28 May 2009, Alexey Dobriyan wrote:

> On Wed, May 27, 2009 at 01:32:45PM -0400, Oren Laadan wrote:
> > Now we can do "external" checkpoint, i.e. act on another task.
>
> > +static int may_checkpoint_task(struct ckpt_ctx *ctx, struct task_struct *t)
> > +{
> > + if (t->state == TASK_DEAD) {
> > + pr_warning("c/r: task %d is TASK_DEAD\n", task_pid_vnr(t));
> > + return -EAGAIN;
> > + }
> > +
> > + if (!ptrace_may_access(t, PTRACE_MODE_READ)) {
> > + __ckpt_write_err(ctx, "access to task %d (%s) denied",
> > + task_pid_vnr(t), t->comm);
> > + return -EPERM;
> > + }
> > +
> > + /* verify that the task is frozen (unless self) */
> > + if (t != current && !frozen(t)) {
> > + __ckpt_write_err(ctx, "task %d (%s) is not frozen",
> > + task_pid_vnr(t), t->comm);
> > + return -EBUSY;
> > + }
> > +
> > + /* FIX: add support for ptraced tasks */
> > + if (task_ptrace(t)) {
> > + __ckpt_write_err(ctx, "task %d (%s) is ptraced",
> > + task_pid_vnr(t), t->comm);
> > + return -EBUSY;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int get_container(struct ckpt_ctx *ctx, pid_t pid)
> > +{
> > + struct task_struct *task = NULL;
> > + struct nsproxy *nsproxy = NULL;
> > + int ret;
> > +
> > + ctx->root_pid = pid;
> > +
> > + read_lock(&tasklist_lock);
> > + task = find_task_by_vpid(pid);
> > + if (task)
> > + get_task_struct(task);
> > + read_unlock(&tasklist_lock);
> > +
> > + if (!task)
> > + return -ESRCH;
> > +
> > + ret = may_checkpoint_task(ctx, task);
> > + if (ret) {
> > + ckpt_write_err(ctx, NULL);
> > + put_task_struct(task);
> > + return ret;
> > + }
> > +
> > + rcu_read_lock();
> > + nsproxy = task_nsproxy(task);
> > + get_nsproxy(nsproxy);
>
> Will oops if init is multi-threaded and thread group leader exited
> (nsproxy = NULL). I need to think what to do, too.


Good catch. Since all threads share the same nsproxy (except those
that have already exited... duh), we can test for this case and get the
nsproxy from any of the other threads, something like this (untested):

diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
index afc7300..b303876 100644
--- a/checkpoint/checkpoint.c
+++ b/checkpoint/checkpoint.c
@@ -522,9 +522,33 @@ static int get_container(struct ckpt_ctx *ctx, pid_t pid)

rcu_read_lock();
nsproxy = task_nsproxy(task);
- get_nsproxy(nsproxy);
+ if (nsproxy)
+ get_nsproxy(nsproxy);
rcu_read_unlock();

+ /*
+ * If we hit a zombie thread-group-leader, nsproxy will be NULL,
+ * and we instead grab it from one of the other threads.
+ */
+ if (!nsproxy) {
+ struct task_struct *p = next_thread(task);
+
+ BUG_ON(task->state != TASK_DEAD);
+ read_lock(&tasklist_lock);
+ while (p != task && !task_nsproxy(p))
+ p = next_thread(p);
+ nsproxy = get_nsproxy(p);
+ if (nsproxy)
+ get_nsproxy(nsproxy);
+ read_unlock(&tasklist_lock);
+ }
+
+ /* still not ... too bad ... */
+ if (!nsproxy) {
+ put_task_struct(task);
+ return -ESRCH;
+ }
+
ctx->root_task = task;
ctx->root_nsproxy = nsproxy;
ctx->root_init = is_container_init(task);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/