[RFC,PATCH 5/5] exec: RT sub-thread can livelock and monopolize CPU on exec

From: Oleg Nesterov
Date: Sat Aug 18 2007 - 13:38:53 EST


de_thread() yields waiting for ->group_leader to be a zombie. This deadlocks
if an rt-prio execer shares the same cpu with ->group_leader. Change the code
to use ->group_exit_task/notify_count mechanics.

This patch certainly uglifies the code, perhaps someone can suggest something
better.

Signed-off-by: Oleg Nesterov <oleg@xxxxxxxxxx>

--- t/kernel/exit.c~5_DEADLOCK 2007-08-17 18:56:31.000000000 +0400
+++ t/kernel/exit.c 2007-08-18 20:57:49.000000000 +0400
@@ -102,10 +102,9 @@ static void __exit_signal(struct task_st
* If there is any task waiting for the group exit
* then notify it:
*/
- if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) {
+ if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count)
wake_up_process(sig->group_exit_task);
- sig->group_exit_task = NULL;
- }
+
if (tsk == sig->curr_target)
sig->curr_target = next_thread(tsk);
/*
@@ -850,6 +849,11 @@ static void exit_notify(struct task_stru
state = EXIT_DEAD;
tsk->exit_state = state;

+ if (thread_group_leader(tsk) &&
+ tsk->signal->notify_count < 0 &&
+ tsk->signal->group_exit_task)
+ wake_up_process(tsk->signal->group_exit_task);
+
write_unlock_irq(&tasklist_lock);

list_for_each_safe(_p, _n, &ptrace_dead) {
--- t/fs/exec.c~5_DEADLOCK 2007-08-18 19:34:12.000000000 +0400
+++ t/fs/exec.c 2007-08-18 20:43:59.000000000 +0400
@@ -828,16 +828,15 @@ static int de_thread(struct task_struct
hrtimer_restart(&sig->real_timer);
spin_lock_irq(lock);
}
+
+ sig->notify_count = count;
+ sig->group_exit_task = tsk;
while (atomic_read(&sig->count) > count) {
- sig->group_exit_task = tsk;
- sig->notify_count = count;
__set_current_state(TASK_UNINTERRUPTIBLE);
spin_unlock_irq(lock);
schedule();
spin_lock_irq(lock);
}
- sig->group_exit_task = NULL;
- sig->notify_count = 0;
spin_unlock_irq(lock);

/*
@@ -846,14 +845,17 @@ static int de_thread(struct task_struct
* and to assume its PID:
*/
if (!thread_group_leader(tsk)) {
- /*
- * Wait for the thread group leader to be a zombie.
- * It should already be zombie at this point, most
- * of the time.
- */
leader = tsk->group_leader;
- while (leader->exit_state != EXIT_ZOMBIE)
- yield();
+
+ sig->notify_count = -1;
+ for (;;) {
+ write_lock_irq(&tasklist_lock);
+ if (likely(leader->exit_state))
+ break;
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ write_unlock_irq(&tasklist_lock);
+ schedule();
+ }

/*
* The only record we have of the real-time age of a
@@ -867,8 +869,6 @@ static int de_thread(struct task_struct
*/
tsk->start_time = leader->start_time;

- write_lock_irq(&tasklist_lock);
-
BUG_ON(leader->tgid != tsk->tgid);
BUG_ON(tsk->pid == tsk->tgid);
/*
@@ -901,6 +901,8 @@ static int de_thread(struct task_struct
write_unlock_irq(&tasklist_lock);
}

+ sig->group_exit_task = NULL;
+ sig->notify_count = 0;
/*
* There may be one thread left which is just exiting,
* but it's safe to stop telling the group to kill themselves.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/