[PATCH 2.6.10-mm2] Fix preemption race [1/3] (Core)

From: tglx
Date: Sun Jan 09 2005 - 19:41:46 EST


The idle-thread-preemption-fix.patch introduced a race, which is not
critical, but might give us an extra turn through the scheduler. When
interrupts are re-enabled in entry.S and an interrupt occurs before we
reach the add_preempt_count() call in preempt_schedule(), the return
from interrupt path still sees a preempt_count of zero and reschedules
us again there.
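
For illustration only, here is a minimal userspace model of that window
(the variable and helper names only mirror TI_preempt_count,
TIF_NEED_RESCHED and the entry path; this is not kernel code). With an
interrupt landing in the window we go through the scheduler twice for a
single reschedule request:

	/* Userspace model of the race window, not kernel code. */
	#include <stdio.h>

	static int preempt_count;	/* models TI_preempt_count, 0 when the window opens */
	static int need_resched = 1;	/* models TIF_NEED_RESCHED, set, so entry.S preempts */
	static int schedules;		/* passes through the scheduler */

	static void schedule(void)
	{
		schedules++;
		need_resched = 0;	/* the pending reschedule is handled */
	}

	/* return-from-interrupt path: preempts when count is 0 and resched is set */
	static void irq_return_path(void)
	{
		if (preempt_count == 0 && need_resched)
			schedule();
	}

	/* preempt_schedule() bails out only on a non-zero count, then schedules */
	static void preempt_schedule(int irq_hits_window)
	{
		if (irq_hits_window)
			irq_return_path();	/* interrupt lands before add_preempt_count() */
		if (preempt_count)
			return;
		preempt_count++;		/* models add_preempt_count(PREEMPT_ACTIVE) */
		schedule();
		preempt_count--;		/* models sub_preempt_count(PREEMPT_ACTIVE) */
	}

	int main(void)
	{
		/* entry.S: need_resched is set, sti, call preempt_schedule */
		preempt_schedule(1);
		printf("scheduler passes: %d\n", schedules);	/* prints 2, one is wasted */
		return 0;
	}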

The patch prevents this by leaving preemption disabled (preempt_count
set to 1) across the call and providing a separate function
entry_preempt_schedule(). This was done by moving the inner loop of
preempt_schedule() into a separate function do_preempt_schedule(),
which takes the increment as an argument. It is called from
preempt_schedule() with PREEMPT_ACTIVE and from entry_preempt_schedule()
with PREEMPT_ACTIVE - 1, to account for the 1 already present in
preempt_count.

This split allows different plausibility checks for the entry code and
for in-kernel callers, and makes it simpler to adapt other platforms,
especially ARM, to the new code.
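
To spell out the accounting (a sketch only, assuming i386's
PREEMPT_ACTIVE value of 0x10000000): both entry points reach schedule()
with the same preempt_count:

	/* Sketch of the preempt_count arithmetic, not kernel code. */
	#include <assert.h>

	#define PREEMPT_ACTIVE 0x10000000	/* assumed i386 value */

	int main(void)
	{
		/* preempt_enable() path: count is 0 when preempt_schedule() runs */
		int count_kernel = 0 + PREEMPT_ACTIVE;		/* do_preempt_schedule(PREEMPT_ACTIVE) */

		/* entry.S path: count was preset to 1 before sti */
		int count_entry = 1 + (PREEMPT_ACTIVE - 1);	/* do_preempt_schedule(PREEMPT_ACTIVE - 1) */

		/* both paths enter schedule() with the same preempt_count */
		assert(count_kernel == count_entry);
		return 0;
	}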

Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

---
sched.c | 44 ++++++++++++++++++++++++++++++++++----------
entry.S | 3 ++-
2 files changed, 36 insertions(+), 11 deletions(-)
---
Index: 2.6.10-mm1/kernel/sched.c
===================================================================
--- 2.6.10-mm1/kernel/sched.c (revision 141)
+++ 2.6.10-mm1/kernel/sched.c (working copy)
@@ -2836,22 +2836,15 @@
* off of preempt_enable. Kernel preemptions off return from interrupt
* occur there and call schedule directly.
*/
-asmlinkage void __sched preempt_schedule(void)
+static void __sched do_preempt_schedule(int incr)
{
- struct thread_info *ti = current_thread_info();
#ifdef CONFIG_PREEMPT_BKL
struct task_struct *task = current;
int saved_lock_depth;
#endif
- /*
- * If there is a non-zero preempt_count or interrupts are disabled,
- * we do not want to preempt the current task. Just return..
- */
- if (unlikely(ti->preempt_count || irqs_disabled()))
- return;

need_resched:
- add_preempt_count(PREEMPT_ACTIVE);
+ add_preempt_count(incr);
/*
* We keep the big kernel semaphore locked, but we
* clear ->lock_depth so that schedule() doesnt
@@ -2865,15 +2858,46 @@
#ifdef CONFIG_PREEMPT_BKL
task->lock_depth = saved_lock_depth;
#endif
- sub_preempt_count(PREEMPT_ACTIVE);
+ sub_preempt_count(incr);

/* we could miss a preemption opportunity between schedule and now */
barrier();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
}
+/*
+ * this is is the entry point to schedule() from in-kernel preemption
+ * off of preempt_enable. Kernel preemptions off return from interrupt
+ * occur there and call schedule directly.
+ */
+asmlinkage void __sched preempt_schedule(void)
+{
+ struct thread_info *ti = current_thread_info();
+ /*
+ * If there is a non-zero preempt_count or interrupts are disabled,
+ * we do not want to preempt the current task. Just return..
+ */
+ if (unlikely(ti->preempt_count || irqs_disabled()))
+ return;
+ do_preempt_schedule(PREEMPT_ACTIVE);
+}

EXPORT_SYMBOL(preempt_schedule);
+
+asmlinkage void __sched entry_preempt_schedule(void)
+{
+ struct thread_info *ti = current_thread_info();
+ /*
+ * If preempt_count != 1 or interrupts are disabled
+ * the calling code is broken.
+ */
+ BUG_ON((ti->preempt_count != 1 || irqs_disabled()));
+
+ do_preempt_schedule(PREEMPT_ACTIVE - 1);
+}
+
+EXPORT_SYMBOL(entry_preempt_schedule);
+
#endif /* CONFIG_PREEMPT */

int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, void *key)
Index: 2.6.10-mm1/arch/i386/kernel/entry.S
===================================================================
--- 2.6.10-mm1/arch/i386/kernel/entry.S (revision 141)
+++ 2.6.10-mm1/arch/i386/kernel/entry.S (working copy)
@@ -197,8 +197,9 @@
jz restore_all
testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all
+ movl $1,TI_preempt_count(%ebp)
sti
- call preempt_schedule
+ call entry_preempt_schedule
cli
movl $0,TI_preempt_count(%ebp)
jmp need_resched