Re: [RFC][PATCH 1/3] sched: Provide delayed wakeup list

From: Manfred Spraul
Date: Sun Oct 02 2011 - 09:55:26 EST


Hi Peter,

Do you still work on the wake_up_list() patch?

On 09/14/2011 03:30 PM, Peter Zijlstra wrote:
/*
* wake flags
*/
@@ -1255,6 +1268,8 @@ struct task_struct {
unsigned int btrace_seq;
#endif

+ struct wake_list_node wake_list;
+
unsigned int policy;
cpumask_t cpus_allowed;
So there is a single, global wake_list node per task, shared by all users of the wake list.

@@ -2143,6 +2158,35 @@ extern void wake_up_new_task(struct task
extern void sched_fork(struct task_struct *p);
extern void sched_dead(struct task_struct *p);

+static inline void
+wake_list_add(struct wake_list_head *head, struct task_struct *p)
+{
+ struct wake_list_node *n =&p->wake_list;
+
+ get_task_struct(p);
+ /*
+ * Atomically grab the task, if ->wake_list is !0 already it means
+ * its already queued (either by us or someone else) and will get the
+ * wakeup due to that.
+ *
+ * This cmpxchg() implies a full barrier, which pairs with the write
+ * barrier implied by the wakeup in wake_up_list().
+ */
+ if (cmpxchg(&n->next, 0, n) != 0) {
+ /* It was already queued, drop the extra ref and we're done. */
+ put_task_struct(p);
+ return;
+ }
+
So a task can be queued only once, i.e. it can be on at most one wake_list at a time.
+ /*
+ * The head is context local, there can be no concurrency.
+ */
+ n->next = head->first;
+ head->first = n;
+}
+
+extern void wake_up_list(struct wake_list_head *head, unsigned int state);
+
extern void proc_caches_init(void);
extern void flush_signals(struct task_struct *);
extern void __flush_signals(struct task_struct *);
Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -2916,6 +2916,25 @@ int wake_up_state(struct task_struct *p,
return try_to_wake_up(p, state, 0);
}

+void wake_up_list(struct wake_list_head *head, unsigned int state)
+{
+ struct wake_list_node *n = head->first;
+ struct task_struct *p;
+
+ while (n != WAKE_LIST_TAIL) {
+ p = container_of(n, struct task_struct, wake_list);
+ n = n->next;
+
+ p->wake_list.next = NULL;
+ /*
+ * wake_up_state() implies a wmb() to pair with the queueing
+ * in wake_list_add() so as not to miss wakeups.
+ */
+ wake_up_state(p, state);
+ put_task_struct(p);
+ }
+}
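And wake_up_list() takes the state mask from its caller.
That can't work:
What if one waker wants to wake TASK_INTERRUPTIBLE and the other waker wants to wake TASK_UNINTERRUPTIBLE|TASK_INTERRUPTIBLE?
Since the task can be queued only once, it is woken only with the state mask of the waker whose list it ended up on, and the other waker's wakeup can be lost.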

--
Manfred
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/