Re: [PATCH 27/34] sched_ext: Implement SCX_KICK_WAIT

From: Andrea Righi
Date: Thu Jul 13 2023 - 09:46:04 EST


On Mon, Jul 10, 2023 at 03:13:45PM -1000, Tejun Heo wrote:
...
> +	for_each_cpu_andnot(cpu, this_rq->scx.cpus_to_wait,
> +			    cpumask_of(this_cpu)) {
> +		/*
> +		 * Pairs with smp_store_release() issued by this CPU in
> +		 * scx_notify_pick_next_task() on the resched path.
> +		 *
> +		 * We busy-wait here to guarantee that no other task can be
> +		 * scheduled on our core before the target CPU has entered the
> +		 * resched path.
> +		 */
> +		while (smp_load_acquire(&cpu_rq(cpu)->scx.pnt_seq) == pseqs[cpu])
> +			cpu_relax();
> +	}
> +

...

> +static inline void scx_notify_pick_next_task(struct rq *rq,
> +					     const struct task_struct *p,
> +					     const struct sched_class *active)
> +{
> +#ifdef CONFIG_SMP
> +	if (!scx_enabled())
> +		return;
> +	/*
> +	 * Pairs with the smp_load_acquire() issued by a CPU in
> +	 * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
> +	 * resched.
> +	 */
> +	smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
> +#endif
> +}

We can't use smp_load_acquire()/smp_store_release() with a u64 on
32-bit architectures: those helpers require a native word-sized type,
and a 64-bit load/store is not atomic there.

For example, on armhf the build is broken:

In function ‘scx_notify_pick_next_task’,
inlined from ‘__pick_next_task’ at /<<PKGBUILDDIR>>/kernel/sched/core.c:6106:4,
inlined from ‘pick_next_task’ at /<<PKGBUILDDIR>>/kernel/sched/core.c:6605:9,
inlined from ‘__schedule’ at /<<PKGBUILDDIR>>/kernel/sched/core.c:6750:9:
/<<PKGBUILDDIR>>/include/linux/compiler_types.h:397:45: error: call to ‘__compiletime_assert_597’ declared with attribute error: Need native word sized stores/loads for atomicity.
397 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^
/<<PKGBUILDDIR>>/include/linux/compiler_types.h:378:25: note: in definition of macro ‘__compiletime_assert’
378 | prefix ## suffix(); \
| ^~~~~~
/<<PKGBUILDDIR>>/include/linux/compiler_types.h:397:9: note: in expansion of macro ‘_compiletime_assert’
397 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^~~~~~~~~~~~~~~~~~~
/<<PKGBUILDDIR>>/include/linux/compiler_types.h:400:9: note: in expansion of macro ‘compiletime_assert’
400 | compiletime_assert(__native_word(t), \
| ^~~~~~~~~~~~~~~~~~
/<<PKGBUILDDIR>>/include/asm-generic/barrier.h:141:9: note: in expansion of macro ‘compiletime_assert_atomic_type’
141 | compiletime_assert_atomic_type(*p); \
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/<<PKGBUILDDIR>>/include/asm-generic/barrier.h:172:55: note: in expansion of macro ‘__smp_store_release’
172 | #define smp_store_release(p, v) do { kcsan_release(); __smp_store_release(p, v); } while (0)
| ^~~~~~~~~~~~~~~~~~~
/<<PKGBUILDDIR>>/kernel/sched/ext.h:159:9: note: in expansion of macro ‘smp_store_release’
159 | smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);

There's probably a better way to fix this, but for now I've worked
around it using cmpxchg64() - see the patch below.

I'm not sure whether we already have an equivalent of
smp_store_release_u64()/smp_load_acquire_u64(). If not, it may be worth
adding them in a more generic place.
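
FWIW, a possibly cleaner alternative (untested sketch, not part of the
patch below) would be to turn pnt_seq into an atomic64_t and use the
generic atomic64 acquire/release accessors, which already handle 32-bit
architectures (either via native 64-bit atomics or the spinlock-based
fallback in lib/atomic64.c), instead of open-coding cmpxchg64() loops.
Something along these lines (helper names made up):

/* Untested sketch: assumes rq->scx.pnt_seq is converted to atomic64_t. */
static inline u64 scx_pnt_seq_load_acquire(struct rq *rq)
{
	/* Pairs with the release store in scx_pnt_seq_bump() below. */
	return atomic64_read_acquire(&rq->scx.pnt_seq);
}

static inline void scx_pnt_seq_bump(struct rq *rq)
{
	/*
	 * Only the local CPU updates its own pnt_seq, so a plain read
	 * followed by a release store is sufficient; no atomic RMW needed.
	 */
	atomic64_set_release(&rq->scx.pnt_seq,
			     atomic64_read(&rq->scx.pnt_seq) + 1);
}

That would keep the acquire/release pairing explicit without adding new
generic smp_*_u64() helpers just for this one use case.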

-Andrea

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 051c79fa25f7..5da72b1cf88d 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3667,7 +3667,7 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work)
 		 * scheduled on our core before the target CPU has entered the
 		 * resched path.
 		 */
-		while (smp_load_acquire(&cpu_rq(cpu)->scx.pnt_seq) == pseqs[cpu])
+		while (smp_load_acquire_u64(&cpu_rq(cpu)->scx.pnt_seq) == pseqs[cpu])
 			cpu_relax();
 	}
 
diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
index 405037a4e6ce..ef4a24d77d30 100644
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext.h
@@ -144,6 +144,40 @@ void __scx_notify_pick_next_task(struct rq *rq,
 				 struct task_struct *p,
 				 const struct sched_class *active);
 
+#ifdef CONFIG_64BIT
+static inline u64 smp_load_acquire_u64(u64 *ptr)
+{
+	return smp_load_acquire(ptr);
+}
+
+static inline void smp_store_release_u64(u64 *ptr, u64 val)
+{
+	smp_store_release(ptr, val);
+}
+#else
+static inline u64 smp_load_acquire_u64(u64 *ptr)
+{
+	u64 prev, next;
+
+	do {
+		prev = *ptr;
+		next = prev;
+	} while (cmpxchg64(ptr, prev, next) != prev);
+
+	return prev;
+}
+
+static inline void smp_store_release_u64(u64 *ptr, u64 val)
+{
+	u64 prev, next;
+
+	do {
+		prev = *ptr;
+		next = val;
+	} while (cmpxchg64(ptr, prev, next) != prev);
+}
+#endif
+
 static inline void scx_notify_pick_next_task(struct rq *rq,
 					     struct task_struct *p,
 					     const struct sched_class *active)
@@ -156,7 +190,7 @@ static inline void scx_notify_pick_next_task(struct rq *rq,
 	 * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
 	 * resched.
 	 */
-	smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
+	smp_store_release_u64(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
 #endif
 	if (!static_branch_unlikely(&scx_ops_cpu_preempt))
 		return;
--
2.40.1