[RFC PATCH 35/86] thread_info: change to tif_need_resched(resched_t)

From: Ankur Arora
Date: Tue Nov 07 2023 - 17:03:34 EST


tif_need_resched() now takes a parameter specifying the resched
type: RESCHED_lazy for when we allow the running task to run to
completion before eventually scheduling at a userspace boundary,
and RESCHED_eager for when we want to reschedule at the next safe
preemption point.

need_resched(), which is used by non-core code, now checks for the
presence of either of the need-resched bits. Given that
need_resched() (and, to a lesser extent, tif_need_resched()) is
used extensively in the kernel, it is worth noting the common
uses and how they will change:

- idle: we always want to schedule out of idle whenever there is
any work. So the appropriate check is for either of the conditions.
(Currently we use need_resched() in most places, and the interfaces
defined in sched/idle.h use tif_need_resched().)

However, as discussed in later commits it is critical that
when scheduling out of idle, we always reschedule with
RESCHED_eager (which maps to TIF_NEED_RESCHED.) This suggests
that idle code everywhere should instead just do:

while (!tif_need_resched(RESCHED_eager)) { ... }

or similar. That is true, but we have a lot of idle code and it
does not seem to make sense to expose scheduler implementation
details all over.

- uses in conjunction with preempt_count(): we only ever want to
fold or make preemption decisions based on TIF_NEED_RESCHED, not
TIF_NEED_RESCHED_LAZY. So, related logic needs to use
tif_need_resched(RESCHED_eager).

- code that relinquishes resources temporarily (locks, irq, etc)
checks for should_resched() and would preempt if TIF_NEED_RESCHED
were set due to the (preempt_count() == offset) check.
The hand-rolled versions typically check for need_resched(),
which is a wider check.

In either case the final arbiter is preempt_schedule(), which,
via preemptible(), does the narrower check.

Would it make sense to schedule out for both the need-resched
flags?

Originally-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
arch/s390/include/asm/preempt.h | 4 ++--
drivers/acpi/processor_idle.c | 2 +-
include/asm-generic/preempt.h | 4 ++--
include/linux/preempt.h | 2 +-
include/linux/sched.h | 4 +++-
include/linux/sched/idle.h | 8 ++++----
include/linux/thread_info.h | 8 ++++----
kernel/sched/idle.c | 2 +-
kernel/trace/trace.c | 2 +-
9 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index bf15da0fedbc..4dddefae1387 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -114,13 +114,13 @@ static inline void __preempt_count_sub(int val)

static inline bool __preempt_count_dec_and_test(void)
{
- return !--S390_lowcore.preempt_count && tif_need_resched();
+ return !--S390_lowcore.preempt_count && tif_need_resched(RESCHED_eager);
}

static inline bool should_resched(int preempt_offset)
{
return unlikely(preempt_count() == preempt_offset &&
- tif_need_resched());
+ tif_need_resched(RESCHED_eager));
}

#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 3a34a8c425fe..1a69f082833e 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -108,7 +108,7 @@ static const struct dmi_system_id processor_power_dmi_table[] = {
*/
static void __cpuidle acpi_safe_halt(void)
{
- if (!tif_need_resched()) {
+ if (!need_resched()) {
raw_safe_halt();
raw_local_irq_disable();
}
diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h
index b4d43a4af5f7..4f4abcc5981d 100644
--- a/include/asm-generic/preempt.h
+++ b/include/asm-generic/preempt.h
@@ -66,7 +66,7 @@ static __always_inline bool __preempt_count_dec_and_test(void)
* operations; we cannot use PREEMPT_NEED_RESCHED because it might get
* lost.
*/
- return !--*preempt_count_ptr() && tif_need_resched();
+ return !--*preempt_count_ptr() && tif_need_resched(RESCHED_eager);
}

/*
@@ -75,7 +75,7 @@ static __always_inline bool __preempt_count_dec_and_test(void)
static __always_inline bool should_resched(int preempt_offset)
{
return unlikely(preempt_count() == preempt_offset &&
- tif_need_resched());
+ tif_need_resched(RESCHED_eager));
}

#ifdef CONFIG_PREEMPTION
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 1424670df161..0abc6a673c41 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -301,7 +301,7 @@ do { \
} while (0)
#define preempt_fold_need_resched() \
do { \
- if (tif_need_resched()) \
+ if (tif_need_resched(RESCHED_eager)) \
set_preempt_need_resched(); \
} while (0)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 95d47783ff6e..5f0d7341cb88 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2172,9 +2172,11 @@ static inline int rwlock_needbreak(rwlock_t *lock)

static __always_inline bool need_resched(void)
{
- return unlikely(tif_need_resched());
+ return unlikely(tif_need_resched(RESCHED_eager) ||
+ tif_need_resched(RESCHED_lazy));
}

+
/*
* Wrappers for p->thread_info->cpu access. No-op on UP.
*/
diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index 478084f9105e..719416fe8ddc 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -63,7 +63,7 @@ static __always_inline bool __must_check current_set_polling_and_test(void)
*/
smp_mb__after_atomic();

- return unlikely(tif_need_resched());
+ return unlikely(need_resched());
}

static __always_inline bool __must_check current_clr_polling_and_test(void)
@@ -76,7 +76,7 @@ static __always_inline bool __must_check current_clr_polling_and_test(void)
*/
smp_mb__after_atomic();

- return unlikely(tif_need_resched());
+ return unlikely(need_resched());
}

#else
@@ -85,11 +85,11 @@ static inline void __current_clr_polling(void) { }

static inline bool __must_check current_set_polling_and_test(void)
{
- return unlikely(tif_need_resched());
+ return unlikely(need_resched());
}
static inline bool __must_check current_clr_polling_and_test(void)
{
- return unlikely(tif_need_resched());
+ return unlikely(need_resched());
}
#endif

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 4eb22b13bf64..be5333a2c832 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -200,17 +200,17 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti

#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H

-static __always_inline bool tif_need_resched(void)
+static __always_inline bool tif_need_resched(resched_t r)
{
- return arch_test_bit(TIF_NEED_RESCHED,
+ return arch_test_bit(tif_resched(r),
(unsigned long *)(&current_thread_info()->flags));
}

#else

-static __always_inline bool tif_need_resched(void)
+static __always_inline bool tif_need_resched(resched_t r)
{
- return test_bit(TIF_NEED_RESCHED,
+ return test_bit(tif_resched(r),
(unsigned long *)(&current_thread_info()->flags));
}

diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 5007b25c5bc6..d4a55448e459 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -57,7 +57,7 @@ static noinline int __cpuidle cpu_idle_poll(void)
ct_cpuidle_enter();

raw_local_irq_enable();
- while (!tif_need_resched() &&
+ while (!need_resched() &&
(cpu_idle_force_poll || tick_check_broadcast_expired()))
cpu_relax();
raw_local_irq_disable();
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7f565f0a00da..7f067ad9cf50 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2720,7 +2720,7 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
trace_flags |= TRACE_FLAG_BH_OFF;

- if (tif_need_resched())
+ if (tif_need_resched(RESCHED_eager))
trace_flags |= TRACE_FLAG_NEED_RESCHED;
if (test_preempt_need_resched())
trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
--
2.31.1