[PATCH 05/30] sched: *_tsk_need_resched() now takes resched_t as param

From: Ankur Arora
Date: Tue Feb 13 2024 - 00:58:12 EST


*_tsk_need_resched() now test for the immediacy of the need-resched.

These interfaces are primarily used in the scheduler and RCU. Outside
of the scheduler, set_tsk_need_resched() is typically used to force
a context switch by setting need-resched and folding it in. Update
those calls with set_tsk_need_resched(..., NR_now).

For scheduler usage, preserve the current semantics by using
set_tsk_need_resched(..., NR_now), test_tsk_need_resched(..., NR_now).

Note that clear_tsk_need_resched() is only used from __schedule()
to do any clearing needing before switching context. Now it clears
all the need-resched flags.

Cc: Peter Ziljstra <peterz@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Juri Lelli <juri.lelli@xxxxxxxxxx>
Cc: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
Cc: Paul E. McKenney <paulmck@xxxxxxxxxx>
Originally-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Link: https://lore.kernel.org/lkml/87jzshhexi.ffs@tglx/
Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
arch/s390/mm/pfault.c | 2 +-
include/linux/sched.h | 30 ++++++++++++++++++++++++------
kernel/rcu/tiny.c | 2 +-
kernel/rcu/tree.c | 4 ++--
kernel/rcu/tree_exp.h | 4 ++--
kernel/rcu/tree_plugin.h | 4 ++--
kernel/rcu/tree_stall.h | 2 +-
kernel/sched/core.c | 9 +++++----
kernel/sched/deadline.c | 4 ++--
kernel/sched/fair.c | 2 +-
kernel/sched/idle.c | 2 +-
kernel/sched/rt.c | 4 ++--
12 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
index 1aac13bb8f53..4e075059d2e7 100644
--- a/arch/s390/mm/pfault.c
+++ b/arch/s390/mm/pfault.c
@@ -198,7 +198,7 @@ static void pfault_interrupt(struct ext_code ext_code,
* return to userspace schedule() to block.
*/
__set_current_state(TASK_UNINTERRUPTIBLE);
- set_tsk_need_resched(tsk);
+ set_tsk_need_resched(tsk, NR_now);
set_preempt_need_resched();
}
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e790860d89c3..d226c2920cff 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1948,19 +1948,36 @@ static inline bool test_tsk_thread_flag(struct task_struct *tsk, int flag)
return test_ti_thread_flag(task_thread_info(tsk), flag);
}

-static inline void set_tsk_need_resched(struct task_struct *tsk)
+/*
+ * With !CONFIG_PREEMPT_AUTO, tif_resched(NR_lazy) reduces to
+ * tif_resched(NR_now). Add a check in the helpers below to ensure
+ * we don't touch the tif_reshed(NR_now) bit unnecessarily.
+ */
+static inline void set_tsk_need_resched(struct task_struct *tsk, resched_t rs)
{
- set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+ if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
+ set_tsk_thread_flag(tsk, tif_resched(rs));
+ else
+ /*
+ * NR_lazy is only touched under CONFIG_PREEMPT_AUTO.
+ */
+ BUG();
}

static inline void clear_tsk_need_resched(struct task_struct *tsk)
{
- clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+ clear_tsk_thread_flag(tsk, tif_resched(NR_now));
+
+ if (IS_ENABLED(CONFIG_PREEMPT_AUTO))
+ clear_tsk_thread_flag(tsk, tif_resched(NR_lazy));
}

-static inline bool test_tsk_need_resched(struct task_struct *tsk)
+static inline bool test_tsk_need_resched(struct task_struct *tsk, resched_t rs)
{
- return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
+ if (IS_ENABLED(CONFIG_PREEMPT_AUTO) || rs == NR_now)
+ return unlikely(test_tsk_thread_flag(tsk, tif_resched(rs)));
+ else
+ return false;
}

/*
@@ -2104,7 +2121,8 @@ static __always_inline bool need_resched(void)

static __always_inline bool need_resched_lazy(void)
{
- return unlikely(tif_need_resched(NR_lazy));
+ return IS_ENABLED(CONFIG_PREEMPT_AUTO) &&
+ unlikely(tif_need_resched(NR_lazy));
}

/*
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index fec804b79080..a085d337434e 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -73,7 +73,7 @@ void rcu_sched_clock_irq(int user)
if (user) {
rcu_qs();
} else if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
}
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1ae851777806..d6ac2b703a6d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2250,7 +2250,7 @@ void rcu_sched_clock_irq(int user)
if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
/* Idle and userspace execution already are quiescent states. */
if (!rcu_is_cpu_rrupt_from_idle() && !user) {
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
}
__this_cpu_write(rcu_data.rcu_urgent_qs, false);
@@ -2409,7 +2409,7 @@ static __latent_entropy void rcu_core(void)
if (IS_ENABLED(CONFIG_PREEMPT_COUNT) && (!(preempt_count() & PREEMPT_MASK))) {
rcu_preempt_deferred_qs(current);
} else if (rcu_preempt_need_deferred_qs(current)) {
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
}

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6d7cea5d591f..34d0bbd93343 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -759,7 +759,7 @@ static void rcu_exp_handler(void *unused)
rcu_report_exp_rdp(rdp);
} else {
WRITE_ONCE(rdp->cpu_no_qs.b.exp, true);
- set_tsk_need_resched(t);
+ set_tsk_need_resched(t, NR_now);
set_preempt_need_resched();
}
return;
@@ -860,7 +860,7 @@ static void rcu_exp_need_qs(void)
__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
/* Store .exp before .rcu_urgent_qs. */
smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
}

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 41021080ad25..26c79246873a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -658,7 +658,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
// Also if no expediting and no possible deboosting,
// slow is OK. Plus nohz_full CPUs eventually get
// tick enabled.
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled &&
expboost && !rdp->defer_qs_iw_pending && cpu_online(rdp->cpu)) {
@@ -725,7 +725,7 @@ static void rcu_flavor_sched_clock_irq(int user)
(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
/* No QS, force context switch if deferred. */
if (rcu_preempt_need_deferred_qs(t)) {
- set_tsk_need_resched(t);
+ set_tsk_need_resched(t, NR_now);
set_preempt_need_resched();
}
} else if (rcu_preempt_need_deferred_qs(t)) {
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 5d666428546b..9d4aa4fde5ae 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -712,7 +712,7 @@ static void print_cpu_stall(unsigned long gps)
* progress and it could be we're stuck in kernel space without context
* switches for an entirely unreasonable amount of time.
*/
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);
set_preempt_need_resched();
}

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9116bcc90346..41c3bd49a700 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -933,7 +933,7 @@ static bool set_nr_if_polling(struct task_struct *p)
#else
static inline bool set_nr_and_not_polling(struct task_struct *p)
{
- set_tsk_need_resched(p);
+ set_tsk_need_resched(p, NR_now);
return true;
}

@@ -1045,13 +1045,13 @@ void resched_curr(struct rq *rq)

lockdep_assert_rq_held(rq);

- if (test_tsk_need_resched(curr))
+ if (test_tsk_need_resched(curr, NR_now))
return;

cpu = cpu_of(rq);

if (cpu == smp_processor_id()) {
- set_tsk_need_resched(curr);
+ set_tsk_need_resched(curr, NR_now);
set_preempt_need_resched();
return;
}
@@ -2247,7 +2247,8 @@ void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags)
* A queue event has occurred, and we're going to schedule. In
* this case, we can save a useless back to back clock update.
*/
- if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
+ if (task_on_rq_queued(rq->curr) &&
+ test_tsk_need_resched(rq->curr, NR_now))
rq_clock_skip_update(rq);
}

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a04a436af8cc..b4e68cfc3c3a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2035,7 +2035,7 @@ static void wakeup_preempt_dl(struct rq *rq, struct task_struct *p,
* let us try to decide what's the best thing to do...
*/
if ((p->dl.deadline == rq->curr->dl.deadline) &&
- !test_tsk_need_resched(rq->curr))
+ !test_tsk_need_resched(rq->curr, NR_now))
check_preempt_equal_dl(rq, p);
#endif /* CONFIG_SMP */
}
@@ -2564,7 +2564,7 @@ static void pull_dl_task(struct rq *this_rq)
static void task_woken_dl(struct rq *rq, struct task_struct *p)
{
if (!task_on_cpu(rq, p) &&
- !test_tsk_need_resched(rq->curr) &&
+ !test_tsk_need_resched(rq->curr, NR_now) &&
p->nr_cpus_allowed > 1 &&
dl_task(rq->curr) &&
(rq->curr->nr_cpus_allowed < 2 ||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 533547e3c90a..ae9b237fa32b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8298,7 +8298,7 @@ static void check_preempt_wakeup_fair(struct rq *rq, struct task_struct *p, int
* prevents us from potentially nominating it as a false LAST_BUDDY
* below.
*/
- if (test_tsk_need_resched(curr))
+ if (test_tsk_need_resched(curr, NR_now))
return;

/* Idle tasks are by definition preempted by non-idle tasks. */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index be53d164e267..cd25f71f43a7 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -359,7 +359,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
struct idle_timer *it = container_of(timer, struct idle_timer, timer);

WRITE_ONCE(it->done, 1);
- set_tsk_need_resched(current);
+ set_tsk_need_resched(current, NR_now);

return HRTIMER_NORESTART;
}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3261b067b67e..c57cc8427a57 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1680,7 +1680,7 @@ static void wakeup_preempt_rt(struct rq *rq, struct task_struct *p, int flags)
* to move current somewhere else, making room for our non-migratable
* task.
*/
- if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
+ if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr, NR_now))
check_preempt_equal_prio(rq, p);
#endif
}
@@ -2415,7 +2415,7 @@ static void pull_rt_task(struct rq *this_rq)
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
bool need_to_push = !task_on_cpu(rq, p) &&
- !test_tsk_need_resched(rq->curr) &&
+ !test_tsk_need_resched(rq->curr, NR_now) &&
p->nr_cpus_allowed > 1 &&
(dl_task(rq->curr) || rt_task(rq->curr)) &&
(rq->curr->nr_cpus_allowed < 2 ||
--
2.31.1