[ANNOUNCE] v6.3.1-rt13

From: Sebastian Andrzej Siewior
Date: Tue May 09 2023 - 12:46:50 EST


Dear RT folks!

I'm pleased to announce the v6.3.1-rt13 patch set.

Changes since v6.3.1-rt12:

- Two posix-timers patches picked up from the list. They are scheduled
  for upstream inclusion. One of them prevents a livelock on PREEMPT_RT
  in itimer_delete(), the other reworks the timer ID allocation in
  posix_timer_add(). Patches by Thomas Gleixner. A small stand-alone
  sketch of the new ID allocation follows after this list.

- A softirq handling patch from the list, 'Revert "softirq: Let
  ksoftirqd do its job"' by Paolo Abeni. This revert should reduce a lot
  of the trouble that starts once ksoftirqd is woken up.
  The v6.1-RT series has the ktimersd thread, which mitigates some of
  that pain; this revert should render the ktimersd patch obsolete.
  Should everything work out as expected, I intend to backport this
  patch to the earlier RT series and revert the ktimersd patch in the
  v6.1 series. A toy model of the behaviour change is sketched below.
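
For illustration, here is a minimal stand-alone sketch of the wrapping
ID search that the reworked posix_timer_add() performs (user-space code,
not the kernel implementation; next_id and id_in_use() are made-up
stand-ins for sig->next_posix_timer_id and the hash lookup done under
hash_lock):

/*
 * Sketch only: probe at most INT_MAX + 1 candidate IDs starting at the
 * remembered position, wrap within the positive space and fail with
 * -EAGAIN once every ID has been tried.
 */
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

static unsigned int next_id;	/* plays the role of sig->next_posix_timer_id */

static bool id_in_use(unsigned int id)
{
	return id < 3;		/* pretend IDs 0, 1 and 2 are already taken */
}

static int alloc_timer_id(void)
{
	unsigned long tried;

	for (tried = 0; tried <= INT_MAX; tried++) {
		unsigned int id = next_id;

		/* Advance the search position, clamped to the positive space. */
		next_id = (id + 1) & INT_MAX;

		if (!id_in_use(id))
			return id;
	}
	return -EAGAIN;		/* the whole ID space is in use */
}

int main(void)
{
	printf("first free id: %d\n", alloc_timer_id());
	return 0;
}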
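
And a toy model of what the softirq revert changes on the interrupt-exit
and local_bh_enable() paths (purely illustrative user-space code with
assumed names; the real decision points are invoke_softirq(),
__local_bh_enable_ip() and do_softirq() in the diff below):

/*
 * Toy model, not kernel code: before the revert, pending softirqs were
 * skipped as soon as ksoftirqd was runnable and all further processing
 * was deferred to the thread; after the revert they are handled on the
 * spot again.
 */
#include <stdbool.h>
#include <stdio.h>

static bool ksoftirqd_awake = true;	/* pretend the thread has been woken */

static void irq_exit_model(bool with_ksoftirqd_running_check)
{
	bool pending = true;		/* pretend a softirq is pending */

	if (with_ksoftirqd_running_check && ksoftirqd_awake) {
		printf("deferred to ksoftirqd\n");	/* -rt12 behaviour */
		return;
	}
	if (pending)
		printf("handled on the spot\n");	/* -rt13 behaviour */
}

int main(void)
{
	irq_exit_model(true);
	irq_exit_model(false);
	return 0;
}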

Known issues
None

The delta patch against v6.3.1-rt12 is appended below and can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/6.3/incr/patch-6.3.1-rt12-rt13.patch.xz

You can get this release via the git tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v6.3.1-rt13

The RT patch against v6.3.1 can be found here:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/6.3/older/patch-6.3.1-rt13.patch.xz

The split quilt queue is available at:

https://cdn.kernel.org/pub/linux/kernel/projects/rt/6.3/older/patches-6.3.1-rt13.tar.xz

Sebastian

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index a39be9f9ba966..b38ce53576000 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -136,7 +136,7 @@ struct signal_struct {
#ifdef CONFIG_POSIX_TIMERS

/* POSIX.1b Interval Timers */
- int posix_timer_id;
+ unsigned int next_posix_timer_id;
struct list_head posix_timers;

/* ITIMER_REAL timer for the process */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 82f3e68fbe220..af9e879bbbf75 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -80,21 +80,6 @@ static void wakeup_softirqd(void)
wake_up_process(tsk);
}

-/*
- * If ksoftirqd is scheduled, we do not want to process pending softirqs
- * right now. Let ksoftirqd handle this at its own rate, to get fairness,
- * unless we're doing some of the synchronous softirqs.
- */
-#define SOFTIRQ_NOW_MASK ((1 << HI_SOFTIRQ) | (1 << TASKLET_SOFTIRQ))
-static bool ksoftirqd_running(unsigned long pending)
-{
- struct task_struct *tsk = __this_cpu_read(ksoftirqd);
-
- if (pending & SOFTIRQ_NOW_MASK)
- return false;
- return tsk && task_is_running(tsk) && !__kthread_should_park(tsk);
-}
-
#ifdef CONFIG_TRACE_IRQFLAGS
DEFINE_PER_CPU(int, hardirqs_enabled);
DEFINE_PER_CPU(int, hardirq_context);
@@ -236,7 +221,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
goto out;

pending = local_softirq_pending();
- if (!pending || ksoftirqd_running(pending))
+ if (!pending)
goto out;

/*
@@ -432,9 +417,6 @@ static inline bool should_wake_ksoftirqd(void)

static inline void invoke_softirq(void)
{
- if (ksoftirqd_running(local_softirq_pending()))
- return;
-
if (!force_irqthreads() || !__this_cpu_read(ksoftirqd)) {
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
/*
@@ -468,7 +450,7 @@ asmlinkage __visible void do_softirq(void)

pending = local_softirq_pending();

- if (pending && !ksoftirqd_running(pending))
+ if (pending)
do_softirq_own_stack();

local_irq_restore(flags);
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 0c8a87a11b39d..863b8ac073c30 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -140,25 +140,29 @@ static struct k_itimer *posix_timer_by_id(timer_t id)
static int posix_timer_add(struct k_itimer *timer)
{
struct signal_struct *sig = current->signal;
- int first_free_id = sig->posix_timer_id;
struct hlist_head *head;
- int ret = -ENOENT;
+ unsigned int start, id;

- do {
+ /* Can be written by a different task concurrently in the loop below */
+ start = READ_ONCE(sig->next_posix_timer_id);
+
+ for (id = ~start; start != id; id++) {
spin_lock(&hash_lock);
- head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
- if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
+ id = sig->next_posix_timer_id;
+
+ /* Write the next ID back. Clamp it to the positive space */
+ WRITE_ONCE(sig->next_posix_timer_id, (id + 1) & INT_MAX);
+
+ head = &posix_timers_hashtable[hash(sig, id)];
+ if (!__posix_timers_find(head, sig, id)) {
hlist_add_head_rcu(&timer->t_hash, head);
- ret = sig->posix_timer_id;
+ spin_unlock(&hash_lock);
+ return id;
}
- if (++sig->posix_timer_id < 0)
- sig->posix_timer_id = 0;
- if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
- /* Loop over all possible ids completed */
- ret = -EAGAIN;
spin_unlock(&hash_lock);
- } while (ret == -ENOENT);
- return ret;
+ }
+ /* POSIX return code when no timer ID could be allocated */
+ return -EAGAIN;
}

static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
@@ -1033,27 +1037,59 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
}

/*
- * return timer owned by the process, used by exit_itimers
+ * Delete a timer if it is armed, remove it from the hash and schedule it
+ * for RCU freeing.
*/
static void itimer_delete(struct k_itimer *timer)
{
-retry_delete:
- spin_lock_irq(&timer->it_lock);
+ unsigned long flags;

+retry_delete:
+ /*
+ * irqsave is required to make timer_wait_running() work.
+ */
+ spin_lock_irqsave(&timer->it_lock, flags);
+
+ /*
+ * Even if the timer is not longer accessible from other tasks
+ * it still might be armed and queued in the underlying timer
+ * mechanism. Worse, that timer mechanism might run the expiry
+ * function concurrently.
+ */
if (timer_delete_hook(timer) == TIMER_RETRY) {
- spin_unlock_irq(&timer->it_lock);
+ /*
+ * Timer is expired concurrently, prevent livelocks
+ * and pointless spinning on RT.
+ *
+ * The CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y case is
+ * irrelevant here because obviously the exiting task
+ * cannot be expiring timer in task work concurrently.
+ * Ditto for CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n as the
+ * tick interrupt cannot run on this CPU because the above
+ * spin_lock disabled interrupts.
+ *
+ * timer_wait_running() drops timer::it_lock, which opens
+ * the possibility for another task to delete the timer.
+ *
+ * That's not possible here because this is invoked from
+ * do_exit() only for the last thread of the thread group.
+ * So no other task can access that timer.
+ */
+ if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer))
+ return;
+
goto retry_delete;
}
list_del(&timer->list);

- spin_unlock_irq(&timer->it_lock);
+ spin_unlock_irqrestore(&timer->it_lock, flags);
release_posix_timer(timer, IT_ID_SET);
}

/*
- * This is called by do_exit or de_thread, only when nobody else can
- * modify the signal->posix_timers list. Yet we need sighand->siglock
- * to prevent the race with /proc/pid/timers.
+ * Invoked from do_exit() when the last thread of a thread group exits.
+ * At that point no other task can access the timers of the dying
+ * task anymore.
*/
void exit_itimers(struct task_struct *tsk)
{
@@ -1063,10 +1099,12 @@ void exit_itimers(struct task_struct *tsk)
if (list_empty(&tsk->signal->posix_timers))
return;

+ /* Protect against concurrent read via /proc/$PID/timers */
spin_lock_irq(&tsk->sighand->siglock);
list_replace_init(&tsk->signal->posix_timers, &timers);
spin_unlock_irq(&tsk->sighand->siglock);

+ /* The timers are not longer accessible via tsk::signal */
while (!list_empty(&timers)) {
tmr = list_first_entry(&timers, struct k_itimer, list);
itimer_delete(tmr);
diff --git a/localversion-rt b/localversion-rt
index 6e44e540b927b..9f7d0bdbffb18 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt12
+-rt13