[PATCH] hrtimer: add cmdline parameter retry_threshold to config retry times in interrupt handler routine

From: tiozhang
Date: Wed Jan 31 2024 - 05:42:30 EST


The motivation for this change is to give users a configuration option
that reduces the time spent in the hrtimer interrupt when it is affected
by time-consuming hrtimer callbacks running in irq context.
E.g., if this parameter is set to 1, the handler no longer retries, which
prevents consecutive time-consuming callbacks from occasionally running
within a single interrupt.

Signed-off-by: tiozhang <tiozhang@xxxxxxxxxxxxxx>
---
Documentation/admin-guide/kernel-parameters.txt | 5 +++++
kernel/time/hrtimer.c | 17 +++++++++++++++--
2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..29fcb1c43863 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1737,6 +1737,11 @@
hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET
registers. Default set by CONFIG_HPET_MMAP_DEFAULT.

+ hrtimer.retry_threshold=
+ [KNL] Maximum number of attempts the hrtimer interrupt
+ handler makes when it finds expired timers. Default 3.
+ Format: <int> (must be <= 3)
+
hugepages= [HW] Number of HugeTLB pages to allocate at boot.
If this follows hugepagesz (below), it specifies
the number of pages of hugepagesz to be allocated.
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 238262e4aba7..d3c17aa6cc13 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -144,6 +144,8 @@ static struct hrtimer_cpu_base migration_cpu_base = {

#define migration_base migration_cpu_base.clock_base[0]

+static int retry_threshold = 3;
+
static inline bool is_migration_base(struct hrtimer_clock_base *base)
{
return base == &migration_base;
@@ -1836,7 +1838,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
* - being scheduled away when running in a VM
*
* We need to prevent that we loop forever in the hrtimer
- * interrupt routine. We give it 3 attempts to avoid
+ * interrupt routine. We give it at most 3 attempts to avoid
* overreacting on some spurious event.
*
* Acquire base lock for updating the offsets and retrieving
@@ -1845,7 +1847,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
raw_spin_lock_irqsave(&cpu_base->lock, flags);
now = hrtimer_update_base(cpu_base);
cpu_base->nr_retries++;
- if (++retries < 3)
+ if (++retries < retry_threshold)
goto retry;
/*
* Give the system a chance to do something else than looping
@@ -2398,3 +2400,14 @@ int __sched schedule_hrtimeout(ktime_t *expires,
return schedule_hrtimeout_range(expires, 0, mode);
}
EXPORT_SYMBOL_GPL(schedule_hrtimeout);
+
+static int __init hrtimer_retry_threshold_setup(char *str)
+{
+ if (kstrtoint(str, 0, &retry_threshold) || retry_threshold > 3) {
+ retry_threshold = 3;
+ pr_warn("hrtimer.retry_threshold: bad given value, using default as 3\n");
+ }
+
+ return 1;
+}
+__setup("hrtimer.retry_threshold=", hrtimer_retry_threshold_setup);
--
2.17.1