Re: [PATCH] watchdog: Prefer use "ref-cycles" for NMI watchdog

From: Peter Zijlstra
Date: Fri May 12 2023 - 08:47:22 EST


On Tue, May 09, 2023 at 03:17:00PM -0700, Song Liu wrote:
> NMI watchdog permanently consumes one hardware counters per CPU on the
> system. For systems that use many hardware counters, this causes more
> aggressive time multiplexing of perf events.
>
> OTOH, some CPUs (mostly Intel) support "ref-cycles" event, which is rarely
> used. Try use "ref-cycles" for the watchdog. If the CPU supports it, so
> that one more hardware counter is available to the user. If the CPU doesn't
> support "ref-cycles", fall back to "cycles".
>
> The downside of this change is that users of "ref-cycles" need to disable
> nmi_watchdog.

Urgh..

how about something like so instead; then you can use whatever event you
like...

---
include/linux/nmi.h | 2 ++
kernel/watchdog.c | 45 ++++++++++++++++++++++++++++++++++++---------
kernel/watchdog_hld.c | 4 ++--
3 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 048c0b9aa623..8b6307837346 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -19,6 +19,8 @@ bool is_hardlockup(void);

extern int watchdog_user_enabled;
extern int nmi_watchdog_user_enabled;
+extern int nmi_watchdog_type;
+extern u64 nmi_watchdog_config;
extern int soft_watchdog_user_enabled;
extern int watchdog_thresh;
extern unsigned long watchdog_enabled;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 8e61f21e7e33..b3c09e0f96a3 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -40,6 +40,8 @@ static DEFINE_MUTEX(watchdog_mutex);
unsigned long __read_mostly watchdog_enabled;
int __read_mostly watchdog_user_enabled = 1;
int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT;
+int __ro_after_init nmi_watchdog_type = PERF_TYPE_HARDWARE;
+u64 __ro_after_init nmi_watchdog_config = PERF_COUNT_HW_CPU_CYCLES;
int __read_mostly soft_watchdog_user_enabled = 1;
int __read_mostly watchdog_thresh = 10;
static int __read_mostly nmi_watchdog_available;
@@ -73,15 +75,40 @@ void __init hardlockup_detector_disable(void)

static int __init hardlockup_panic_setup(char *str)
{
- if (!strncmp(str, "panic", 5))
- hardlockup_panic = 1;
- else if (!strncmp(str, "nopanic", 7))
- hardlockup_panic = 0;
- else if (!strncmp(str, "0", 1))
- nmi_watchdog_user_enabled = 0;
- else if (!strncmp(str, "1", 1))
- nmi_watchdog_user_enabled = 1;
- return 1;
+ int ret = 1;
+
+ if (!str)
+ return -EINVAL;
+
+ while (str) {
+ char *next = strchr(str, ',');
+ if (next) {
+ *next = 0;
+ next++;
+ }
+
+ if (!strcmp(str, "panic"))
+ hardlockup_panic = 1;
+ else if (!strcmp(str, "nopanic"))
+ hardlockup_panic = 0;
+ else if (!strcmp(str, "0"))
+ nmi_watchdog_user_enabled = 0;
+ else if (!strcmp(str, "1"))
+ nmi_watchdog_user_enabled = 1;
+ else if (str[0] == 'r') {
+ str++;
+ ret = kstrtou64(str, 16, &nmi_watchdog_config);
+ if (ret)
+ break;
+ nmi_watchdog_type = PERF_TYPE_RAW;
+ nmi_watchdog_user_enabled = 1;
+ }
+
+ str = next;
+ }
+
+ return ret;
+
}
__setup("nmi_watchdog=", hardlockup_panic_setup);

diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 247bf0b1582c..27bc15f9a92a 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -99,8 +99,6 @@ static inline bool watchdog_check_timestamp(void)
#endif

static struct perf_event_attr wd_hw_attr = {
- .type = PERF_TYPE_HARDWARE,
- .config = PERF_COUNT_HW_CPU_CYCLES,
.size = sizeof(struct perf_event_attr),
.pinned = 1,
.disabled = 1,
@@ -170,6 +168,8 @@ static int hardlockup_detector_event_create(void)
struct perf_event *evt;

wd_attr = &wd_hw_attr;
+ wd_attr->type = nmi_watchdog_type;
+ wd_attr->config = nmi_watchdog_config;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);

/* Try to register using hardware perf events */