[PATCH 4/4] selftests/resctrl: Adjust SNC support messages

From: Maciej Wieczor-Retman
Date: Wed Mar 06 2024 - 05:41:06 EST


The resctrl selftest prints a message on test failure saying that Sub-NUMA
Clustering (SNC) could be enabled and points the user to check their BIOS
settings. No actual check is performed before printing that message, so
it is not very accurate in pinpointing a problem.

Figuring out if SNC is enabled is only one part of the problem, the
other being whether the kernel supports it. As there is no easy
interface that simply states SNC support in the kernel, one can find that
information by comparing L3 cache sizes from different sources. The cache
size reported by /sys/devices/system/node/node0/cpu0/cache/index3/size
will always show the full cache size even if it's split by enabled SNC.
On the other hand, /sys/fs/resctrl/size has information about the L3 size
which, with kernel support, is adjusted for enabled SNC.

Add a function to find a cache size from /sys/fs/resctrl/size since
finding that information from the other source is already implemented.

Add a function that compares the two cache sizes and use it to make the
SNC support message more meaningful.

Add the SNC support message just after MBA's check_results() since MBA
shares code with MBM and also can suffer from enabled SNC if there is no
support in the kernel.

Signed-off-by: Maciej Wieczor-Retman <maciej.wieczor-retman@xxxxxxxxx>
---
tools/testing/selftests/resctrl/cat_test.c | 2 +-
tools/testing/selftests/resctrl/cmt_test.c | 6 +-
tools/testing/selftests/resctrl/mba_test.c | 2 +
tools/testing/selftests/resctrl/mbm_test.c | 4 +-
tools/testing/selftests/resctrl/resctrl.h | 5 +-
tools/testing/selftests/resctrl/resctrlfs.c | 69 ++++++++++++++++++++-
6 files changed, 79 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 4cb991be8e31..1cdaadf35f03 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -253,7 +253,7 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param
return ret;

/* Get L3/L2 cache size */
- ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size);
+ ret = get_sys_cache_size(uparams->cpu, test->resource, &cache_total_size);
if (ret)
return ret;
ksft_print_msg("Cache size :%lu\n", cache_total_size);
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index a81f91222a89..b7cada602484 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -112,7 +112,7 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param
if (ret)
return ret;

- ret = get_cache_size(uparams->cpu, "L3", &cache_total_size);
+ ret = get_sys_cache_size(uparams->cpu, "L3", &cache_total_size);
if (ret)
return ret;
ksft_print_msg("Cache size :%lu\n", cache_total_size);
@@ -157,8 +157,8 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param
goto out;

ret = check_results(&param, span, n);
- if (ret && (get_vendor() == ARCH_INTEL))
- ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
+ if (ret && (get_vendor() == ARCH_INTEL) && snc_ways() > 1 && !snc_kernel_support())
+ ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled. Check BIOS configuration.\n");

out:
cmt_test_cleanup();
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index fc31a61dab0c..89fe3ecbf497 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -160,6 +160,8 @@ static int mba_run_test(const struct resctrl_test *test, const struct user_param
goto out;

ret = check_results();
+ if (ret && (get_vendor() == ARCH_INTEL) && snc_ways() > 1 && !snc_kernel_support())
+ ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled. Check BIOS configuration.\n");

out:
mba_test_cleanup();
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index d67ffa3ec63a..e12b4b06f6d5 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -129,8 +129,8 @@ static int mbm_run_test(const struct resctrl_test *test, const struct user_param
goto out;

ret = check_results(DEFAULT_SPAN);
- if (ret && (get_vendor() == ARCH_INTEL))
- ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
+ if (ret && (get_vendor() == ARCH_INTEL) && snc_ways() > 1 && !snc_kernel_support())
+ ksft_print_msg("Kernel doesn't support Sub-NUMA Clustering but it is enabled. Check BIOS configuration.\n");

out:
mbm_test_cleanup();
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 178fb2eab13a..038e1269a3fc 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -28,6 +28,7 @@
#define RESCTRL_PATH "/sys/fs/resctrl"
#define PHYS_ID_PATH "/sys/devices/system/cpu/cpu"
#define INFO_PATH "/sys/fs/resctrl/info"
+#define SIZE_PATH "/sys/fs/resctrl/size"

/*
* CPU vendor IDs
@@ -168,14 +169,16 @@ unsigned long create_bit_mask(unsigned int start, unsigned int len);
unsigned int count_contiguous_bits(unsigned long val, unsigned int *start);
int get_full_cbm(const char *cache_type, unsigned long *mask);
int get_mask_no_shareable(const char *cache_type, unsigned long *mask);
-int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size);
int resource_info_unsigned_get(const char *resource, const char *filename, unsigned int *val);
+int get_sys_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size);
+int get_resctrl_cache_size(const char *cache_type, unsigned long *cache_size);
void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
int signal_handler_register(void);
void signal_handler_unregister(void);
void cat_test_cleanup(void);
unsigned int count_bits(unsigned long n);
void cmt_test_cleanup(void);
+int snc_kernel_support(void);

void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config);
void perf_event_initialize_read_format(struct perf_event_read *pe_read);
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index e4d3624a8817..dbd10cb7abf5 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -214,14 +214,14 @@ int snc_ways(void)
}

/*
- * get_cache_size - Get cache size for a specified CPU
+ * get_sys_cache_size - Get cache size for a specified CPU
* @cpu_no: CPU number
* @cache_type: Cache level L2/L3
* @cache_size: pointer to cache_size
*
* Return: = 0 on success, < 0 on failure.
*/
-int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size)
+int get_sys_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size)
{
char cache_path[1024], cache_str[64];
int length, i, cache_num;
@@ -273,6 +273,44 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size
return 0;
}

+/*
+ * get_resctrl_cache_size - Get domain 0 cache size as reported by resctrl
+ * @cache_type:	Cache level L2/L3
+ * @cache_size:	pointer to cache_size, written only on success
+ *
+ * Return: = 0 on success, < 0 if SIZE_PATH can't be opened or has no entry.
+ */
+int get_resctrl_cache_size(const char *cache_type, unsigned long *cache_size)
+{
+	char line[256], cache_prefix[16], *entry;
+	unsigned long size;
+	int len, ret = -1;
+	FILE *fp;
+
+	/* Bounded build of "<cache_type>:"; reject names that do not fit. */
+	len = snprintf(cache_prefix, sizeof(cache_prefix), "%s:", cache_type);
+	if (len < 0 || (size_t)len >= sizeof(cache_prefix))
+		return -1;
+
+	fp = fopen(SIZE_PATH, "r");
+	if (!fp) {
+		ksft_print_msg("Failed to open %s : '%s'\n",
+			       SIZE_PATH, strerror(errno));
+		return -1;
+	}
+
+	while (ret && fgets(line, sizeof(line), fp)) {
+		entry = strstr(line, cache_prefix);
+		/* Only domain 0 is parsed; %lu stops at the ';' separator. */
+		if (entry && sscanf(entry + len, "0=%lu", &size) == 1) {
+			*cache_size = size;
+			ret = 0;
+		}
+	}
+	fclose(fp);
+	return ret;		/* still -1 if no parsable entry was found */
+}
+
#define CORE_SIBLINGS_PATH "/sys/bus/cpu/devices/cpu"

/*
@@ -935,3 +973,30 @@ unsigned int count_bits(unsigned long n)

return count;
}
+
+/**
+ * snc_kernel_support - Compare system reported cache size and resctrl
+ * reported cache size to get an idea if SNC is supported on the kernel side.
+ * Equal sizes are reported as supported, different sizes as not supported.
+ * NOTE(review): presumably needs an SNC-adjusted get_sys_cache_size(); confirm.
+ *
+ * Return: 0 if not supported, 1 if supported, < 0 on failure.
+ */
+int snc_kernel_support(void)
+{
+ unsigned long resctrl_cache_size, node_cache_size;
+ int ret;
+
+ ret = get_sys_cache_size(0, "L3", &node_cache_size);
+ if (ret < 0)
+ return ret;
+
+ ret = get_resctrl_cache_size("L3", &resctrl_cache_size);
+ if (ret < 0)
+ return ret;
+
+ if (resctrl_cache_size == node_cache_size)
+ return 1;
+
+ return 0;
+}
--
2.44.0