[PATCH 3/3] x86/resctrl: Display cache occupancy of busy RMIDs

From: Haifeng Xu
Date: Tue Jan 23 2024 - 04:22:30 EST


If llc_occupancy is enabled, an RMID may not be freed immediately unless
its llc_occupancy is less than the resctrl_rmid_realloc_threshold.

In our production environment, those unused RMIDs get stuck in the limbo
list forever because their llc_occupancy is larger than the threshold.
After raising the threshold, we can successfully free unused RMIDs and
create new monitor groups. In order to acquire the llc_occupancy of RMIDs
in each rdt domain, we use the perf tool to track and filter the log manually.

That is not efficient enough. Therefore, add an RFTYPE_TOP_INFO file
'busy_rmids_info' that tells users the llc_occupancy of busy RMIDs. It
can also help users decide what the resctrl_rmid_realloc_threshold
should be.

Signed-off-by: Haifeng Xu <haifeng.xu@xxxxxxxxxx>
---
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 70 ++++++++++++++++++++++++++
1 file changed, 70 insertions(+)

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 1eac0ca97b81..88dadb87f4e1 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -32,6 +32,12 @@
#include <asm/resctrl.h>
#include "internal.h"

+/*
+ * Argument bundle handed to rdt_domain_busy_rmids_show() through the
+ * void *info parameter of smp_call_function_any().
+ */
+struct busy_rmids_info {
+ struct rdt_resource *r; /* resource supplying num_rmid for the bitmap walk */
+ struct rdt_domain *d; /* domain whose rmid_busy_llc bitmap is dumped */
+ struct seq_file *seq; /* destination for the formatted output */
+};
+
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
@@ -934,6 +940,63 @@ static int rdt_free_rmids_show(struct kernfs_open_file *of,
return 0;
}

+/*
+ * rdt_domain_busy_rmids_show() - Print the LLC occupancy of a domain's busy RMIDs.
+ * @info: a struct busy_rmids_info naming the resource, domain and seq_file.
+ *
+ * Used as the callback of smp_call_function_any() on the domain's cpu_mask.
+ * Emits a "domain-<id>" header, then walks the bits set in d->rmid_busy_llc
+ * starting at RMID 1 and prints one "rmid:<n> llc_occupancy:<val>" line per
+ * busy RMID. The walk stops at the first failed counter read, after an error
+ * line has been printed.
+ */
+static void rdt_domain_busy_rmids_show(void *info)
+{
+	struct busy_rmids_info *rmids_info = info;
+	struct rdt_resource *r = rmids_info->r;
+	struct rdt_domain *d = rmids_info->d;
+	struct seq_file *seq = rmids_info->seq;
+	u32 crmid = 1, nrmid;
+	u64 val;
+	int ret;
+
+	seq_printf(seq, "domain-%d busy rmids.\n", d->id);
+
+	/*
+	 * This file is registered unconditionally, but the busy bitmap is
+	 * presumably only allocated when llc_occupancy monitoring is enabled
+	 * (NOTE(review): confirm against the domain setup code). Without a
+	 * bitmap there is nothing to walk, and find_next_bit() must not be
+	 * handed a NULL pointer.
+	 */
+	if (!d->rmid_busy_llc)
+		return;
+
+	for (;;) {
+		nrmid = find_next_bit(d->rmid_busy_llc, r->num_rmid, crmid);
+		if (nrmid >= r->num_rmid)
+			break;
+
+		ret = resctrl_arch_rmid_read(r, d, nrmid, QOS_L3_OCCUP_EVENT_ID, &val);
+		switch (ret) {
+		case 0:
+			seq_printf(seq, "rmid:%u llc_occupancy:%llu\n", nrmid, val);
+			break;
+		case -EIO:
+			seq_printf(seq, "I/O Error\n");
+			return;
+		case -EINVAL:
+			seq_printf(seq, "Invalid Argument\n");
+			return;
+		default:
+			/* val was not filled in on failure; report the code instead. */
+			seq_printf(seq, "Error %d\n", ret);
+			return;
+		}
+		crmid = nrmid + 1;
+	}
+}
+
+/*
+ * seq_show handler for the "busy_rmids_info" file.
+ *
+ * With rdtgroup_mutex held, visits every domain of the L3 resource and runs
+ * rdt_domain_busy_rmids_show() on a CPU taken from that domain's cpu_mask,
+ * waiting for each call to finish before moving to the next domain. Always
+ * returns 0.
+ */
+static int rdt_busy_rmids_info_show(struct kernfs_open_file *of,
+				    struct seq_file *seq, void *v)
+{
+	struct busy_rmids_info info = { .seq = seq };
+	struct rdt_domain *dom;
+
+	mutex_lock(&rdtgroup_mutex);
+	info.r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+	list_for_each_entry(dom, &info.r->domains, list) {
+		/* The callback reads the domain through the shared info bundle. */
+		info.d = dom;
+		smp_call_function_any(&dom->cpu_mask, rdt_domain_busy_rmids_show,
+				      &info, 1);
+	}
+	mutex_unlock(&rdtgroup_mutex);
+
+	return 0;
+}
+
static int rdt_num_closids_show(struct kernfs_open_file *of,
struct seq_file *seq, void *v)
{
@@ -1791,6 +1854,13 @@ static struct rftype res_common_files[] = {
.seq_show = rdt_free_rmids_show,
.fflags = RFTYPE_TOP_INFO,
},
+ {
+ .name = "busy_rmids_info",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_busy_rmids_info_show,
+ .fflags = RFTYPE_TOP_INFO,
+ },
{
.name = "num_closids",
.mode = 0444,
--
2.25.1