[PATCH v1 1/6] x86/resctrl: Move __resctrl_sched_in() out-of-line

From: Peter Newman
Date: Mon Mar 25 2024 - 14:07:23 EST


__resctrl_sched_in() is unable to dereference a struct rdtgroup pointer
when defined inline because rdtgroup is a private structure defined in
internal.h.

This function is defined inline to avoid impacting context switch
performance for the majority of users who aren't using resctrl at all.
These benefits can already be realized without access to internal
resctrl data structures.

The logic of performing an out-of-line call to __resctrl_sched_in() only
when resctrl is mounted is architecture-independent, so the inline
definition of resctrl_sched_in() can be moved into linux/resctrl.h.

Signed-off-by: Peter Newman <peternewman@xxxxxxxxxx>
---
arch/x86/include/asm/resctrl.h | 75 --------------------------
arch/x86/kernel/cpu/resctrl/internal.h | 24 +++++++++
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 41 ++++++++++++++
arch/x86/kernel/process_32.c | 2 +-
arch/x86/kernel/process_64.c | 2 +-
include/linux/resctrl.h | 21 ++++++++
6 files changed, 88 insertions(+), 77 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 12dbd2588ca7..99ba8c0dc155 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -14,30 +14,6 @@
*/
#define X86_RESCTRL_EMPTY_CLOSID ((u32)~0)

-/**
- * struct resctrl_pqr_state - State cache for the PQR MSR
- * @cur_rmid: The cached Resource Monitoring ID
- * @cur_closid: The cached Class Of Service ID
- * @default_rmid: The user assigned Resource Monitoring ID
- * @default_closid: The user assigned cached Class Of Service ID
- *
- * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
- * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
- * contains both parts, so we need to cache them. This also
- * stores the user configured per cpu CLOSID and RMID.
- *
- * The cache also helps to avoid pointless updates if the value does
- * not change.
- */
-struct resctrl_pqr_state {
- u32 cur_rmid;
- u32 cur_closid;
- u32 default_rmid;
- u32 default_closid;
-};
-
-DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
-
extern bool rdt_alloc_capable;
extern bool rdt_mon_capable;

@@ -79,50 +55,6 @@ static inline void resctrl_arch_disable_mon(void)
static_branch_dec_cpuslocked(&rdt_enable_key);
}

-/*
- * __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
- *
- * Following considerations are made so that this has minimal impact
- * on scheduler hot path:
- * - This will stay as no-op unless we are running on an Intel SKU
- * which supports resource control or monitoring and we enable by
- * mounting the resctrl file system.
- * - Caches the per cpu CLOSid/RMID values and does the MSR write only
- * when a task with a different CLOSid/RMID is scheduled in.
- * - We allocate RMIDs/CLOSids globally in order to keep this as
- * simple as possible.
- * Must be called with preemption disabled.
- */
-static inline void __resctrl_sched_in(struct task_struct *tsk)
-{
- struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
- u32 closid = state->default_closid;
- u32 rmid = state->default_rmid;
- u32 tmp;
-
- /*
- * If this task has a closid/rmid assigned, use it.
- * Else use the closid/rmid assigned to this cpu.
- */
- if (static_branch_likely(&rdt_alloc_enable_key)) {
- tmp = READ_ONCE(tsk->closid);
- if (tmp)
- closid = tmp;
- }
-
- if (static_branch_likely(&rdt_mon_enable_key)) {
- tmp = READ_ONCE(tsk->rmid);
- if (tmp)
- rmid = tmp;
- }
-
- if (closid != state->cur_closid || rmid != state->cur_rmid) {
- state->cur_closid = closid;
- state->cur_rmid = rmid;
- wrmsr(MSR_IA32_PQR_ASSOC, rmid, closid);
- }
-}
-
static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
{
unsigned int scale = boot_cpu_data.x86_cache_occ_scale;
@@ -150,12 +82,6 @@ static inline bool resctrl_arch_match_rmid(struct task_struct *tsk, u32 ignored,
return READ_ONCE(tsk->rmid) == rmid;
}

-static inline void resctrl_sched_in(struct task_struct *tsk)
-{
- if (static_branch_likely(&rdt_enable_key))
- __resctrl_sched_in(tsk);
-}
-
static inline u32 resctrl_arch_system_num_rmid_idx(void)
{
/* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
@@ -188,7 +114,6 @@ void resctrl_cpu_detect(struct cpuinfo_x86 *c);

#else

-static inline void resctrl_sched_in(struct task_struct *tsk) {}
static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}

#endif /* CONFIG_X86_CPU_RESCTRL */
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index c99f26ebe7a6..56a68e542572 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -331,6 +331,30 @@ struct rftype {
char *buf, size_t nbytes, loff_t off);
};

+/**
+ * struct resctrl_pqr_state - State cache for the PQR MSR
+ * @cur_rmid: The cached Resource Monitoring ID
+ * @cur_closid: The cached Class Of Service ID
+ * @default_rmid: The user assigned Resource Monitoring ID
+ * @default_closid: The user assigned cached Class Of Service ID
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them. This also
+ * stores the user configured per cpu CLOSID and RMID.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct resctrl_pqr_state {
+ u32 cur_rmid;
+ u32 cur_closid;
+ u32 default_rmid;
+ u32 default_closid;
+};
+
+DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
+
/**
* struct mbm_state - status for each MBM counter in each domain
* @prev_bw_bytes: Previous bytes value read for bandwidth calculation
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 011e17efb1a6..5d599d99f94b 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -334,6 +334,47 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
return ret;
}

+/*
+ * __resctrl_sched_in() - Writes the task's control and monitor IDs into the CPU
+ *
+ * Following considerations are made so that this has minimal impact
+ * on scheduler hot path:
+ * - Caches the per cpu CLOSid/RMID values and does the MSR write only
+ * when a task with a different CLOSid/RMID is scheduled in.
+ * - We allocate RMIDs/CLOSids globally in order to keep this as
+ * simple as possible.
+ * Must be called with preemption disabled.
+ */
+void __resctrl_sched_in(struct task_struct *tsk)
+{
+ struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
+ u32 closid = state->default_closid;
+ u32 rmid = state->default_rmid;
+ u32 tmp;
+
+ /*
+ * If this task has a closid/rmid assigned, use it.
+ * Else use the closid/rmid assigned to this cpu.
+ */
+ if (static_branch_likely(&rdt_alloc_enable_key)) {
+ tmp = READ_ONCE(tsk->closid);
+ if (tmp)
+ closid = tmp;
+ }
+
+ if (static_branch_likely(&rdt_mon_enable_key)) {
+ tmp = READ_ONCE(tsk->rmid);
+ if (tmp)
+ rmid = tmp;
+ }
+
+ if (closid != state->cur_closid || rmid != state->cur_rmid) {
+ state->cur_closid = closid;
+ state->cur_rmid = rmid;
+ wrmsr(MSR_IA32_PQR_ASSOC, rmid, closid);
+ }
+}
+
/*
* This is safe against resctrl_sched_in() called from __switch_to()
* because __switch_to() is executed with interrupts disabled. A local call
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 0917c7f25720..8f92a87d381d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
#include <linux/io.h>
#include <linux/kdebug.h>
#include <linux/syscalls.h>
+#include <linux/resctrl.h>

#include <asm/ldt.h>
#include <asm/processor.h>
@@ -51,7 +52,6 @@
#include <asm/debugreg.h>
#include <asm/switch_to.h>
#include <asm/vm86.h>
-#include <asm/resctrl.h>
#include <asm/proto.h>

#include "process.h"
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7062b84dd467..d442269bb25b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -40,6 +40,7 @@
#include <linux/ftrace.h>
#include <linux/syscalls.h>
#include <linux/iommu.h>
+#include <linux/resctrl.h>

#include <asm/processor.h>
#include <asm/pkru.h>
@@ -53,7 +54,6 @@
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>
#include <asm/vdso.h>
-#include <asm/resctrl.h>
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
#include <asm/fred.h>
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index a365f67131ec..62d607939a73 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -304,4 +304,25 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d);
extern unsigned int resctrl_rmid_realloc_threshold;
extern unsigned int resctrl_rmid_realloc_limit;

+DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
+
+void __resctrl_sched_in(struct task_struct *tsk);
+
+/*
+ * resctrl_sched_in() - Assigns the incoming task's control/monitor IDs to the
+ * current CPU
+ *
+ * To minimize impact to the scheduler hot path, this will stay as no-op unless
+ * running on a system supporting resctrl and the filesystem is mounted.
+ *
+ * Must be called with preemption disabled.
+ */
+static inline void resctrl_sched_in(struct task_struct *tsk)
+{
+#ifdef CONFIG_X86_CPU_RESCTRL
+ if (static_branch_likely(&rdt_enable_key))
+ __resctrl_sched_in(tsk);
+#endif
+}
+
#endif /* _RESCTRL_H */
--
2.44.0.396.g6e790dbe36-goog