Re: [PATCH v7 05/24] x86/resctrl: Track the closid with the rmid

From: Moger, Babu
Date: Thu Nov 09 2023 - 15:32:01 EST


Hi James,

On 10/25/23 13:03, James Morse wrote:
> x86's RMID are independent of the CLOSID. An RMID can be allocated,
> used and freed without considering the CLOSID.
>
> MPAM's equivalent feature is PMG, which is not an independent number,
> it extends the CLOSID/PARTID space. For MPAM, only PMG-bits worth of
> 'RMID' can be allocated for a single CLOSID.
> i.e. if there is 1 bit of PMG space, then each CLOSID can have two
> monitor groups.
>
> To allow resctrl to disambiguate RMID values for different CLOSID,
> everything in resctrl that keeps an RMID value needs to know the CLOSID
> too. This will always be ignored on x86.
>
> Tested-by: Shaopeng Tan <tan.shaopeng@xxxxxxxxxxx>
> Tested-by: Peter Newman <peternewman@xxxxxxxxxx>
> Reviewed-by: Shaopeng Tan <tan.shaopeng@xxxxxxxxxxx>
> Reviewed-by: Xin Hao <xhao@xxxxxxxxxxxxxxxxx>
> Signed-off-by: James Morse <james.morse@xxxxxxx>
>
> ---
> Is there a better term for 'the unique identifier for a monitor group'?
> Using RMID for that here may be confusing...
>
> Changes since v1:
> * Added comment in struct rmid_entry
>
> Changes since v2:
> * Moved X86_RESCTRL_BAD_CLOSID from a subsequent patch
>
> Changes since v3:
> * Renamed X86_RESCTRL_BAD_CLOSID to EMPTY
> * Clarified a few comments and kernel-doc
>
> Changes since v5:
> * Use entry->closid from the iterator, instead of the parent control group.
> * Move the reserved defines into this patch to reduce the churn.
> * Added some kernel doc.
> * Renamed some arch closid parameters as 'unused'.
>
> Changes since v6:
> * Changes to comments.
> ---
> arch/x86/include/asm/resctrl.h | 7 +++
> arch/x86/kernel/cpu/resctrl/internal.h | 2 +-
> arch/x86/kernel/cpu/resctrl/monitor.c | 74 ++++++++++++++---------
> arch/x86/kernel/cpu/resctrl/pseudo_lock.c | 4 +-
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 ++--
> include/linux/resctrl.h | 16 ++++-
> 6 files changed, 77 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
> index 255a78d9d906..cc6e1bce7b1a 100644
> --- a/arch/x86/include/asm/resctrl.h
> +++ b/arch/x86/include/asm/resctrl.h
> @@ -7,6 +7,13 @@
> #include <linux/sched.h>
> #include <linux/jump_label.h>
>
> +/*
> + * This value can never be a valid CLOSID, and is used when mapping a
> + * (closid, rmid) pair to an index and back. On x86 only the RMID is
> + * needed. The index is a software defined value.
> + */
> +#define X86_RESCTRL_EMPTY_CLOSID ((u32)~0)
> +
> /**
> * struct resctrl_pqr_state - State cache for the PQR MSR
> * @cur_rmid: The cached Resource Monitoring ID
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index f68c6aecfa66..c836e3294e12 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -544,7 +544,7 @@ struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
> int closids_supported(void);
> void closid_free(int closid);
> int alloc_rmid(void);
> -void free_rmid(u32 rmid);
> +void free_rmid(u32 closid, u32 rmid);
> int rdt_get_mon_l3_config(struct rdt_resource *r);
> void __exit rdt_put_mon_l3_config(struct rdt_resource *r);
> bool __init rdt_cpu_has(int flag);
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index 5d9864919f1c..2a0233cd0bc9 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -24,7 +24,20 @@
>
> #include "internal.h"
>
> +/**
> + * struct rmid_entry - dirty tracking for all RMID.
> + * @closid: The CLOSID for this entry.
> + * @rmid: The RMID for this entry.
> + * @busy: The number of domains with cached data using this RMID.
> + * @list: Member of the rmid_free_lru list when busy == 0.
> + *
> + * Depending on the architecture the correct monitor is accessed using
> + * both @closid and @rmid, or @rmid only.
> + *
> + * Take the rdtgroup_mutex when accessing.
> + */
> struct rmid_entry {
> + u32 closid;
> u32 rmid;
> int busy;
> struct list_head list;
> @@ -136,7 +149,7 @@ static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val)
> return val;
> }
>
> -static inline struct rmid_entry *__rmid_entry(u32 rmid)
> +static inline struct rmid_entry *__rmid_entry(u32 closid, u32 rmid)
> {
> struct rmid_entry *entry;
>
> @@ -190,7 +203,8 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
> }
>
> void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
> - u32 rmid, enum resctrl_event_id eventid)
> + u32 unused, u32 rmid,
> + enum resctrl_event_id eventid)
> {
> struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> struct arch_mbm_state *am;
> @@ -230,7 +244,8 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
> }
>
> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
> - u32 rmid, enum resctrl_event_id eventid, u64 *val)
> + u32 unused, u32 rmid, enum resctrl_event_id eventid,
> + u64 *val)
> {
> struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
> struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
> @@ -285,9 +300,9 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
> if (nrmid >= r->num_rmid)
> break;
>
> - entry = __rmid_entry(nrmid);
> + entry = __rmid_entry(X86_RESCTRL_EMPTY_CLOSID, nrmid);// temporary

What does "temporary" mean here? Can you please elaborate (or remove the comment)?

Thanks
Babu Moger