[tip:x86/cpu] x86/intel_rdt/mba: Add primary support for Memory Bandwidth Allocation (MBA)

From: tip-bot for Vikas Shivappa
Date: Fri Apr 14 2017 - 10:21:03 EST


Commit-ID: 05b93417ce5b924c6652de19fdcc27439ab37c90
Gitweb: http://git.kernel.org/tip/05b93417ce5b924c6652de19fdcc27439ab37c90
Author: Vikas Shivappa <vikas.shivappa@xxxxxxxxxxxxxxx>
AuthorDate: Fri, 7 Apr 2017 17:33:53 -0700
Committer: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
CommitDate: Fri, 14 Apr 2017 16:10:08 +0200

x86/intel_rdt/mba: Add primary support for Memory Bandwidth Allocation (MBA)

The MBA feature details like minimum bandwidth supported, bandwidth
granularity etc are obtained via executing CPUID with EAX=10H ,ECX=3.

Setup and initialize the MBA specific extensions to data structures like
global list of RDT resources, RDT resource structure and RDT domain
structure.

[ tglx: Split out the seperate structure and the CBM related parts ]

Signed-off-by: Vikas Shivappa <vikas.shivappa@xxxxxxxxxxxxxxx>
Cc: ravi.v.shankar@xxxxxxxxx
Cc: tony.luck@xxxxxxxxx
Cc: fenghua.yu@xxxxxxxxx
Cc: vikas.shivappa@xxxxxxxxx
Link: http://lkml.kernel.org/r/1491611637-20417-5-git-send-email-vikas.shivappa@xxxxxxxxxxxxxxx
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>

---
arch/x86/include/asm/intel_rdt.h | 24 ++++++++++-
arch/x86/kernel/cpu/intel_rdt.c | 89 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 110 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 6295594..0620fc9 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -12,6 +12,7 @@
#define IA32_L3_QOS_CFG 0xc81
#define IA32_L3_CBM_BASE 0xc90
#define IA32_L2_CBM_BASE 0xd10
+#define IA32_MBA_THRTL_BASE 0xd50

#define L3_QOS_CDP_ENABLE 0x01ULL

@@ -120,6 +121,23 @@ struct rdt_cache {
};

/**
+ * struct rdt_membw - Memory bandwidth allocation related data
+ * @max_delay: Max throttle delay. Delay is the hardware
+ * representation for memory bandwidth.
+ * @min_bw: Minimum memory bandwidth percentage user can request
+ * @bw_gran: Granularity at which the memory bandwidth is allocated
+ * @delay_linear: True if memory B/W delay is in linear scale
+ * @mb_map: Mapping of memory B/W percentage to memory B/W delay
+ */
+struct rdt_membw {
+ u32 max_delay;
+ u32 min_bw;
+ u32 bw_gran;
+ u32 delay_linear;
+ u32 *mb_map;
+};
+
+/**
* struct rdt_resource - attributes of an RDT resource
* @enabled: Is this feature enabled on this machine
* @capable: Is this feature available on this machine
@@ -145,7 +163,10 @@ struct rdt_resource {
struct rdt_resource *r);
int data_width;
struct list_head domains;
- struct rdt_cache cache;
+ union {
+ struct rdt_cache cache;
+ struct rdt_membw membw;
+ };
};

extern struct mutex rdtgroup_mutex;
@@ -161,6 +182,7 @@ enum {
RDT_RESOURCE_L3DATA,
RDT_RESOURCE_L3CODE,
RDT_RESOURCE_L2,
+ RDT_RESOURCE_MBA,

/* Must be the last */
RDT_NUM_RESOURCES,
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 82eafd6..ae1aec1 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -32,6 +32,9 @@
#include <asm/intel-family.h>
#include <asm/intel_rdt.h>

+#define MAX_MBA_BW 100u
+#define MBA_IS_LINEAR 0x4
+
/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);

@@ -44,6 +47,8 @@ DEFINE_PER_CPU_READ_MOSTLY(int, cpu_closid);
int max_name_width, max_data_width;

static void
+mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);
+static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r);

#define domain_init(id) LIST_HEAD_INIT(rdt_resources_all[id].domains)
@@ -97,6 +102,13 @@ struct rdt_resource rdt_resources_all[] = {
.cbm_idx_offset = 0,
},
},
+ {
+ .name = "MB",
+ .domains = domain_init(RDT_RESOURCE_MBA),
+ .msr_base = IA32_MBA_THRTL_BASE,
+ .msr_update = mba_wrmsr,
+ .cache_level = 3,
+ },
};

static unsigned int cbm_idx(struct rdt_resource *r, unsigned int closid)
@@ -151,6 +163,53 @@ static inline bool cache_alloc_hsw_probe(void)
return false;
}

+/*
+ * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
+ * exposed to user interface and the h/w understandable delay values.
+ *
+ * The non-linear delay values have the granularity of power of two
+ * and also the h/w does not guarantee a curve for configured delay
+ * values vs. actual b/w enforced.
+ * Hence we need a mapping that is pre calibrated so the user can
+ * express the memory b/w as a percentage value.
+ */
+static inline bool rdt_get_mb_table(struct rdt_resource *r)
+{
+ /*
+ * There are no Intel SKUs as of now to support non-linear delay.
+ */
+ pr_info("MBA b/w map not implemented for cpu:%d, model:%d",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
+
+ return false;
+}
+
+static bool rdt_get_mem_config(struct rdt_resource *r)
+{
+ union cpuid_0x10_3_eax eax;
+ union cpuid_0x10_x_edx edx;
+ u32 ebx, ecx;
+
+ cpuid_count(0x00000010, 3, &eax.full, &ebx, &ecx, &edx.full);
+ r->num_closid = edx.split.cos_max + 1;
+ r->membw.max_delay = eax.split.max_delay + 1;
+ r->default_ctrl = MAX_MBA_BW;
+ if (ecx & MBA_IS_LINEAR) {
+ r->membw.delay_linear = true;
+ r->membw.min_bw = MAX_MBA_BW - r->membw.max_delay;
+ r->membw.bw_gran = MAX_MBA_BW - r->membw.max_delay;
+ } else {
+ if (!rdt_get_mb_table(r))
+ return false;
+ }
+ r->data_width = 3;
+
+ r->capable = true;
+ r->enabled = true;
+
+ return true;
+}
+
static void rdt_get_cache_config(int idx, struct rdt_resource *r)
{
union cpuid_0x10_1_eax eax;
@@ -196,6 +255,30 @@ static int get_cache_id(int cpu, int level)
return -1;
}

+/*
+ * Map the memory b/w percentage value to delay values
+ * that can be written to QOS_MSRs.
+ * There are currently no SKUs which support non linear delay values.
+ */
+static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
+{
+ if (r->membw.delay_linear)
+ return MAX_MBA_BW - bw;
+
+ pr_warn_once("Non Linear delay-bw map not supported but queried\n");
+ return r->default_ctrl;
+}
+
+static void
+mba_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
+{
+ unsigned int i;
+
+ /* Write the delay values for mba. */
+ for (i = m->low; i < m->high; i++)
+ wrmsrl(r->msr_base + i, delay_bw_map(d->ctrl_val[i], r));
+}
+
static void
cat_wrmsr(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r)
{
@@ -431,8 +514,10 @@ static __init bool get_rdt_resources(void)
ret = true;
}

- if (boot_cpu_has(X86_FEATURE_MBA))
- ret = true;
+ if (boot_cpu_has(X86_FEATURE_MBA)) {
+ if (rdt_get_mem_config(&rdt_resources_all[RDT_RESOURCE_MBA]))
+ ret = true;
+ }

return ret;
}