[PATCH 2/2] memory tier: Support node migration between tiers

From: sthanneeru.opensrc
Date: Wed Dec 13 2023 - 12:54:28 EST


From: Srinivasulu Thanneeru <sthanneeru.opensrc@xxxxxxxxxx>

Node migration enables the grouping or migration of nodes
between tiers based on nodes' latencies and bandwidth characteristics.
Since nodes of the same memory-type can exist in different tiers and
can migrate from one tier to another, it is necessary to maintain
nodes per tier instead of maintaining a list of nodes grouped using
memory type(siblings) within the tier.

Signed-off-by: Srinivasulu Thanneeru <sthanneeru.opensrc@xxxxxxxxxx>
---
drivers/base/node.c | 6 ++++
include/linux/memory-tiers.h | 5 +++
include/linux/node.h | 5 +++
mm/memory-tiers.c | 66 +++++++++++++++++-------------------
4 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 788176b3585a..179d9004e4f3 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -597,6 +597,7 @@ static ssize_t memtier_override_store(struct device *dev,
return size;
ret = get_memtier_adistance_offset(nid, memtier);
node_devices[nid]->adistance_offset = ret;
+ node_memtier_change(nid);

return size;
}
@@ -607,6 +608,11 @@ void set_node_memtierid(int node, int memtierid)
node_devices[node]->memtier = memtierid;
}

+int get_node_adistance_offset(int node)
+{
+ return node_devices[node]->adistance_offset;
+}
+
static struct attribute *node_dev_attrs[] = {
&dev_attr_meminfo.attr,
&dev_attr_numastat.attr,
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 0dba8027e785..b323c2e2e417 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -54,6 +54,7 @@ int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf,
const char *source);
int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist);
int get_memtier_adistance_offset(int node, int memtier);
+void node_memtier_change(int node);
#ifdef CONFIG_MIGRATION
int next_demotion_node(int node);
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
@@ -142,5 +143,9 @@ static inline int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist)
{
return -EIO;
}
+
+static inline void node_memtier_change(int node)
+{
+}
#endif /* CONFIG_NUMA */
#endif /* _LINUX_MEMORY_TIERS_H */
diff --git a/include/linux/node.h b/include/linux/node.h
index 1c4f4be39db4..da679577a271 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -141,6 +141,7 @@ extern int register_memory_node_under_compute_node(unsigned int mem_nid,
unsigned int cpu_nid,
unsigned access);
extern void set_node_memtierid(int node, int memtierid);
+extern int get_node_adistance_offset(int nid);
#else
static inline void node_dev_init(void)
{
@@ -171,6 +172,10 @@ static inline void unregister_memory_block_under_nodes(struct memory_block *mem_
static inline void set_node_memtierid(int node, int memtierid)
{
}
+static inline int get_node_adistance_offset(int nid)
+{
+ return 0;
+}
#endif

#define to_node(device) container_of(device, struct node, dev)
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 31ed3c577836..66e1eae97e47 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -23,6 +23,8 @@ struct memory_tier {
struct device dev;
/* All the nodes that are part of all the lower memory tiers. */
nodemask_t lower_tier_mask;
+ /* Nodes linked to this tier*/
+ nodemask_t nodes;
};

struct demotion_nodes {
@@ -120,13 +122,7 @@ static inline struct memory_tier *to_memory_tier(struct device *device)

static __always_inline nodemask_t get_memtier_nodemask(struct memory_tier *memtier)
{
- nodemask_t nodes = NODE_MASK_NONE;
- struct memory_dev_type *memtype;
-
- list_for_each_entry(memtype, &memtier->memory_types, tier_sibling)
- nodes_or(nodes, nodes, memtype->nodes);
-
- return nodes;
+ return memtier->nodes;
}

static void memory_tier_device_release(struct device *dev)
@@ -182,33 +178,22 @@ int get_memtier_adistance_offset(int node, int memtier)
return adistance_offset;
}

-static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype)
+static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype,
+ int tier_adistance)
{
int ret;
bool found_slot = false;
struct memory_tier *memtier, *new_memtier;
- int adistance = memtype->adistance;
+ int adistance;
unsigned int memtier_adistance_chunk_size = MEMTIER_CHUNK_SIZE;

lockdep_assert_held_once(&memory_tier_lock);

- adistance = round_down(adistance, memtier_adistance_chunk_size);
- /*
- * If the memtype is already part of a memory tier,
- * just return that.
- */
- if (!list_empty(&memtype->tier_sibling)) {
- list_for_each_entry(memtier, &memory_tiers, list) {
- if (adistance == memtier->adistance_start)
- return memtier;
- }
- WARN_ON(1);
- return ERR_PTR(-EINVAL);
- }
+ adistance = round_down(tier_adistance, memtier_adistance_chunk_size);

list_for_each_entry(memtier, &memory_tiers, list) {
if (adistance == memtier->adistance_start) {
- goto link_memtype;
+ return memtier;
} else if (adistance < memtier->adistance_start) {
found_slot = true;
break;
@@ -238,11 +223,8 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty
put_device(&new_memtier->dev);
return ERR_PTR(ret);
}
- memtier = new_memtier;

-link_memtype:
- list_add(&memtype->tier_sibling, &memtier->memory_types);
- return memtier;
+ return new_memtier;
}

static struct memory_tier *__node_get_memory_tier(int node)
@@ -500,7 +482,7 @@ static struct memory_tier *set_node_memory_tier(int node)
struct memory_tier *memtier;
struct memory_dev_type *memtype;
pg_data_t *pgdat = NODE_DATA(node);
-
+ int tier_adistance;

lockdep_assert_held_once(&memory_tier_lock);

@@ -511,11 +493,15 @@ static struct memory_tier *set_node_memory_tier(int node)

memtype = node_memory_types[node].memtype;
node_set(node, memtype->nodes);
- memtier = find_create_memory_tier(memtype);
+ tier_adistance = get_node_adistance_offset(node);
+ tier_adistance = memtype->adistance + tier_adistance;
+
+ memtier = find_create_memory_tier(memtype, tier_adistance);
if (!IS_ERR(memtier)) {
rcu_assign_pointer(pgdat->memtier, memtier);
set_node_memtierid(node, memtier->dev.id);
}
+ node_set(node, memtier->nodes);
return memtier;
}

@@ -551,11 +537,9 @@ static bool clear_node_memory_tier(int node)
synchronize_rcu();
memtype = node_memory_types[node].memtype;
node_clear(node, memtype->nodes);
- if (nodes_empty(memtype->nodes)) {
- list_del_init(&memtype->tier_sibling);
- if (list_empty(&memtier->memory_types))
- destroy_memory_tier(memtier);
- }
+ node_clear(node, memtier->nodes);
+ if (nodes_empty(memtier->nodes))
+ destroy_memory_tier(memtier);
cleared = true;
}
return cleared;
@@ -578,7 +562,6 @@ struct memory_dev_type *alloc_memory_type(int adistance)
return ERR_PTR(-ENOMEM);

memtype->adistance = adistance;
- INIT_LIST_HEAD(&memtype->tier_sibling);
memtype->nodes = NODE_MASK_NONE;
kref_init(&memtype->kref);
return memtype;
@@ -618,6 +601,19 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype)
}
EXPORT_SYMBOL_GPL(clear_node_memory_type);

+void node_memtier_change(int node)
+{
+ struct memory_tier *memtier;
+
+ mutex_lock(&memory_tier_lock);
+ if (clear_node_memory_tier(node))
+ establish_demotion_targets();
+ memtier = set_node_memory_tier(node);
+ if (!IS_ERR(memtier))
+ establish_demotion_targets();
+ mutex_unlock(&memory_tier_lock);
+}
+
static void dump_hmem_attrs(struct node_hmem_attrs *attrs, const char *prefix)
{
pr_info(
--
2.25.1