[RFC PATCH 2/2] memory tier: Support node migration between tiers

From: sthanneeru.opensrc
Date: Thu Nov 30 2023 - 16:49:43 EST


From: Srinivasulu Thanneeru <sthanneeru.opensrc@xxxxxxxxxx>

Node migration enables the grouping or migration of nodes
between tiers based on nodes' latencies and bandwidth characteristics.
Since nodes of the same memory-type can exist in different tiers and can
migrate from one tier to another, it is necessary to maintain nodes
per tier instead of maintaining a list of nodes grouped using
memory type(siblings) within the tier.

To migrate a node from one tier to another, remove the node from the
current tier and insert it into the target tier. If the target tier does
not exist, create a new one.

Signed-off-by: Srinivasulu Thanneeru <sthanneeru.opensrc@xxxxxxxxxx>
---
drivers/base/node.c | 6 ++++
include/linux/memory-tiers.h | 5 +++
include/linux/node.h | 5 +++
mm/memory-tiers.c | 65 +++++++++++++++++-------------------
4 files changed, 47 insertions(+), 34 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 1e63c692977b..8290ea96b439 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -608,10 +608,16 @@ static ssize_t adistance_offset_store(struct device *dev,
return -EINVAL;

node_devices[nid]->adistance_offset = value;
+ node_memtier_change(nid);
return size;
}
static DEVICE_ATTR_RW(adistance_offset);

+int get_node_adistance_offset(int nid)
+{
+ return node_devices[nid]->adistance_offset;
+}
+
static struct attribute *node_dev_attrs[] = {
&dev_attr_meminfo.attr,
&dev_attr_numastat.attr,
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index ff4e7136ab40..e86c23873334 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -49,6 +49,7 @@ int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf,
const char *source);
int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist);
int get_target_memtier_adistance(int node, int adistance_offset);
+void node_memtier_change(int node);
#ifdef CONFIG_MIGRATION
int next_demotion_node(int node);
void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
@@ -142,5 +143,9 @@ static int get_target_memtier_adistance(int node, int adistance_offset)
{
return 0;
}
+
+static inline void node_memtier_change(int node)
+{
+}
#endif /* CONFIG_NUMA */
#endif /* _LINUX_MEMORY_TIERS_H */
diff --git a/include/linux/node.h b/include/linux/node.h
index fd0f4f3177f8..5150215b4922 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -139,6 +139,7 @@ extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
extern int register_memory_node_under_compute_node(unsigned int mem_nid,
unsigned int cpu_nid,
unsigned access);
+extern int get_node_adistance_offset(int nid);
#else
static inline void node_dev_init(void)
{
@@ -166,6 +167,10 @@ static inline int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
{
}
+static inline int get_node_adistance_offset(int nid)
+{
+ return 0;
+}
#endif

#define to_node(device) container_of(device, struct node, dev)
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index a40d4d4383d7..b6cd86977731 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -23,6 +23,8 @@ struct memory_tier {
struct device dev;
/* All the nodes that are part of all the lower memory tiers. */
nodemask_t lower_tier_mask;
+ /* Nodes linked to this tier*/
+ nodemask_t nodes;
};

struct demotion_nodes {
@@ -120,13 +122,7 @@ static inline struct memory_tier *to_memory_tier(struct device *device)

static __always_inline nodemask_t get_memtier_nodemask(struct memory_tier *memtier)
{
- nodemask_t nodes = NODE_MASK_NONE;
- struct memory_dev_type *memtype;
-
- list_for_each_entry(memtype, &memtier->memory_types, tier_sibling)
- nodes_or(nodes, nodes, memtype->nodes);
-
- return nodes;
+ return memtier->nodes;
}

static void memory_tier_device_release(struct device *dev)
@@ -181,33 +177,22 @@ int get_target_memtier_adistance(int node, int adistance_offset)
return node_adistance;
}

-static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype)
+static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype,
+ int tier_adistance)
{
int ret;
bool found_slot = false;
struct memory_tier *memtier, *new_memtier;
- int adistance = memtype->adistance;
+ int adistance;
unsigned int memtier_adistance_chunk_size = MEMTIER_CHUNK_SIZE;

lockdep_assert_held_once(&memory_tier_lock);

- adistance = round_down(adistance, memtier_adistance_chunk_size);
- /*
- * If the memtype is already part of a memory tier,
- * just return that.
- */
- if (!list_empty(&memtype->tier_sibling)) {
- list_for_each_entry(memtier, &memory_tiers, list) {
- if (adistance == memtier->adistance_start)
- return memtier;
- }
- WARN_ON(1);
- return ERR_PTR(-EINVAL);
- }
+ adistance = round_down(tier_adistance, memtier_adistance_chunk_size);

list_for_each_entry(memtier, &memory_tiers, list) {
if (adistance == memtier->adistance_start) {
- goto link_memtype;
+ return memtier;
} else if (adistance < memtier->adistance_start) {
found_slot = true;
break;
@@ -238,9 +223,6 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty
return ERR_PTR(ret);
}
memtier = new_memtier;
-
-link_memtype:
- list_add(&memtype->tier_sibling, &memtier->memory_types);
return memtier;
}

@@ -499,7 +481,7 @@ static struct memory_tier *set_node_memory_tier(int node)
struct memory_tier *memtier;
struct memory_dev_type *memtype;
pg_data_t *pgdat = NODE_DATA(node);
-
+ int tier_adistance;

lockdep_assert_held_once(&memory_tier_lock);

@@ -510,9 +492,13 @@ static struct memory_tier *set_node_memory_tier(int node)

memtype = node_memory_types[node].memtype;
node_set(node, memtype->nodes);
- memtier = find_create_memory_tier(memtype);
+ tier_adistance = get_node_adistance_offset(node);
+ tier_adistance = memtype->adistance + tier_adistance;
+
+ memtier = find_create_memory_tier(memtype, tier_adistance);
if (!IS_ERR(memtier))
rcu_assign_pointer(pgdat->memtier, memtier);
+ node_set(node, memtier->nodes);
return memtier;
}

@@ -548,11 +534,9 @@ static bool clear_node_memory_tier(int node)
synchronize_rcu();
memtype = node_memory_types[node].memtype;
node_clear(node, memtype->nodes);
- if (nodes_empty(memtype->nodes)) {
- list_del_init(&memtype->tier_sibling);
- if (list_empty(&memtier->memory_types))
- destroy_memory_tier(memtier);
- }
+ node_clear(node, memtier->nodes);
+ if (nodes_empty(memtier->nodes))
+ destroy_memory_tier(memtier);
cleared = true;
}
return cleared;
@@ -575,7 +559,6 @@ struct memory_dev_type *alloc_memory_type(int adistance)
return ERR_PTR(-ENOMEM);

memtype->adistance = adistance;
- INIT_LIST_HEAD(&memtype->tier_sibling);
memtype->nodes = NODE_MASK_NONE;
kref_init(&memtype->kref);
return memtype;
@@ -615,6 +598,20 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype)
}
EXPORT_SYMBOL_GPL(clear_node_memory_type);

+void node_memtier_change(int node)
+{
+ struct memory_tier *memtier;
+
+ mutex_lock(&memory_tier_lock);
+ if (clear_node_memory_tier(node))
+ establish_demotion_targets();
+ memtier = set_node_memory_tier(node);
+ if (!IS_ERR(memtier))
+ establish_demotion_targets();
+ mutex_unlock(&memory_tier_lock);
+}
+
+
static void dump_hmem_attrs(struct node_hmem_attrs *attrs, const char *prefix)
{
pr_info(
--
2.25.1