Re: [PATCH 1/2] memory tier: Introduce sysfs for tier interleave weights.

From: Jonathan Cameron
Date: Mon Oct 02 2023 - 06:26:36 EST


On Wed, 27 Sep 2023 15:20:01 +0530
Ravi Jonnalagadda <ravis.opensrc@xxxxxxxxxx> wrote:

> From: Srinivasulu Thanneeru <sthanneeru@xxxxxxxxxx>
>
> Allocating pages across tiers is accomplished by provisioning
> interleave weights for each tier, with the distribution based on
> these weight values.
> By default, all tiers will have a weight of 1, which means the
> default standard page allocation. By default, all nodes within a
> tier will have a weight of 1.
>
> Signed-off-by: Srinivasulu Thanneeru <sthanneeru@xxxxxxxxxx>
> Co-authored-by: Ravi Jonnalagadda <ravis.opensrc@xxxxxxxxxx>

ABI docs?

Documentation/ABI/testing/sysfs-kernel-mm-memory-tiers
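
For reference, an entry there might look roughly like this (path, date
and wording are only illustrative, so please adjust to the actual sysfs
layout and behaviour):

What:           /sys/devices/virtual/memory_tiering/memory_tierN/interleave_weight
Date:           October 2023
Contact:        Linux memory management mailing list <linux-mm@kvack.org>
Description:    Interleave weight of this memory tier. Page allocations
                across tiers are distributed in proportion to each tier's
                weight. Writes above MAX_TIER_INTERLEAVE_WEIGHT (100) are
                rejected with -EINVAL; the default weight is 1.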

A few trivial comments inline.
> ---
> include/linux/memory-tiers.h | 2 ++
> mm/memory-tiers.c | 46 +++++++++++++++++++++++++++++++++++-
> 2 files changed, 47 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
> index 437441cdf78f..c62d286749d0 100644
> --- a/include/linux/memory-tiers.h
> +++ b/include/linux/memory-tiers.h
> @@ -19,6 +19,8 @@
> */
> #define MEMTIER_ADISTANCE_DRAM ((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1))
>
> +#define MAX_TIER_INTERLEAVE_WEIGHT 100
> +
> struct memory_tier;
> struct memory_dev_type {
>         /* list of memory types that are part of same tier as this type */
> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
> index 37a4f59d9585..7e06c9e0fa41 100644
> --- a/mm/memory-tiers.c
> +++ b/mm/memory-tiers.c
> @@ -13,6 +13,11 @@ struct memory_tier {
>         struct list_head list;
>         /* list of all memory types part of this tier */
>         struct list_head memory_types;
> +        /*
> +         * By default all tiers will have weight as 1, which means they
> +         * follow default standard allocation.
> +         */
> +        unsigned short interleave_weight;

If you are going to use a fixed-size type, use it consistently:
u16 (though u8, as per the comment below, probably makes more sense).
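
i.e. something like this (sketch only; with the range capped at 100 a
single byte is plenty):

        u8 interleave_weight;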


>         /*
>          * start value of abstract distance. memory tier maps
>          * an abstract distance range,
> @@ -145,8 +150,45 @@ static ssize_t nodelist_show(struct device *dev,
> }
> static DEVICE_ATTR_RO(nodelist);
>
> +static ssize_t interleave_weight_show(struct device *dev,
> +                                      struct device_attribute *attr, char *buf)
> +{
> +        int ret;
> +        struct memory_tier *tier = to_memory_tier(dev);
> +
> +        mutex_lock(&memory_tier_lock);
> +        ret = sysfs_emit(buf, "%u\n", tier->interleave_weight);
> +        mutex_unlock(&memory_tier_lock);

For this one

guard(mutex)(&memory_tier_lock);
return sysfs_emit()...

would perhaps be slightly nicer
(see below)
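
Spelled out, just as a sketch (assuming the guard() definitions from
<linux/cleanup.h> / <linux/mutex.h> are available in your tree; untested):

static ssize_t interleave_weight_show(struct device *dev,
                                      struct device_attribute *attr, char *buf)
{
        struct memory_tier *tier = to_memory_tier(dev);

        /* memory_tier_lock is dropped automatically on return */
        guard(mutex)(&memory_tier_lock);

        return sysfs_emit(buf, "%u\n", tier->interleave_weight);
}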

> +
> +        return ret;
> +}
> +
> +static ssize_t interleave_weight_store(struct device *dev,
> +                                       struct device_attribute *attr,
> +                                       const char *buf, size_t size)
> +{
> +        unsigned short value;
> +        int ret;
> +        struct memory_tier *tier = to_memory_tier(dev);
> +
> +        ret = kstrtou16(buf, 0, &value);

Why u16? The max is 100. I'd not mind if you just put it in an
unsigned int, but it seems odd to choose a specific size and then
pick one that is twice as big as needed!

> +
> +        if (ret)
> +                return ret;
> +        if (value > MAX_TIER_INTERLEAVE_WEIGHT)
> +                return -EINVAL;
> +
> +        mutex_lock(&memory_tier_lock);

You could play with the new cleanup.h toys, though it doesn't save a lot here.

scoped_guard(mutex, &memory_tier_lock)
        tier->interleave_weight = value;
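
Putting that together with the type comment above, the whole store path
could look something like this (again only a sketch; the plain unsigned
int and scoped_guard() are my suggestions, not something in this patch,
and it is untested):

static ssize_t interleave_weight_store(struct device *dev,
                                       struct device_attribute *attr,
                                       const char *buf, size_t size)
{
        struct memory_tier *tier = to_memory_tier(dev);
        unsigned int value;
        int ret;

        /* parse into a plain unsigned int rather than a u16 */
        ret = kstrtouint(buf, 0, &value);
        if (ret)
                return ret;
        if (value > MAX_TIER_INTERLEAVE_WEIGHT)
                return -EINVAL;

        /* memory_tier_lock is dropped at the end of the scoped statement */
        scoped_guard(mutex, &memory_tier_lock)
                tier->interleave_weight = value;

        return size;
}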

> +        tier->interleave_weight = value;
> +        mutex_unlock(&memory_tier_lock);
> +
> +        return size;
> +}
> +static DEVICE_ATTR_RW(interleave_weight);
> +
> static struct attribute *memtier_dev_attrs[] = {
>         &dev_attr_nodelist.attr,
> +        &dev_attr_interleave_weight.attr,
>         NULL
> };
>
> @@ -489,8 +531,10 @@ static struct memory_tier *set_node_memory_tier(int node)
>         memtype = node_memory_types[node].memtype;
>         node_set(node, memtype->nodes);
>         memtier = find_create_memory_tier(memtype);
> -        if (!IS_ERR(memtier))
> +        if (!IS_ERR(memtier)) {
>                 rcu_assign_pointer(pgdat->memtier, memtier);
> +                memtier->interleave_weight = 1;
> +        }
>         return memtier;
> }
>