Re: [RFC PATCH 6/6] block: implement NVMEM provider

From: Daniel Golle
Date: Wed Jul 19 2023 - 20:16:22 EST


On Thu, Jul 20, 2023 at 08:04:56AM +0900, Damien Le Moal wrote:
> On 7/20/23 07:04, Daniel Golle wrote:
> > On embedded devices using an eMMC it is common that one or more partitions
> > on the eMMC are used to store MAC addresses and Wi-Fi calibration EEPROM
> > data. Allow referencing the partition in device tree for the kernel and
> > Wi-Fi drivers accessing it via the NVMEM layer.
> >
> > Signed-off-by: Daniel Golle <daniel@xxxxxxxxxxxxxx>
> > ---
> > block/Kconfig | 8 ++
> > block/Makefile | 1 +
> > block/blk-nvmem.c | 187 ++++++++++++++++++++++++++++++++++++++++
> > block/blk.h | 13 +++
> > block/genhd.c | 2 +
> > block/partitions/core.c | 2 +
> > 6 files changed, 213 insertions(+)
> > create mode 100644 block/blk-nvmem.c
> >
> > diff --git a/block/Kconfig b/block/Kconfig
> > index 86122e459fe04..185573877964d 100644
> > --- a/block/Kconfig
> > +++ b/block/Kconfig
> > @@ -218,6 +218,14 @@ config BLK_MQ_VIRTIO
> > config BLK_PM
> > def_bool PM
> >
> > +config BLK_NVMEM
> > + bool "Block device NVMEM provider"
> > + depends on OF
> > + help
> > + Allow block devices (or partitions) to act as NVMEM prodivers,
> > + typically using if an eMMC is used to store MAC addresses or Wi-Fi
>
> Odd grammar... May be "typically used with eMMC to store ..."

Thank you, I'll use your suggestion.

>
> > + calibration data on embedded devices.
> > +
> > # do not use in new code
> > config BLOCK_HOLDER_DEPRECATED
> > bool
> > diff --git a/block/Makefile b/block/Makefile
> > index 46ada9dc8bbfe..03c0bfa8642df 100644
> > --- a/block/Makefile
> > +++ b/block/Makefile
> > @@ -34,6 +34,7 @@ obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
> > obj-$(CONFIG_BLK_WBT) += blk-wbt.o
> > obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
> > obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
> > +obj-$(CONFIG_BLK_NVMEM) += blk-nvmem.o
> > obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
> > obj-$(CONFIG_BLK_PM) += blk-pm.o
> > obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o \
> > diff --git a/block/blk-nvmem.c b/block/blk-nvmem.c
> > new file mode 100644
> > index 0000000000000..8238511049f56
> > --- /dev/null
> > +++ b/block/blk-nvmem.c
> > @@ -0,0 +1,187 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * block device NVMEM provider
> > + *
> > + * Copyright (c) 2023 Daniel Golle <daniel@xxxxxxxxxxxxxx>
> > + *
> > + * Useful on devices using a partition on an eMMC for MAC addresses or
> > + * Wi-Fi calibration EEPROM data.
> > + */
> > +
> > +#include "blk.h"
> > +#include <linux/nvmem-provider.h>
> > +#include <linux/of.h>
> > +#include <linux/pagemap.h>
> > +#include <linux/property.h>
> > +
> > +/* List of all NVMEM devices */
> > +static LIST_HEAD(nvmem_devices);
> > +static DEFINE_MUTEX(devices_mutex);
> > +
> > +struct blk_nvmem {
> > + struct nvmem_device *nvmem;
> > + struct block_device *bdev;
> > + struct list_head list;
> > +};
> > +
> > +static int blk_nvmem_reg_read(void *priv, unsigned int from,
> > + void *val, size_t bytes)
> > +{
> > + pgoff_t f_index = from >> PAGE_SHIFT;
> > + struct address_space *mapping;
> > + struct blk_nvmem *bnv = priv;
>
> Why not have bnv passed as argument directly ?

Because blk_nvmem_reg_read is used as reg_read function, ie.
nvmem_reg_read_t {aka 'int (*)(void *, unsigned int, void *, long unsigned int)'}

Hence 'void *' has to be implicitely type-casted into 'struct blk_nvmem *'.

>
> > + size_t bytes_left = bytes;
> > + struct folio *folio;
> > + unsigned long offs, to_read;
> > + void *p;
> > +
> > + if (!bnv->bdev)
> > + return -ENODEV;
> > +
> > + offs = from & ((1 << PAGE_SHIFT) - 1);
>
> offs = from & PAGE_MASK;

Right, that makes it easier...

>
> from being an int is really odd though.

Yep, but that's how NVMEM framework defines it.

>
> > + mapping = bnv->bdev->bd_inode->i_mapping;
> > +
> > + while (bytes_left) {
> > + folio = read_mapping_folio(mapping, f_index++, NULL);
> > + if (IS_ERR(folio))
> > + return PTR_ERR(folio);
> > +
> > + to_read = min_t(unsigned long, bytes_left, PAGE_SIZE - offs);
> > + p = folio_address(folio) + offset_in_folio(folio, offs);
> > + memcpy(val, p, to_read);
> > + offs = 0;
> > + bytes_left -= to_read;
> > + val += to_read;
> > + folio_put(folio);
> > + }
> > +
> > + return bytes_left == 0 ? 0 : -EIO;
>
> How can bytes_left be 0 here given the above loop with no break ?

Well, right... Can just be 'return 0;' at this point obviously.

>
> > +}
> > +
> > +void blk_register_nvmem(struct block_device *bdev)
> > +{
> > + struct fwnode_handle *fw_parts = NULL, *fw_part_c, *fw_part = NULL;
> > + struct nvmem_config config = {};
> > + const char *partname, *uuid;
> > + struct device *dev, *p0dev;
> > + struct blk_nvmem *bnv;
> > + u32 reg;
> > +
> > + /*
> > + * skip devices which set GENHD_FL_NO_NVMEM
> > + *
> > + * This flag is used for mtdblock and ubiblock devices because
> > + * both, MTD and UBI already implement their own NVMEM provider.
> > + * To avoid registering multiple NVMEM providers for the same
> > + * device node, skip the block NVMEM provider.
> > + */
> > + if (bdev->bd_disk->flags & GENHD_FL_NO_NVMEM)
> > + return;
> > +
> > + /* skip too large devices */
>
> Why ? Is that defined in some standards somewhere ?

NVMEM framework uses 'int' to address byte offsets inside NVMEM devices.
Hence devices larger than INT_MAX cannot be addressed, I will also state
that in that comment.

>
> > + if (bdev_nr_bytes(bdev) > INT_MAX)
> > + return;
> > +
> > + dev = &bdev->bd_device;
> > + if (!bdev_is_partition(bdev)) {
> > + fw_part = dev->fwnode;
> > +
> > + if (!fw_part && dev->parent)
> > + fw_part = dev->parent->fwnode;
> > +
> > + goto no_parts;
> > + }
> > +
> > + p0dev = &bdev->bd_disk->part0->bd_device;
> > + fw_parts = device_get_named_child_node(p0dev, "partitions");
> > + if (!fw_parts)
> > + fw_parts = device_get_named_child_node(p0dev->parent, "partitions");
> > +
> > + if (!fw_parts)
> > + return;
> > +
> > + fwnode_for_each_child_node(fw_parts, fw_part_c) {
> > + if (!fwnode_property_read_string(fw_part_c, "uuid", &uuid) &&
> > + (!bdev->bd_meta_info || strncmp(uuid,
> > + bdev->bd_meta_info->uuid,
> > + PARTITION_META_INFO_UUIDLTH)))
> > + continue;
> > +
> > + if (!fwnode_property_read_string(fw_part_c, "partname", &partname) &&
> > + (!bdev->bd_meta_info || strncmp(partname,
> > + bdev->bd_meta_info->volname,
> > + PARTITION_META_INFO_VOLNAMELTH)))
> > + continue;
> > +
> > + /*
> > + * partition addresses (reg) in device tree greater than
> > + * DISK_MAX_PARTS can be used to match uuid or partname only
> > + */
> > + if (!fwnode_property_read_u32(fw_part_c, "reg", &reg) &&
> > + reg < DISK_MAX_PARTS && bdev->bd_partno != reg)
> > + continue;
> > +
> > + fw_part = fw_part_c;
> > + break;
> > + }
> > +
> > +no_parts:
> > + if (!fwnode_device_is_compatible(fw_part, "nvmem-cells"))
> > + return;
> > +
> > + bnv = kzalloc(sizeof(struct blk_nvmem), GFP_KERNEL);
> > + if (!bnv)
> > + return;
> > +
> > + config.id = NVMEM_DEVID_NONE;
> > + config.dev = &bdev->bd_device;
> > + config.name = dev_name(&bdev->bd_device);
> > + config.owner = THIS_MODULE;
> > + config.priv = bnv;
> > + config.reg_read = blk_nvmem_reg_read;
> > + config.size = bdev_nr_bytes(bdev);
> > + config.word_size = 1;
> > + config.stride = 1;
> > + config.read_only = true;
> > + config.root_only = true;
> > + config.ignore_wp = true;
> > + config.of_node = to_of_node(fw_part);
> > +
> > + bnv->bdev = bdev;
> > + bnv->nvmem = nvmem_register(&config);
> > + if (IS_ERR(bnv->nvmem)) {
> > + /* Just ignore if there is no NVMEM support in the kernel */
>
> If there is not, why would this function even be called ?

True. Kconfig depends should make sure blk-nvmem is only built if
nvmem is supported at all.

>
> > + if (PTR_ERR(bnv->nvmem) != -EOPNOTSUPP)
> > + dev_err_probe(&bdev->bd_device, PTR_ERR(bnv->nvmem),
> > + "Failed to register NVMEM device\n");
> > +
> > + kfree(bnv);
> > + return;
> > + }
> > +
> > + mutex_lock(&devices_mutex);
> > + list_add_tail(&bnv->list, &nvmem_devices);
> > + mutex_unlock(&devices_mutex);
> > +}
> > +
> > +void blk_unregister_nvmem(struct block_device *bdev)
> > +{
> > + struct blk_nvmem *bnv_c, *bnv = NULL;
> > +
> > + mutex_lock(&devices_mutex);
> > + list_for_each_entry(bnv_c, &nvmem_devices, list)
> > + if (bnv_c->bdev == bdev) {
> > + bnv = bnv_c;
> > + break;
> > + }
>
> Curly brackets for list_for_each_entry() {} would be nice, even though they are
> not strictly necessary in this case.

Ack, will add them.

>
> > +
> > + if (!bnv) {
> > + mutex_unlock(&devices_mutex);
> > + return;
> > + }
> > +
> > + list_del(&bnv->list);
> > + mutex_unlock(&devices_mutex);
> > + nvmem_unregister(bnv->nvmem);
> > + kfree(bnv);
> > +}
> > diff --git a/block/blk.h b/block/blk.h
> > index 686712e138352..7423d0d5494e9 100644
> > --- a/block/blk.h
> > +++ b/block/blk.h
> > @@ -515,4 +515,17 @@ static inline int req_ref_read(struct request *req)
> > return atomic_read(&req->ref);
> > }
> >
> > +#ifdef CONFIG_BLK_NVMEM
> > +void blk_register_nvmem(struct block_device *bdev);
> > +void blk_unregister_nvmem(struct block_device *bdev);
> > +#else
> > +static inline void blk_register_nvmem(struct block_device *bdev)
> > +{
> > +}
>
> These could go at the end of the static inline line.

I tried keeping the existing style in that header file.
See a couple of lines above:
static inline void bio_integrity_free(struct bio *bio)
{
}

>
> > +
> > +static inline void blk_unregister_nvmem(struct block_device *bdev)
> > +{
> > +}
> > +#endif
> > +
> > #endif /* BLK_INTERNAL_H */
> > diff --git a/block/genhd.c b/block/genhd.c
> > index 3d287b32d50df..b306e0f407bb2 100644
> > --- a/block/genhd.c
> > +++ b/block/genhd.c
> > @@ -527,6 +527,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
> > disk_update_readahead(disk);
> > disk_add_events(disk);
> > set_bit(GD_ADDED, &disk->state);
> > + blk_register_nvmem(disk->part0);
> > return 0;
> >
> > out_unregister_bdi:
> > @@ -569,6 +570,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
> > if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
> > bdev->bd_holder_ops->mark_dead(bdev);
> > mutex_unlock(&bdev->bd_holder_lock);
> > + blk_unregister_nvmem(bdev);
> >
> > put_device(&bdev->bd_device);
> > rcu_read_lock();
> > diff --git a/block/partitions/core.c b/block/partitions/core.c
> > index 13a7341299a91..68bd655f5e68e 100644
> > --- a/block/partitions/core.c
> > +++ b/block/partitions/core.c
> > @@ -404,6 +404,8 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
> > /* suppress uevent if the disk suppresses it */
> > if (!dev_get_uevent_suppress(ddev))
> > kobject_uevent(&pdev->kobj, KOBJ_ADD);
> > +
> > + blk_register_nvmem(bdev);
> > return bdev;
> >
> > out_del:
>
> --
> Damien Le Moal
> Western Digital Research
>