Re: [RFC PATCH 6/6] block: implement NVMEM provider

From: Damien Le Moal
Date: Wed Jul 19 2023 - 19:05:13 EST


On 7/20/23 07:04, Daniel Golle wrote:
> On embedded devices using an eMMC it is common that one or more partitions
> on the eMMC are used to store MAC addresses and Wi-Fi calibration EEPROM
> data. Allow referencing the partition in device tree for the kernel and
> Wi-Fi drivers accessing it via the NVMEM layer.
>
> Signed-off-by: Daniel Golle <daniel@xxxxxxxxxxxxxx>
> ---
> block/Kconfig | 8 ++
> block/Makefile | 1 +
> block/blk-nvmem.c | 187 ++++++++++++++++++++++++++++++++++++++++
> block/blk.h | 13 +++
> block/genhd.c | 2 +
> block/partitions/core.c | 2 +
> 6 files changed, 213 insertions(+)
> create mode 100644 block/blk-nvmem.c
>
> diff --git a/block/Kconfig b/block/Kconfig
> index 86122e459fe04..185573877964d 100644
> --- a/block/Kconfig
> +++ b/block/Kconfig
> @@ -218,6 +218,14 @@ config BLK_MQ_VIRTIO
> config BLK_PM
> def_bool PM
>
> +config BLK_NVMEM
> + bool "Block device NVMEM provider"
> + depends on OF
> + help
> + Allow block devices (or partitions) to act as NVMEM prodivers,
> + typically using if an eMMC is used to store MAC addresses or Wi-Fi

Odd grammar... May be "typically used with eMMC to store ..."

> + calibration data on embedded devices.
> +
> # do not use in new code
> config BLOCK_HOLDER_DEPRECATED
> bool
> diff --git a/block/Makefile b/block/Makefile
> index 46ada9dc8bbfe..03c0bfa8642df 100644
> --- a/block/Makefile
> +++ b/block/Makefile
> @@ -34,6 +34,7 @@ obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
> obj-$(CONFIG_BLK_WBT) += blk-wbt.o
> obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
> obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
> +obj-$(CONFIG_BLK_NVMEM) += blk-nvmem.o
> obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o
> obj-$(CONFIG_BLK_PM) += blk-pm.o
> obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o \
> diff --git a/block/blk-nvmem.c b/block/blk-nvmem.c
> new file mode 100644
> index 0000000000000..8238511049f56
> --- /dev/null
> +++ b/block/blk-nvmem.c
> @@ -0,0 +1,187 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * block device NVMEM provider
> + *
> + * Copyright (c) 2023 Daniel Golle <daniel@xxxxxxxxxxxxxx>
> + *
> + * Useful on devices using a partition on an eMMC for MAC addresses or
> + * Wi-Fi calibration EEPROM data.
> + */
> +
> +#include "blk.h"
> +#include <linux/nvmem-provider.h>
> +#include <linux/of.h>
> +#include <linux/pagemap.h>
> +#include <linux/property.h>
> +
> +/* List of all NVMEM devices */
> +static LIST_HEAD(nvmem_devices);
> +static DEFINE_MUTEX(devices_mutex);
> +
> +struct blk_nvmem {
> + struct nvmem_device *nvmem;
> + struct block_device *bdev;
> + struct list_head list;
> +};
> +
> +static int blk_nvmem_reg_read(void *priv, unsigned int from,
> + void *val, size_t bytes)
> +{
> + pgoff_t f_index = from >> PAGE_SHIFT;
> + struct address_space *mapping;
> + struct blk_nvmem *bnv = priv;

Why not have bnv passed as argument directly ?

> + size_t bytes_left = bytes;
> + struct folio *folio;
> + unsigned long offs, to_read;
> + void *p;
> +
> + if (!bnv->bdev)
> + return -ENODEV;
> +
> + offs = from & ((1 << PAGE_SHIFT) - 1);

offs = from & PAGE_MASK;

from being an int is really odd though.

> + mapping = bnv->bdev->bd_inode->i_mapping;
> +
> + while (bytes_left) {
> + folio = read_mapping_folio(mapping, f_index++, NULL);
> + if (IS_ERR(folio))
> + return PTR_ERR(folio);
> +
> + to_read = min_t(unsigned long, bytes_left, PAGE_SIZE - offs);
> + p = folio_address(folio) + offset_in_folio(folio, offs);
> + memcpy(val, p, to_read);
> + offs = 0;
> + bytes_left -= to_read;
> + val += to_read;
> + folio_put(folio);
> + }
> +
> + return bytes_left == 0 ? 0 : -EIO;

How can bytes_left be 0 here given the above loop with no break ?

> +}
> +
> +void blk_register_nvmem(struct block_device *bdev)
> +{
> + struct fwnode_handle *fw_parts = NULL, *fw_part_c, *fw_part = NULL;
> + struct nvmem_config config = {};
> + const char *partname, *uuid;
> + struct device *dev, *p0dev;
> + struct blk_nvmem *bnv;
> + u32 reg;
> +
> + /*
> + * skip devices which set GENHD_FL_NO_NVMEM
> + *
> + * This flag is used for mtdblock and ubiblock devices because
> + * both, MTD and UBI already implement their own NVMEM provider.
> + * To avoid registering multiple NVMEM providers for the same
> + * device node, skip the block NVMEM provider.
> + */
> + if (bdev->bd_disk->flags & GENHD_FL_NO_NVMEM)
> + return;
> +
> + /* skip too large devices */

Why ? Is that defined in some standards somewhere ?

> + if (bdev_nr_bytes(bdev) > INT_MAX)
> + return;
> +
> + dev = &bdev->bd_device;
> + if (!bdev_is_partition(bdev)) {
> + fw_part = dev->fwnode;
> +
> + if (!fw_part && dev->parent)
> + fw_part = dev->parent->fwnode;
> +
> + goto no_parts;
> + }
> +
> + p0dev = &bdev->bd_disk->part0->bd_device;
> + fw_parts = device_get_named_child_node(p0dev, "partitions");
> + if (!fw_parts)
> + fw_parts = device_get_named_child_node(p0dev->parent, "partitions");
> +
> + if (!fw_parts)
> + return;
> +
> + fwnode_for_each_child_node(fw_parts, fw_part_c) {
> + if (!fwnode_property_read_string(fw_part_c, "uuid", &uuid) &&
> + (!bdev->bd_meta_info || strncmp(uuid,
> + bdev->bd_meta_info->uuid,
> + PARTITION_META_INFO_UUIDLTH)))
> + continue;
> +
> + if (!fwnode_property_read_string(fw_part_c, "partname", &partname) &&
> + (!bdev->bd_meta_info || strncmp(partname,
> + bdev->bd_meta_info->volname,
> + PARTITION_META_INFO_VOLNAMELTH)))
> + continue;
> +
> + /*
> + * partition addresses (reg) in device tree greater than
> + * DISK_MAX_PARTS can be used to match uuid or partname only
> + */
> + if (!fwnode_property_read_u32(fw_part_c, "reg", &reg) &&
> + reg < DISK_MAX_PARTS && bdev->bd_partno != reg)
> + continue;
> +
> + fw_part = fw_part_c;
> + break;
> + }
> +
> +no_parts:
> + if (!fwnode_device_is_compatible(fw_part, "nvmem-cells"))
> + return;
> +
> + bnv = kzalloc(sizeof(struct blk_nvmem), GFP_KERNEL);
> + if (!bnv)
> + return;
> +
> + config.id = NVMEM_DEVID_NONE;
> + config.dev = &bdev->bd_device;
> + config.name = dev_name(&bdev->bd_device);
> + config.owner = THIS_MODULE;
> + config.priv = bnv;
> + config.reg_read = blk_nvmem_reg_read;
> + config.size = bdev_nr_bytes(bdev);
> + config.word_size = 1;
> + config.stride = 1;
> + config.read_only = true;
> + config.root_only = true;
> + config.ignore_wp = true;
> + config.of_node = to_of_node(fw_part);
> +
> + bnv->bdev = bdev;
> + bnv->nvmem = nvmem_register(&config);
> + if (IS_ERR(bnv->nvmem)) {
> + /* Just ignore if there is no NVMEM support in the kernel */

If there is not, why would this function even be called ?

> + if (PTR_ERR(bnv->nvmem) != -EOPNOTSUPP)
> + dev_err_probe(&bdev->bd_device, PTR_ERR(bnv->nvmem),
> + "Failed to register NVMEM device\n");
> +
> + kfree(bnv);
> + return;
> + }
> +
> + mutex_lock(&devices_mutex);
> + list_add_tail(&bnv->list, &nvmem_devices);
> + mutex_unlock(&devices_mutex);
> +}
> +
> +void blk_unregister_nvmem(struct block_device *bdev)
> +{
> + struct blk_nvmem *bnv_c, *bnv = NULL;
> +
> + mutex_lock(&devices_mutex);
> + list_for_each_entry(bnv_c, &nvmem_devices, list)
> + if (bnv_c->bdev == bdev) {
> + bnv = bnv_c;
> + break;
> + }

Curly brackets for list_for_each_entry() {} would be nice, even though they are
not strictly necessary in this case.

> +
> + if (!bnv) {
> + mutex_unlock(&devices_mutex);
> + return;
> + }
> +
> + list_del(&bnv->list);
> + mutex_unlock(&devices_mutex);
> + nvmem_unregister(bnv->nvmem);
> + kfree(bnv);
> +}
> diff --git a/block/blk.h b/block/blk.h
> index 686712e138352..7423d0d5494e9 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -515,4 +515,17 @@ static inline int req_ref_read(struct request *req)
> return atomic_read(&req->ref);
> }
>
> +#ifdef CONFIG_BLK_NVMEM
> +void blk_register_nvmem(struct block_device *bdev);
> +void blk_unregister_nvmem(struct block_device *bdev);
> +#else
> +static inline void blk_register_nvmem(struct block_device *bdev)
> +{
> +}

These could go at the end of the static inline line.

> +
> +static inline void blk_unregister_nvmem(struct block_device *bdev)
> +{
> +}
> +#endif
> +
> #endif /* BLK_INTERNAL_H */
> diff --git a/block/genhd.c b/block/genhd.c
> index 3d287b32d50df..b306e0f407bb2 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -527,6 +527,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
> disk_update_readahead(disk);
> disk_add_events(disk);
> set_bit(GD_ADDED, &disk->state);
> + blk_register_nvmem(disk->part0);
> return 0;
>
> out_unregister_bdi:
> @@ -569,6 +570,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
> if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
> bdev->bd_holder_ops->mark_dead(bdev);
> mutex_unlock(&bdev->bd_holder_lock);
> + blk_unregister_nvmem(bdev);
>
> put_device(&bdev->bd_device);
> rcu_read_lock();
> diff --git a/block/partitions/core.c b/block/partitions/core.c
> index 13a7341299a91..68bd655f5e68e 100644
> --- a/block/partitions/core.c
> +++ b/block/partitions/core.c
> @@ -404,6 +404,8 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
> /* suppress uevent if the disk suppresses it */
> if (!dev_get_uevent_suppress(ddev))
> kobject_uevent(&pdev->kobj, KOBJ_ADD);
> +
> + blk_register_nvmem(bdev);
> return bdev;
>
> out_del:

--
Damien Le Moal
Western Digital Research