Re: [PATCH RESEND v6 2/9] dax: Introduce holder for dax_device

From: Dan Williams
Date: Fri Aug 20 2021 - 16:19:34 EST


On Fri, Jul 30, 2021 at 3:02 AM Shiyang Ruan <ruansy.fnst@xxxxxxxxxxx> wrote:
>
> To easily track filesystem from a pmem device, we introduce a holder for
> dax_device structure, and also its operation. This holder is used to
> remember who is using this dax_device:
> - When it is the backend of a filesystem, the holder will be the
> superblock of this filesystem.
> - When this pmem device is one of the targets in a mapped device, the
> holder will be this mapped device. In this case, the mapped device
> has its own dax_device and it will follow the first rule. So that we
> can finally track to the filesystem we needed.
>
> The holder and holder_ops will be set when filesystem is being mounted,
> or an target device is being activated.
>
> Signed-off-by: Shiyang Ruan <ruansy.fnst@xxxxxxxxxxx>
> ---
> drivers/dax/super.c | 46 +++++++++++++++++++++++++++++++++++++++++++++
> include/linux/dax.h | 17 +++++++++++++++++
> 2 files changed, 63 insertions(+)
>
> diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> index 5fa6ae9dbc8b..00c32dfa5665 100644
> --- a/drivers/dax/super.c
> +++ b/drivers/dax/super.c
> @@ -214,6 +214,8 @@ enum dax_device_flags {
> * @cdev: optional character interface for "device dax"
> * @host: optional name for lookups where the device path is not available
> * @private: dax driver private data
> + * @holder_rwsem: prevent unregistration while holder_ops is in progress
> + * @holder_data: holder of a dax_device: could be filesystem or mapped device
> * @flags: state and boolean properties
> */
> struct dax_device {
> @@ -222,8 +224,11 @@ struct dax_device {
> struct cdev cdev;
> const char *host;
> void *private;
> + struct rw_semaphore holder_rwsem;
> + void *holder_data;
> unsigned long flags;
> const struct dax_operations *ops;
> + const struct dax_holder_operations *holder_ops;
> };
>
> static ssize_t write_cache_show(struct device *dev,
> @@ -373,6 +378,25 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> }
> EXPORT_SYMBOL_GPL(dax_zero_page_range);
>
> +int dax_holder_notify_failure(struct dax_device *dax_dev, loff_t offset,
> + size_t size, void *data)
I took a look at patch3 and had some questions about the api.

Can you add kernel-doc for this api and specifically clarify what is
@data used for vs dax_dev->holder_data?

I also think the holder needs to know whether this failure is being
signaled synchronously. or asynchronously. In the synchronous case a
process has consumed poison and action needs to be taken immediately.
In the asynchronous case the driver stack has encountered failed
address ranges and is notifying the holder to avoid those ranges, but
no immediate action needs to be taken to shoot down mappings. For
example, I would use the synchronous notification when
memory_failure() is invoked with the "action required" indication, and
the asynchronous notification when an NVDIMM_REVALIDATE_POISON event
fires, or the "action optional" memory_failure() case.

In short I think the interface just needs a flags argument.


> +{
> + int rc;
> +
> + if (!dax_dev)
> + return -ENXIO;
> +
> + if (!dax_dev->holder_data)
> + return -EOPNOTSUPP;
> +
> + down_read(&dax_dev->holder_rwsem);
> + rc = dax_dev->holder_ops->notify_failure(dax_dev, offset,
> + size, data);
> + up_read(&dax_dev->holder_rwsem);
> + return rc;
> +}
> +EXPORT_SYMBOL_GPL(dax_holder_notify_failure);
> +
> #ifdef CONFIG_ARCH_HAS_PMEM_API
> void arch_wb_cache_pmem(void *addr, size_t size);
> void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
> @@ -603,6 +627,7 @@ struct dax_device *alloc_dax(void *private, const char *__host,
> dax_add_host(dax_dev, host);
> dax_dev->ops = ops;
> dax_dev->private = private;
> + init_rwsem(&dax_dev->holder_rwsem);
> if (flags & DAXDEV_F_SYNC)
> set_dax_synchronous(dax_dev);
>
> @@ -624,6 +649,27 @@ void put_dax(struct dax_device *dax_dev)
> }
> EXPORT_SYMBOL_GPL(put_dax);
>
> +void dax_set_holder(struct dax_device *dax_dev, void *holder,
> + const struct dax_holder_operations *ops)
> +{
> + if (!dax_dev)
> + return;
> + down_write(&dax_dev->holder_rwsem);
> + dax_dev->holder_data = holder;
> + dax_dev->holder_ops = ops;
> + up_write(&dax_dev->holder_rwsem);
> +}
> +EXPORT_SYMBOL_GPL(dax_set_holder);
> +
> +void *dax_get_holder(struct dax_device *dax_dev)
> +{
> + if (!dax_dev)
> + return NULL;
> +
> + return dax_dev->holder_data;
> +}
> +EXPORT_SYMBOL_GPL(dax_get_holder);
> +
> /**
> * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
> * @host: alternate name for the device registered by a dax driver
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index b52f084aa643..6f4b5c97ceb0 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -38,10 +38,17 @@ struct dax_operations {
> int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
> };
>
> +struct dax_holder_operations {
> + int (*notify_failure)(struct dax_device *, loff_t, size_t, void *);
> +};
> +
> extern struct attribute_group dax_attribute_group;
>
> #if IS_ENABLED(CONFIG_DAX)
> struct dax_device *dax_get_by_host(const char *host);
> +void dax_set_holder(struct dax_device *dax_dev, void *holder,
> + const struct dax_holder_operations *ops);
> +void *dax_get_holder(struct dax_device *dax_dev);
> struct dax_device *alloc_dax(void *private, const char *host,
> const struct dax_operations *ops, unsigned long flags);
> void put_dax(struct dax_device *dax_dev);
> @@ -77,6 +84,14 @@ static inline struct dax_device *dax_get_by_host(const char *host)
> {
> return NULL;
> }
> +static inline void dax_set_holder(struct dax_device *dax_dev, void *holder,
> + const struct dax_holder_operations *ops)
> +{
> +}
> +static inline void *dax_get_holder(struct dax_device *dax_dev)
> +{
> + return NULL;
> +}
> static inline struct dax_device *alloc_dax(void *private, const char *host,
> const struct dax_operations *ops, unsigned long flags)
> {
> @@ -226,6 +241,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
> size_t bytes, struct iov_iter *i);
> int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> size_t nr_pages);
> +int dax_holder_notify_failure(struct dax_device *dax_dev, loff_t offset,
> + size_t size, void *data);
> void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
>
> ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
> --
> 2.32.0
>
>
>