Re: [PATCH RFC v2 12/18] cxl/region: Notify regions of DC changes

From: Jonathan Cameron
Date: Tue Aug 29 2023 - 12:41:55 EST


On Mon, 28 Aug 2023 22:21:03 -0700
Ira Weiny <ira.weiny@xxxxxxxxx> wrote:

> In order for a user to use dynamic capacity effectively they need to
> know when dynamic capacity is available. Thus when Dynamic Capacity
> (DC) extents are added or removed by a DC device the regions affected
> need to be notified. Ultimately the DAX region uses the memory
> associated with DC extents. However, remember that CXL DAX regions
> maintain any interleave details between devices.
>
> When a DCD event occurs, iterate all CXL endpoint decoders and notify
> regions which contain the endpoints affected by the event. In turn
> notify the DAX regions of the changes to the DAX region extents.
>
> For now interleave is handled by creating simple 1:1 mappings between
> the CXL DAX region and DAX region layers. Future implementations will
> need to resolve when to actually surface a DAX region extent and pass
> the notification along.
>
> Remember that adding capacity is safe because there is no chance of the
> memory being in use. Also remember at this point releasing capacity is
> straight forward because DAX devices do not yet have references to the
> extents. Future patches will handle that complication.
>
> Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>
>

A few trivial comments on this. Lot here so I'll take a closer look
at some point after doing a light pass over the rest of the series.





> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 80cffa40e91a..d3c4c9c87392 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -104,6 +104,55 @@ static int cxl_debugfs_poison_clear(void *data, u64 dpa)
> DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL,
> cxl_debugfs_poison_clear, "%llx\n");
>
> +static int match_ep_decoder_by_range(struct device *dev, void *data)
> +{
> + struct cxl_dc_extent_data *extent = data;
> + struct cxl_endpoint_decoder *cxled;
> +
> + if (!is_endpoint_decoder(dev))
> + return 0;

blank line

> + cxled = to_cxl_endpoint_decoder(dev);
> + return cxl_dc_extent_in_ed(cxled, extent);
> +}
> +
> +static struct cxl_endpoint_decoder *cxl_find_ed(struct cxl_memdev_state *mds,
> + struct cxl_dc_extent_data *extent)
> +{
> + struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
> + struct cxl_port *endpoint = cxlmd->endpoint;
> + struct device *dev;
> +
> + dev = device_find_child(&endpoint->dev, extent,
> + match_ep_decoder_by_range);
> + if (!dev) {
> + dev_dbg(mds->cxlds.dev, "Extent DPA:%llx LEN:%llx not mapped\n",
> + extent->dpa_start, extent->length);
> + return NULL;
> + }
> +
> + return to_cxl_endpoint_decoder(dev);
> +}
> +
> +static int cxl_mem_notify(struct device *dev, struct cxl_drv_nd *nd)
> +{
> + struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
> + struct cxl_endpoint_decoder *cxled;
> + struct cxl_dc_extent_data *extent;
> + int rc = 0;
> +
> + extent = nd->extent;
> + dev_dbg(dev, "notify DC action %d DPA:%llx LEN:%llx\n",
> + nd->event, extent->dpa_start, extent->length);
> +
> + cxled = cxl_find_ed(mds, extent);
> + if (!cxled)
> + return 0;
Blank line.

> + rc = cxl_ed_notify_extent(cxled, nd);
> + put_device(&cxled->cxld.dev);
> + return rc;
> +}
> +
> static int cxl_mem_probe(struct device *dev)
> {
> struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> @@ -247,6 +296,7 @@ __ATTRIBUTE_GROUPS(cxl_mem);
> static struct cxl_driver cxl_mem_driver = {
> .name = "cxl_mem",
> .probe = cxl_mem_probe,
> + .notify = cxl_mem_notify,
> .id = CXL_DEVICE_MEMORY_EXPANDER,
> .drv = {
> .dev_groups = cxl_mem_groups,
> diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
> index 057b00b1d914..44cbd28668f1 100644
> --- a/drivers/dax/cxl.c
> +++ b/drivers/dax/cxl.c
> @@ -59,6 +59,29 @@ static int cxl_dax_region_create_extent(struct dax_region *dax_region,
> return 0;
> }
>
> +static int cxl_dax_region_add_extent(struct cxl_dax_region *cxlr_dax,
> + struct cxl_dr_extent *cxl_dr_ext)
> +{

Why not have this helper in the earlier patch that introduced the code
this is factoring out? Will reduce churn in the set whilst not much hurting
readability of that patch.

> + /*
> + * get not zero is important because this is racing with the
> + * region driver which is racing with the memory device which
> + * could be removing the extent at the same time.
> + */
> + if (cxl_dr_extent_get_not_zero(cxl_dr_ext)) {
> + struct dax_region *dax_region;
> + int rc;
> +
> + dax_region = dev_get_drvdata(&cxlr_dax->dev);
> + dev_dbg(&cxlr_dax->dev, "Creating HPA:%llx LEN:%llx\n",
> + cxl_dr_ext->hpa_offset, cxl_dr_ext->hpa_length);
> + rc = cxl_dax_region_create_extent(dax_region, cxl_dr_ext);
> + cxl_dr_extent_put(cxl_dr_ext);
> + if (rc)
> + return rc;
> + }
> + return 0;
Perhaps flip logic
if (!cxl_dr_extent_get_not_zero())
return 0;

etc to reduce the code indent.
> +}
> +
> static int cxl_dax_region_create_extents(struct cxl_dax_region *cxlr_dax)
> {
> struct cxl_dr_extent *cxl_dr_ext;
> @@ -66,27 +89,68 @@ static int cxl_dax_region_create_extents(struct cxl_dax_region *cxlr_dax)
>
> dev_dbg(&cxlr_dax->dev, "Adding extents\n");
> xa_for_each(&cxlr_dax->extents, index, cxl_dr_ext) {
> - /*
> - * get not zero is important because this is racing with the
> - * region driver which is racing with the memory device which
> - * could be removing the extent at the same time.
> - */
> - if (cxl_dr_extent_get_not_zero(cxl_dr_ext)) {
> - struct dax_region *dax_region;
> - int rc;
> -
> - dax_region = dev_get_drvdata(&cxlr_dax->dev);
> - dev_dbg(&cxlr_dax->dev, "Found OFF:%llx LEN:%llx\n",
> - cxl_dr_ext->hpa_offset, cxl_dr_ext->hpa_length);
> - rc = cxl_dax_region_create_extent(dax_region, cxl_dr_ext);
> - cxl_dr_extent_put(cxl_dr_ext);
> - if (rc)
> - return rc;
> - }
> + int rc;
> +
> + rc = cxl_dax_region_add_extent(cxlr_dax, cxl_dr_ext);
> + if (rc)
> + return rc;
> }
> return 0;
> }
>
> +static int match_cxl_dr_extent(struct device *dev, void *data)
> +{
> + struct dax_reg_ext_dev *dr_reg_ext_dev;
> + struct dax_region_extent *dr_extent;
> +
> + if (!is_dr_ext_dev(dev))
> + return 0;
> +
> + dr_reg_ext_dev = to_dr_ext_dev(dev);
> + dr_extent = dr_reg_ext_dev->dr_extent;
> + return data == dr_extent->private_data;
> +}
> +
> +static int cxl_dax_region_rm_extent(struct cxl_dax_region *cxlr_dax,
> + struct cxl_dr_extent *cxl_dr_ext)
> +{
> + struct dax_reg_ext_dev *dr_reg_ext_dev;
> + struct dax_region *dax_region;
> + struct device *dev;
> +
> + dev = device_find_child(&cxlr_dax->dev, cxl_dr_ext,
> + match_cxl_dr_extent);
> + if (!dev)
> + return -EINVAL;

blank line.

> + dr_reg_ext_dev = to_dr_ext_dev(dev);
> + put_device(dev);
> + dax_region = dev_get_drvdata(&cxlr_dax->dev);
> + dax_region_ext_del_dev(dax_region, dr_reg_ext_dev);
blank line

> + return 0;
> +}
> +
> +static int cxl_dax_region_notify(struct device *dev,
> + struct cxl_drv_nd *nd)
> +{
> + struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
> + struct cxl_dr_extent *cxl_dr_ext = nd->cxl_dr_ext;
> + int rc = 0;
> +
> + switch (nd->event) {
> + case DCD_ADD_CAPACITY:
> + rc = cxl_dax_region_add_extent(cxlr_dax, cxl_dr_ext);
> + break;

Early returns in here will perhaps make this more readable and definitely
make it more compact.

> + case DCD_RELEASE_CAPACITY:
> + case DCD_FORCED_CAPACITY_RELEASE:
> + rc = cxl_dax_region_rm_extent(cxlr_dax, cxl_dr_ext);
> + break;
> + default:
> + dev_err(&cxlr_dax->dev, "Unknown DC event %d\n", nd->event);
> + break;
> + }
> + return rc;
> +}
> +
> static int cxl_dax_region_probe(struct device *dev)
> {
> struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev);
> @@ -134,6 +198,7 @@ static int cxl_dax_region_probe(struct device *dev)
> static struct cxl_driver cxl_dax_region_driver = {
> .name = "cxl_dax_region",
> .probe = cxl_dax_region_probe,
> + .notify = cxl_dax_region_notify,
> .id = CXL_DEVICE_DAX_REGION,
> .drv = {
> .suppress_bind_attrs = true,