[PATCH RFC v2 09/18] cxl/mem: Read extents on memory device discovery

From: Ira Weiny
Date: Tue Aug 29 2023 - 01:34:19 EST


When a Dynamic Capacity Device (DCD) is realized, some extents may
already be available within the DC Regions. This can happen if the host
has accepted extents and then been rebooted, or at any other time the host
driver software has become out of sync with the device hardware.

Read the available extents during probe and store them for later
use.

Co-developed-by: Navneet Singh <navneet.singh@xxxxxxxxx>
Signed-off-by: Navneet Singh <navneet.singh@xxxxxxxxx>
Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>

---
Change for v2:
[iweiny: new patch]
---
drivers/cxl/core/mbox.c | 195 ++++++++++++++++++++++++++++++++++++++++++++++++
drivers/cxl/cxlmem.h | 36 +++++++++
drivers/cxl/pci.c | 4 +
3 files changed, 235 insertions(+)

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index d769814f80e2..9b08c40ef484 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -824,6 +824,37 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
}
EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);

+/*
+ * Build a host-side record from one device-reported extent and index it in
+ * mds->dc_extent_list by its starting DPA.
+ *
+ * Return: 0 on success, -ENOMEM if the record cannot be allocated, otherwise
+ * the xa_insert() error.  A -EBUSY result is reported once as a duplicate
+ * extent.  The record is freed on every failure path.
+ */
+static int cxl_store_dc_extent(struct cxl_memdev_state *mds,
+			       struct cxl_dc_extent *dc_extent)
+{
+	struct cxl_dc_extent_data *rec = kzalloc(sizeof(*rec), GFP_KERNEL);
+	struct device *dev = mds->cxlds.dev;
+	int err;
+
+	if (!rec)
+		return -ENOMEM;
+
+	/* Device fields are little endian; the record keeps CPU-endian values */
+	rec->dpa_start = le64_to_cpu(dc_extent->start_dpa);
+	rec->length = le64_to_cpu(dc_extent->length);
+	rec->shared_extent_seq = le16_to_cpu(dc_extent->shared_extn_seq);
+	memcpy(rec->tag, dc_extent->tag, sizeof(rec->tag));
+
+	dev_dbg(dev, "dynamic capacity extent DPA:0x%llx LEN:%llx\n",
+		rec->dpa_start, rec->length);
+
+	err = xa_insert(&mds->dc_extent_list, rec->dpa_start, rec, GFP_KERNEL);
+	if (!err)
+		return 0;
+
+	if (err == -EBUSY)
+		dev_warn_once(dev, "Duplicate extent DPA:%llx LEN:%llx\n",
+			      rec->dpa_start, rec->length);
+	kfree(rec);
+
+	return err;
+}
+
/*
* General Media Event Record
* CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
@@ -1339,6 +1370,149 @@ int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);

+/*
+ * Ask the device how many dynamic capacity extents it holds, without
+ * transferring any extent records.
+ *
+ * mds:            the memory device state
+ * extent_gen_num: receives the extent list generation number reported by the
+ *                 device; written as 0 when the device is not queried or the
+ *                 command fails
+ *
+ * Return: the total extent count (0 when GET_DC_EXTENT_LIST is not enabled),
+ * or -ERRNO if the mailbox command fails.
+ */
+static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
+				     unsigned int *extent_gen_num)
+{
+	struct cxl_mbox_get_dc_extent get_dc_extent;
+	struct cxl_mbox_dc_extents dc_extents;
+	struct device *dev = mds->cxlds.dev;
+	struct cxl_mbox_cmd mbox_cmd;
+	unsigned int count;
+	int rc;
+
+	/* The caller logs this value even when we return early below */
+	*extent_gen_num = 0;
+
+	/* Check GET_DC_EXTENT_LIST is supported by device */
+	if (!test_bit(CXL_DCD_ENABLED_GET_EXTENT_LIST, mds->dcd_cmds)) {
+		dev_dbg(dev, "unsupported cmd : get dyn cap extent list\n");
+		return 0;
+	}
+
+	/* Request zero extents from index 0: only the header is wanted */
+	get_dc_extent = (struct cxl_mbox_get_dc_extent) {
+		.extent_cnt = cpu_to_le32(0),
+		.start_extent_index = cpu_to_le32(0),
+	};
+
+	/*
+	 * dc_extents is on the stack and only has room for the fixed header,
+	 * so the output must be capped at sizeof(dc_extents) rather than the
+	 * full mailbox payload size.  The whole header is required back
+	 * because both counters read below live in it.
+	 */
+	mbox_cmd = (struct cxl_mbox_cmd) {
+		.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
+		.payload_in = &get_dc_extent,
+		.size_in = sizeof(get_dc_extent),
+		.size_out = sizeof(dc_extents),
+		.payload_out = &dc_extents,
+		.min_out = sizeof(dc_extents),
+	};
+
+	rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+	if (rc < 0)
+		return rc;
+
+	count = le32_to_cpu(dc_extents.total_extent_cnt);
+	*extent_gen_num = le32_to_cpu(dc_extents.extent_list_num);
+
+	/*
+	 * NOTE(review): the count is returned through an int, so a device
+	 * reporting more than INT_MAX extents would look like an error to
+	 * the caller - confirm whether that needs an explicit check.
+	 */
+	return count;
+}
+
+/*
+ * Read exp_cnt extents from the device and store each one in
+ * mds->dc_extent_list.
+ *
+ * The list is fetched with as many GET_DC_EXTENT_LIST commands as it takes.
+ * Every response must report the generation number and total count the
+ * caller observed (start_gen_num, exp_cnt); when it does not, the read
+ * restarts from index 0, at most 3 times.
+ *
+ * Return: 0 on success or when the command is not enabled, -ENOMEM if the
+ * payload buffer cannot be allocated, -EIO if the list keeps changing or a
+ * response carries an unusable extent count, otherwise the error from the
+ * mailbox command or from storing an extent.
+ */
+static int cxl_dev_get_dc_extents(struct cxl_memdev_state *mds,
+				  unsigned int start_gen_num,
+				  unsigned int exp_cnt)
+{
+	struct cxl_mbox_dc_extents *dc_extents;
+	unsigned int start_index, total_read;
+	struct device *dev = mds->cxlds.dev;
+	struct cxl_mbox_cmd mbox_cmd;
+	int retry = 3;
+	int rc;
+
+	/* Check GET_DC_EXTENT_LIST is supported by device */
+	if (!test_bit(CXL_DCD_ENABLED_GET_EXTENT_LIST, mds->dcd_cmds)) {
+		dev_dbg(dev, "unsupported cmd : get dyn cap extent list\n");
+		return 0;
+	}
+
+	dc_extents = kvmalloc(mds->payload_size, GFP_KERNEL);
+	if (!dc_extents)
+		return -ENOMEM;
+
+reset:
+	/*
+	 * NOTE(review): extents stored before a restart remain in
+	 * mds->dc_extent_list, so reading them again looks like it will hit
+	 * -EBUSY in cxl_store_dc_extent() - confirm, and drop the partial
+	 * set here if so.
+	 */
+	total_read = 0;
+	start_index = 0;
+	do {
+		unsigned int nr_ext, total_extent_cnt, gen_num;
+		unsigned int remaining = exp_cnt - start_index;
+		struct cxl_mbox_get_dc_extent get_dc_extent;
+
+		/* Request fields are __le32: convert from CPU order */
+		get_dc_extent = (struct cxl_mbox_get_dc_extent) {
+			.extent_cnt = cpu_to_le32(remaining),
+			.start_extent_index = cpu_to_le32(start_index),
+		};
+
+		/* The full header is read below, so require all of it back */
+		mbox_cmd = (struct cxl_mbox_cmd) {
+			.opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
+			.payload_in = &get_dc_extent,
+			.size_in = sizeof(get_dc_extent),
+			.size_out = mds->payload_size,
+			.payload_out = dc_extents,
+			.min_out = sizeof(*dc_extents),
+		};
+
+		rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+		if (rc < 0)
+			goto out;
+
+		nr_ext = le32_to_cpu(dc_extents->ret_extent_cnt);
+		total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
+		gen_num = le32_to_cpu(dc_extents->extent_list_num);
+
+		dev_dbg(dev, "Get extent list count:%d generation Num:%d\n",
+			total_extent_cnt, gen_num);
+
+		if (gen_num != start_gen_num || exp_cnt != total_extent_cnt) {
+			dev_err(dev, "Extent list changed while reading; %u != %u : %u != %u\n",
+				gen_num, start_gen_num, exp_cnt, total_extent_cnt);
+			if (retry--)
+				goto reset;
+			/* Leave through "out" so the payload buffer is freed */
+			rc = -EIO;
+			goto out;
+		}
+
+		/*
+		 * Do not trust the returned count: 0 with extents still
+		 * outstanding would spin forever, more than was asked for
+		 * overshoots exp_cnt, and more than fits in the payload
+		 * buffer would be read from beyond the allocation.
+		 */
+		if ((remaining && !nr_ext) || nr_ext > remaining ||
+		    sizeof(*dc_extents) +
+		    (u64)nr_ext * sizeof(dc_extents->extent[0]) >
+		    mds->payload_size) {
+			dev_err(dev, "Invalid returned extent count %u\n",
+				nr_ext);
+			rc = -EIO;
+			goto out;
+		}
+
+		total_read += nr_ext;
+
+		for (unsigned int i = 0; i < nr_ext; i++) {
+			dev_dbg(dev, "Storing extent %d/%d\n",
+				start_index + i, exp_cnt);
+			rc = cxl_store_dc_extent(mds, &dc_extents->extent[i]);
+			if (rc)
+				goto out;
+		}
+
+		start_index += nr_ext;
+	} while (exp_cnt > total_read);
+
+out:
+	kvfree(dc_extents);
+	return rc;
+}
+
+/**
+ * cxl_dev_get_dynamic_capacity_extents() - Reads the dynamic capacity
+ *					     extent list.
+ * @mds: The memory device state
+ *
+ * Queries the device for its extent count and extent list generation number
+ * and, when extents are present, reads them and adds them to the host
+ * managed extent list.
+ *
+ * Return: 0 if the extents were read or there were none to read, -ERRNO on
+ * error.
+ */
+int cxl_dev_get_dynamic_capacity_extents(struct cxl_memdev_state *mds)
+{
+	/*
+	 * Start from 0: the count helper can return without writing the
+	 * generation number, and it is logged unconditionally below.
+	 */
+	unsigned int extent_gen_num = 0;
+	int rc;
+
+	rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
+	dev_dbg(mds->cxlds.dev, "Extent count: %d Generation Num: %d\n",
+		rc, extent_gen_num);
+	if (rc <= 0) /* 0 == no records found */
+		return rc;
+
+	/* rc > 0 here: it is the number of extents to fetch */
+	return cxl_dev_get_dc_extents(mds, extent_gen_num, rc);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_dev_get_dynamic_capacity_extents, CXL);
+
static int add_dpa_res(struct device *dev, struct resource *parent,
struct resource *res, resource_size_t start,
resource_size_t size, const char *type)
@@ -1530,9 +1704,23 @@ int cxl_poison_state_init(struct cxl_memdev_state *mds)
}
EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL);

+/*
+ * Release action registered for a memdev state: free every extent record
+ * held in dc_extent_list, then tear the xarray itself down.
+ */
+static void cxl_destroy_mds(void *_mds)
+{
+	struct cxl_memdev_state *mds = _mds;
+	struct xarray *extents = &mds->dc_extent_list;
+	struct cxl_dc_extent_data *entry;
+	unsigned long dpa;
+
+	/* Unlink each record from the array before freeing it */
+	xa_for_each(extents, dpa, entry) {
+		xa_erase(extents, dpa);
+		kfree(entry);
+	}
+
+	xa_destroy(extents);
+}
+
struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
{
struct cxl_memdev_state *mds;
+ int rc;

mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL);
if (!mds) {
@@ -1544,6 +1732,13 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mutex_init(&mds->event.log_lock);
mds->cxlds.dev = dev;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
+ xa_init(&mds->dc_extent_list);
+
+ rc = devm_add_action_or_reset(dev, cxl_destroy_mds, mds);
+ if (rc) {
+ dev_err(dev, "Failed to set up memdev state; %d\n", rc);
+ return ERR_PTR(rc);
+ }

return mds;
}
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 8c8f47b397ab..ad690600c1b9 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -6,6 +6,7 @@
#include <linux/cdev.h>
#include <linux/uuid.h>
#include <linux/rcuwait.h>
+#include <linux/xarray.h>
#include "cxl.h"

/* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
@@ -509,6 +510,7 @@ struct cxl_memdev_state {
u8 nr_dc_region;
struct cxl_dc_region_info dc_region[CXL_MAX_DC_REGION];
size_t dc_event_log_size;
+ struct xarray dc_extent_list;

struct cxl_event_state event;
struct cxl_poison_state poison;
@@ -749,6 +751,26 @@ struct cxl_event_mem_module {
u8 reserved[0x3d];
} __packed;

+#define CXL_DC_EXTENT_TAG_LEN 0x10
+struct cxl_dc_extent_data { /* host-side, CPU-endian record of one extent; filled in by cxl_store_dc_extent() */
+ u64 dpa_start; /* converted from cxl_dc_extent.start_dpa; also the dc_extent_list index */
+ u64 length; /* converted from cxl_dc_extent.length */
+ u8 tag[CXL_DC_EXTENT_TAG_LEN]; /* byte-for-byte copy of cxl_dc_extent.tag */
+ u16 shared_extent_seq; /* converted from cxl_dc_extent.shared_extn_seq */
+};
+
+/*
+ * Dynamic Capacity Event Record
+ * CXL rev 3.0 section 8.2.9.2.1.5; Table 8-47
+ */
+struct cxl_dc_extent { /* one extent as reported by the device; element type of cxl_mbox_dc_extents.extent[] */
+ __le64 start_dpa; /* little endian; stored host-side as cxl_dc_extent_data.dpa_start */
+ __le64 length; /* little endian; stored host-side as cxl_dc_extent_data.length */
+ u8 tag[CXL_DC_EXTENT_TAG_LEN]; /* copied unmodified into cxl_dc_extent_data.tag */
+ __le16 shared_extn_seq; /* little endian; stored host-side as cxl_dc_extent_data.shared_extent_seq */
+ u8 reserved[6]; /* brings the packed entry to 40 bytes */
+} __packed;
+
struct cxl_mbox_get_partition_info {
__le64 active_volatile_cap;
__le64 active_persistent_cap;
@@ -796,6 +818,19 @@ struct cxl_mbox_dynamic_capacity {
#define CXL_REGIONS_RETURNED(size_out) \
((size_out - 8) / sizeof(struct cxl_dc_region_config))

+struct cxl_mbox_get_dc_extent { /* input payload sent with CXL_MBOX_OP_GET_DC_EXTENT_LIST */
+ __le32 extent_cnt; /* presumably the number of extents requested; 0 is sent when only the counts are wanted */
+ __le32 start_extent_index; /* presumably the index of the first extent to return; 0 restarts from the beginning */
+} __packed;
+
+struct cxl_mbox_dc_extents { /* output payload of CXL_MBOX_OP_GET_DC_EXTENT_LIST: 16-byte header plus extent entries */
+ __le32 ret_extent_cnt; /* number of entries in extent[] for this response */
+ __le32 total_extent_cnt; /* total extents the device reports; compared against the expected count on every read */
+ __le32 extent_list_num; /* extent list generation number; a change mid-read restarts the read */
+ u8 rsvd[4]; /* not read by the driver */
+ struct cxl_dc_extent extent[]; /* ret_extent_cnt entries follow the header */
+} __packed;
+
/* Set Timestamp CXL 3.0 Spec 8.2.9.4.2 */
struct cxl_mbox_set_timestamp_in {
__le64 timestamp;
@@ -920,6 +955,7 @@ int cxl_internal_send_cmd(struct cxl_memdev_state *mds,
struct cxl_mbox_cmd *cmd);
int cxl_dev_state_identify(struct cxl_memdev_state *mds);
int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds);
+int cxl_dev_get_dynamic_capacity_extents(struct cxl_memdev_state *mds);
int cxl_await_media_ready(struct cxl_dev_state *cxlds);
int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
int cxl_mem_create_range_info(struct cxl_memdev_state *mds);
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index a9b110ff1176..10c1a583113c 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -930,6 +930,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
dev_dbg(&pdev->dev, "No RAS reporting unmasked\n");

+ rc = cxl_dev_get_dynamic_capacity_extents(mds);
+ if (rc)
+ return rc;
+
pci_save_state(pdev);

return rc;

--
2.41.0