[PATCH 14/26] cxl/region: Read existing extents on region creation

From: ira . weiny
Date: Mon Mar 25 2024 - 07:38:37 EST


From: Navneet Singh <navneet.singh@xxxxxxxxx>

Dynamic capacity device extents may be left in an accepted state on a
device due to an unexpected host crash. In this case creation of a new
region on top of the DC partition (region) is expected to expose those
extents for continued use.

Once all endpoint decoders are part of a region and the region is being
realized read the device extent list. For ease of review, this patch
stops after reading the extent list and leaves realization of the region
extents to a future patch.

Signed-off-by: Navneet Singh <navneet.singh@xxxxxxxxx>
Co-developed-by: Ira Weiny <ira.weiny@xxxxxxxxx>
Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>

---
Changes for v1:
[iweiny: remove extent list xarray]
[iweiny: Update spec references to 3.1]
[iweiny: use struct range in extents]
[iweiny: remove all reference tracking and let regions track extents
through the extent devices.]
[djbw/Jonathan/Fan: move extent tracking to endpoint decoders]
---
drivers/cxl/core/core.h | 9 +++
drivers/cxl/core/mbox.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++
drivers/cxl/core/region.c | 29 +++++++
drivers/cxl/cxlmem.h | 49 ++++++++++++
4 files changed, 279 insertions(+)

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 91abeffbe985..119b12362977 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -4,6 +4,8 @@
#ifndef __CXL_CORE_H__
#define __CXL_CORE_H__

+#include <cxlmem.h>
+
extern const struct device_type cxl_nvdimm_bridge_type;
extern const struct device_type cxl_nvdimm_type;
extern const struct device_type cxl_pmu_type;
@@ -28,6 +30,8 @@ void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
int cxl_region_init(void);
void cxl_region_exit(void);
int cxl_get_poison_by_endpoint(struct cxl_port *port);
+int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
+ struct cxl_dc_extent *dc_extent);
#else
static inline int cxl_get_poison_by_endpoint(struct cxl_port *port)
{
@@ -43,6 +47,11 @@ static inline int cxl_region_init(void)
static inline void cxl_region_exit(void)
{
}
+static inline int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
+ struct cxl_dc_extent *dc_extent)
+{
+ return 0;
+}
#define CXL_REGION_ATTR(x) NULL
#define CXL_REGION_TYPE(x) NULL
#define SET_CXL_REGION_ATTR(x)
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 58b31fa47b93..9e33a0976828 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -870,6 +870,53 @@ int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
}
EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);

+static int cxl_validate_extent(struct cxl_memdev_state *mds,
+ struct cxl_dc_extent *dc_extent)
+{
+ struct device *dev = mds->cxlds.dev;
+ uint64_t start, len;
+
+ start = le64_to_cpu(dc_extent->start_dpa);
+ len = le64_to_cpu(dc_extent->length);
+
+ /* Extents must not cross region boundary's */
+ for (int i = 0; i < mds->nr_dc_region; i++) {
+ struct cxl_dc_region_info *dcr = &mds->dc_region[i];
+
+ if (dcr->base <= start &&
+ (start + len) <= (dcr->base + dcr->decode_len)) {
+ dev_dbg(dev, "DC extent DPA %#llx - %#llx (DCR:%d:%#llx)\n",
+ start, start + len - 1, i, start - dcr->base);
+ return 0;
+ }
+ }
+
+ dev_err_ratelimited(dev,
+ "DC extent DPA %#llx - %#llx is not in any DC region\n",
+ start, start + len - 1);
+ return -EINVAL;
+}
+
+static bool cxl_dc_extent_in_ed(struct cxl_endpoint_decoder *cxled,
+ struct cxl_dc_extent *extent)
+{
+ uint64_t start = le64_to_cpu(extent->start_dpa);
+ uint64_t length = le64_to_cpu(extent->length);
+ struct range ext_range = (struct range){
+ .start = start,
+ .end = start + length - 1,
+ };
+ struct range ed_range = (struct range) {
+ .start = cxled->dpa_res->start,
+ .end = cxled->dpa_res->end,
+ };
+
+ dev_dbg(&cxled->cxld.dev, "Checking ED (%pr) for extent DPA:%#llx LEN:%#llx\n",
+ cxled->dpa_res, start, length);
+
+ return range_contains(&ed_range, &ext_range);
+}
+
void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
enum cxl_event_log_type type,
enum cxl_event_type event_type,
@@ -973,6 +1020,15 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
return rc;
}

+static struct cxl_memdev_state *
+cxled_to_mds(struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ return container_of(cxlds, struct cxl_memdev_state, cxlds);
+}
+
static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
enum cxl_event_log_type type)
{
@@ -1406,6 +1462,142 @@ int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds)
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_dynamic_capacity_identify, CXL);

+static int cxl_dev_get_dc_extent_cnt(struct cxl_memdev_state *mds,
+ unsigned int *extent_gen_num)
+{
+ struct cxl_mbox_get_dc_extent_in get_dc_extent;
+ struct cxl_mbox_get_dc_extent_out dc_extents;
+ struct cxl_mbox_cmd mbox_cmd;
+ unsigned int count;
+ int rc;
+
+ get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
+ .extent_cnt = cpu_to_le32(0),
+ .start_extent_index = cpu_to_le32(0),
+ };
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
+ .payload_in = &get_dc_extent,
+ .size_in = sizeof(get_dc_extent),
+ .size_out = sizeof(dc_extents),
+ .payload_out = &dc_extents,
+ .min_out = 1,
+ };
+
+ rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ if (rc < 0)
+ return rc;
+
+ count = le32_to_cpu(dc_extents.total_extent_cnt);
+ *extent_gen_num = le32_to_cpu(dc_extents.extent_list_num);
+
+ return count;
+}
+
+static int cxl_dev_get_dc_extents(struct cxl_endpoint_decoder *cxled,
+ unsigned int start_gen_num,
+ unsigned int exp_cnt)
+{
+ struct cxl_memdev_state *mds = cxled_to_mds(cxled);
+ unsigned int start_index, total_read;
+ struct device *dev = mds->cxlds.dev;
+ struct cxl_mbox_cmd mbox_cmd;
+
+ struct cxl_mbox_get_dc_extent_out *dc_extents __free(kfree) =
+ kvmalloc(mds->payload_size, GFP_KERNEL);
+ if (!dc_extents)
+ return -ENOMEM;
+
+ total_read = 0;
+ start_index = 0;
+ do {
+ unsigned int nr_ext, total_extent_cnt, gen_num;
+ struct cxl_mbox_get_dc_extent_in get_dc_extent;
+ int rc;
+
+ get_dc_extent = (struct cxl_mbox_get_dc_extent_in) {
+ .extent_cnt = cpu_to_le32(exp_cnt - start_index),
+ .start_extent_index = cpu_to_le32(start_index),
+ };
+
+ mbox_cmd = (struct cxl_mbox_cmd) {
+ .opcode = CXL_MBOX_OP_GET_DC_EXTENT_LIST,
+ .payload_in = &get_dc_extent,
+ .size_in = sizeof(get_dc_extent),
+ .size_out = mds->payload_size,
+ .payload_out = dc_extents,
+ .min_out = 1,
+ };
+
+ rc = cxl_internal_send_cmd(mds, &mbox_cmd);
+ if (rc < 0)
+ return rc;
+
+ nr_ext = le32_to_cpu(dc_extents->ret_extent_cnt);
+ total_read += nr_ext;
+ total_extent_cnt = le32_to_cpu(dc_extents->total_extent_cnt);
+ gen_num = le32_to_cpu(dc_extents->extent_list_num);
+
+ dev_dbg(dev, "Get extent list count:%d generation Num:%d\n",
+ total_extent_cnt, gen_num);
+
+ if (gen_num != start_gen_num || exp_cnt != total_extent_cnt) {
+ dev_err(dev, "Possible incomplete extent list; gen %u != %u : cnt %u != %u\n",
+ gen_num, start_gen_num, exp_cnt, total_extent_cnt);
+ return -EIO;
+ }
+
+ for (int i = 0; i < nr_ext ; i++) {
+ dev_dbg(dev, "Processing extent %d/%d\n",
+ start_index + i, exp_cnt);
+ rc = cxl_validate_extent(mds, &dc_extents->extent[i]);
+ if (rc)
+ continue;
+ if (!cxl_dc_extent_in_ed(cxled, &dc_extents->extent[i]))
+ continue;
+ rc = cxl_ed_add_one_extent(cxled, &dc_extents->extent[i]);
+ if (rc)
+ return rc;
+ }
+
+ start_index += nr_ext;
+ } while (exp_cnt > total_read);
+
+ return 0;
+}
+
+/**
+ * cxl_read_dc_extents() - Read any existing extents
+ * @cxled: Endpoint decoder which is part of a region
+ *
+ * Issue the Get Dynamic Capacity Extent List command to the device
+ * and add any existing extents found which belong to this decoder.
+ *
+ * Return: 0 if command was executed successfully, -ERRNO on error.
+ */
+int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev_state *mds = cxled_to_mds(cxled);
+ struct device *dev = mds->cxlds.dev;
+ unsigned int extent_gen_num;
+ int rc;
+
+ if (!cxl_dcd_supported(mds)) {
+ dev_dbg(dev, "DCD unsupported\n");
+ return 0;
+ }
+
+ rc = cxl_dev_get_dc_extent_cnt(mds, &extent_gen_num);
+ dev_dbg(mds->cxlds.dev, "Extent count: %d Generation Num: %d\n",
+ rc, extent_gen_num);
+ if (rc <= 0) /* 0 == no records found */
+ return rc;
+
+ return cxl_dev_get_dc_extents(cxled, extent_gen_num, rc);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_read_dc_extents, CXL);
+
static int add_dpa_res(struct device *dev, struct resource *parent,
struct resource *res, resource_size_t start,
resource_size_t size, const char *type)
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 0d7b09a49dcf..3e563ab29afe 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -1450,6 +1450,13 @@ static int cxl_region_validate_position(struct cxl_region *cxlr,
return 0;
}

+/* Callers are expected to ensure cxled has been attached to a region */
+int cxl_ed_add_one_extent(struct cxl_endpoint_decoder *cxled,
+ struct cxl_dc_extent *dc_extent)
+{
+ return 0;
+}
+
static int cxl_region_attach_position(struct cxl_region *cxlr,
struct cxl_root_decoder *cxlrd,
struct cxl_endpoint_decoder *cxled,
@@ -2773,6 +2780,22 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
return rc;
}

+static int cxl_region_read_extents(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int i;
+
+ for (i = 0; i < p->nr_targets; i++) {
+ int rc;
+
+ rc = cxl_read_dc_extents(p->targets[i]);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
static void cxlr_dax_unregister(void *_cxlr_dax)
{
struct cxl_dax_region *cxlr_dax = _cxlr_dax;
@@ -2807,6 +2830,12 @@ static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
dev_name(dev));

+ if (cxlr->mode == CXL_REGION_DC) {
+ rc = cxl_region_read_extents(cxlr);
+ if (rc)
+ goto err;
+ }
+
return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
cxlr_dax);
err:
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 01bee6eedff3..8f2d8944d334 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -604,6 +604,54 @@ enum cxl_opcode {
UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f, 0xd6, 0x07, 0x19, \
0x40, 0x3d, 0x86)

+/*
+ * Add Dynamic Capacity Response
+ * CXL rev 3.1 section 8.2.9.9.9.3; Table 8-168 & Table 8-169
+ */
+struct cxl_mbox_dc_response {
+ __le32 extent_list_size;
+ u8 flags;
+ u8 reserved[3];
+ struct updated_extent_list {
+ __le64 dpa_start;
+ __le64 length;
+ u8 reserved[8];
+ } __packed extent_list[];
+} __packed;
+
+/*
+ * CXL rev 3.1 section 8.2.9.2.1.6; Table 8-51
+ */
+#define CXL_DC_EXTENT_TAG_LEN 0x10
+struct cxl_dc_extent {
+ __le64 start_dpa;
+ __le64 length;
+ u8 tag[CXL_DC_EXTENT_TAG_LEN];
+ __le16 shared_extn_seq;
+ u8 reserved[6];
+} __packed;
+
+/*
+ * Get Dynamic Capacity Extent List; Input Payload
+ * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-166
+ */
+struct cxl_mbox_get_dc_extent_in {
+ __le32 extent_cnt;
+ __le32 start_extent_index;
+} __packed;
+
+/*
+ * Get Dynamic Capacity Extent List; Output Payload
+ * CXL rev 3.1 section 8.2.9.9.9.2; Table 8-167
+ */
+struct cxl_mbox_get_dc_extent_out {
+ __le32 ret_extent_cnt;
+ __le32 total_extent_cnt;
+ __le32 extent_list_num;
+ u8 rsvd[4];
+ struct cxl_dc_extent extent[];
+} __packed;
+
struct cxl_mbox_get_supported_logs {
__le16 entries;
u8 rsvd[6];
@@ -879,6 +927,7 @@ int cxl_internal_send_cmd(struct cxl_memdev_state *mds,
struct cxl_mbox_cmd *cmd);
int cxl_dev_state_identify(struct cxl_memdev_state *mds);
int cxl_dev_dynamic_capacity_identify(struct cxl_memdev_state *mds);
+int cxl_read_dc_extents(struct cxl_endpoint_decoder *cxled);
int cxl_await_media_ready(struct cxl_dev_state *cxlds);
int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
int cxl_mem_create_range_info(struct cxl_memdev_state *mds);

--
2.44.0