Re: [PATCH] cxl/memdev: Avoid mailbox functionality on device memory CXL devices

From: Dave Jiang
Date: Fri Jul 28 2023 - 19:29:39 EST




On 7/28/23 16:00, Ira Weiny wrote:
Using the proposed type-2 cxl-test device[1] the following
splat was observed:

BUG: kernel NULL pointer dereference, address: 0000000000000278
[...]
RIP: 0010:devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
[...]
Call Trace:
<TASK>
? __die+0x1f/0x70
? page_fault_oops+0x149/0x420
? fixup_exception+0x22/0x310
? kernelmode_fixup_or_oops+0x84/0x110
? exc_page_fault+0x6d/0x150
? asm_exc_page_fault+0x22/0x30
? devm_cxl_add_memdev+0x1de/0x2c0 [cxl_core]
cxl_mock_mem_probe+0x632/0x870 [cxl_mock_mem]
platform_probe+0x40/0x90
really_probe+0x19e/0x3e0
? __pfx___driver_attach+0x10/0x10
__driver_probe_device+0x78/0x160
driver_probe_device+0x1f/0x90
__driver_attach+0xce/0x1c0
bus_for_each_dev+0x63/0xa0
bus_add_driver+0x112/0x210
driver_register+0x55/0x100
? __pfx_cxl_mock_mem_driver_init+0x10/0x10 [cxl_mock_mem]
[...]

Commit f6b8ab32e3ec made the mailbox functionality optional. However,
some mailbox functionality was merged after that patch. Therefore some
mailbox functionality can be accessed on a device which did not set up
the mailbox.

While no type-2 devices currently exist, commit f6b8ab32e3ec is incomplete.
Complete the checks for memdev state to bring the code to a consistent
state for when type-2 devices are introduced.

[1] https://lore.kernel.org/all/168592160379.1948938.12863272903570476312.stgit@xxxxxxxxxxxxxxxxxxxxxxxxx/

Fixes: f6b8ab32e3ec ("cxl/memdev: Make mailbox functionality optional")
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>


I think you'll need to coordinate with this patch from Davidlohr?
https://lore.kernel.org/linux-cxl/20230726051940.3570-4-dave@xxxxxxxxxxxx/



---
drivers/cxl/core/mbox.c | 9 +++++++++
drivers/cxl/core/memdev.c | 26 ++++++++++++++++++++++++++
drivers/cxl/mem.c | 18 ++++++++++--------
drivers/cxl/pci.c | 5 ++++-
drivers/cxl/pmem.c | 3 +++
5 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index d6d067fbee97..eb1758fb8cdf 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -482,6 +482,9 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
dev_dbg(dev, "Query IOCTL\n");
+ if (!mds)
+ return -EIO;
+
if (get_user(n_commands, &q->n_commands))
return -EFAULT;
@@ -586,6 +589,9 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
dev_dbg(dev, "Send IOCTL\n");
+ if (!mds)
+ return -EIO;
+
if (copy_from_user(&send, s, sizeof(send)))
return -EFAULT;
@@ -1245,6 +1251,9 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
int nr_records = 0;
int rc;
+ if (!mds)
+ return -EIO;
+
rc = mutex_lock_interruptible(&mds->poison.lock);
if (rc)
return rc;
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index f99e7ec3cc40..629e479f751b 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -201,6 +201,19 @@ static ssize_t security_erase_store(struct device *dev,
static struct device_attribute dev_attr_security_erase =
__ATTR(erase, 0200, NULL, security_erase_store);
+static umode_t cxl_memdev_security_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+ if (!mds)
+ return 0;
+
+ return a->mode;
+}
+
static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
@@ -332,6 +345,9 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
struct cxl_region *cxlr;
int rc;
+ if (!mds)
+ return -EIO;
+
if (!IS_ENABLED(CONFIG_DEBUG_FS))
return 0;
@@ -380,6 +396,9 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
struct cxl_region *cxlr;
int rc;
+ if (!mds)
+ return -EIO;
+
if (!IS_ENABLED(CONFIG_DEBUG_FS))
return 0;
@@ -480,6 +499,7 @@ static struct attribute_group cxl_memdev_pmem_attribute_group = {
static struct attribute_group cxl_memdev_security_attribute_group = {
.name = "security",
.attrs = cxl_memdev_security_attributes,
+ .is_visible = cxl_memdev_security_visible,
};
static const struct attribute_group *cxl_memdev_attribute_groups[] = {
@@ -542,6 +562,9 @@ static void cxl_memdev_security_shutdown(struct device *dev)
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+ if (!mds)
+ return;
+
if (mds->security.poll)
cancel_delayed_work_sync(&mds->security.poll_dwork);
}
@@ -997,6 +1020,9 @@ static int cxl_memdev_security_init(struct cxl_memdev *cxlmd)
struct device *dev = &cxlmd->dev;
struct kernfs_node *sec;
+ if (!mds)
+ return 0;
+
sec = sysfs_get_dirent(dev->kobj.sd, "security");
if (!sec) {
dev_err(dev, "sysfs_get_dirent 'security' failed\n");
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 317c7548e4e9..4755a890018d 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -132,12 +132,14 @@ static int cxl_mem_probe(struct device *dev)
dentry = cxl_debugfs_create_dir(dev_name(dev));
debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show);
- if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds))
- debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
- &cxl_poison_inject_fops);
- if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds))
- debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
- &cxl_poison_clear_fops);
+ if (mds) {
+ if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds))
+ debugfs_create_file("inject_poison", 0200, dentry, cxlmd,
+ &cxl_poison_inject_fops);
+ if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds))
+ debugfs_create_file("clear_poison", 0200, dentry, cxlmd,
+ &cxl_poison_clear_fops);
+ }
rc = devm_add_action_or_reset(dev, remove_debugfs, dentry);
if (rc)
@@ -222,8 +224,8 @@ static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
struct cxl_memdev_state *mds =
to_cxl_memdev_state(cxlmd->cxlds);
- if (!test_bit(CXL_POISON_ENABLED_LIST,
- mds->poison.enabled_cmds))
+ if (!mds || !test_bit(CXL_POISON_ENABLED_LIST,
+ mds->poison.enabled_cmds))
return 0;
}
return a->mode;
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 1cb1494c28fe..93f6140432cd 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -122,7 +122,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
struct cxl_dev_state *cxlds = dev_id->cxlds;
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
- if (!cxl_mbox_background_complete(cxlds))
+ if (!mds || !cxl_mbox_background_complete(cxlds))
return IRQ_NONE;
reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
@@ -624,6 +624,9 @@ static irqreturn_t cxl_event_thread(int irq, void *id)
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
u32 status;
+ if (!mds)
+ return IRQ_HANDLED;
+
do {
/*
* CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c
index 7cb8994f8809..f1adfdd1a2b3 100644
--- a/drivers/cxl/pmem.c
+++ b/drivers/cxl/pmem.c
@@ -70,6 +70,9 @@ static int cxl_nvdimm_probe(struct device *dev)
struct nvdimm *nvdimm;
int rc;
+ if (WARN_ON_ONCE(!mds))
+ return -EIO;
+
set_exclusive_cxl_commands(mds, exclusive_cmds);
rc = devm_add_action_or_reset(dev, clear_exclusive, mds);
if (rc)

---
base-commit: 20ea1e7d13c1b544fe67c4a8dc3943bb1ab33e6f
change-id: 20230728-cxl-fix-devmemdev-5003ce927f68

Best regards,