Re: [PATCH v2] acpi/ghes: Prevent sleeping with spinlock held

From: Ira Weiny
Date: Wed Feb 14 2024 - 18:35:27 EST


Ira Weiny wrote:
> Jonathan Cameron wrote:
> > On Wed, 14 Feb 2024 10:23:10 -0500
> > Steven Rostedt <rostedt@xxxxxxxxxxx> wrote:
> >
> > > On Wed, 14 Feb 2024 12:11:53 +0000
> > > Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx> wrote:
> > >
> > > > So I'm thinking this is a won't fix - wait for the printk rework to land and
> > > > assume this will be resolved as well?
> > >
> > > That pretty much sums up what I was about to say ;-)
> > >
> > > tp_printk is more of a hack and not to be used sparingly. With the right
> > > trace events it can hang the machine.
> > >
> > > So, you can use your internal patch locally, but I would recommend waiting
> > > for the new printk changes to land.
>
> Steven, Do you think that will land in 6.9?
>
> > >
> > > I'm really hoping that will be soon!
> > >
> > > -- Steve
> >
> > Thanks Steve,
> >
> > Ira's fix is needed for other valid locking reasons - this was 'just another'
> > lock debugging report that came up whilst testing it.
> >
> > For this patch (not a potential additional one that we aren't going to do ;)
> >
> > Tested-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx>
> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@xxxxxxxxxx>
>
> Jonathan,
>
> Again thanks for the testing! However, Dan and I just discussed this and
> he has an uneasy feeling about going forward with this for 6.8 final.
>
> If we revert the following patch I can squash this fix and wait for the
> tp_printk() fix to land in 6.9 and resubmit.
>
> Dan here is the patch which backs out the actual bug:
>
> Fixes: 671a794c33c6 ("acpi/ghes: Process CXL Component Events")

Unfortunately this is not the only patch.

We need to revert this too:

Fixes: dc97f6344f20 ("cxl/pci: Register for and process CPER events")

And then revert ...
Fixes: 671a794c33c6 ("acpi/ghes: Process CXL Component Events")

.. but there is a conflict.

Dan, below is the correct revert patch. Let me know if you need more.

Ira

commit 807fbe9cac9b190dab83e3ff377a30d18859c8ab
Author: Ira Weiny <ira.weiny@xxxxxxxxx>
Date: Wed Feb 14 15:25:24 2024 -0800

Revert "acpi/ghes: Process CXL Component Events"

This reverts commit 671a794c33c6e048ca5cedd5ad6af44d52d5d7e5.

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index fe825a432c5b..ab2a82cb1b0b 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -26,7 +26,6 @@
#include <linux/interrupt.h>
#include <linux/timer.h>
#include <linux/cper.h>
-#include <linux/cxl-event.h>
#include <linux/platform_device.h>
#include <linux/mutex.h>
#include <linux/ratelimit.h>
@@ -674,52 +673,6 @@ static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata,
schedule_work(&entry->work);
}

-/*
- * Only a single callback can be registered for CXL CPER events.
- */
-static DECLARE_RWSEM(cxl_cper_rw_sem);
-static cxl_cper_callback cper_callback;
-
-static void cxl_cper_post_event(enum cxl_event_type event_type,
- struct cxl_cper_event_rec *rec)
-{
- if (rec->hdr.length <= sizeof(rec->hdr) ||
- rec->hdr.length > sizeof(*rec)) {
- pr_err(FW_WARN "CXL CPER Invalid section length (%u)\n",
- rec->hdr.length);
- return;
- }
-
- if (!(rec->hdr.validation_bits & CPER_CXL_COMP_EVENT_LOG_VALID)) {
- pr_err(FW_WARN "CXL CPER invalid event\n");
- return;
- }
-
- guard(rwsem_read)(&cxl_cper_rw_sem);
- if (cper_callback)
- cper_callback(event_type, rec);
-}
-
-int cxl_cper_register_callback(cxl_cper_callback callback)
-{
- guard(rwsem_write)(&cxl_cper_rw_sem);
- if (cper_callback)
- return -EINVAL;
- cper_callback = callback;
- return 0;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_register_callback, CXL);
-
-int cxl_cper_unregister_callback(cxl_cper_callback callback)
-{
- guard(rwsem_write)(&cxl_cper_rw_sem);
- if (callback != cper_callback)
- return -EINVAL;
- cper_callback = NULL;
- return 0;
-}
-EXPORT_SYMBOL_NS_GPL(cxl_cper_unregister_callback, CXL);
-
static bool ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
@@ -754,22 +707,6 @@ static bool ghes_do_proc(struct ghes *ghes,
}
else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
queued = ghes_handle_arm_hw_error(gdata, sev, sync);
- } else if (guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID)) {
- struct cxl_cper_event_rec *rec =
- acpi_hest_get_payload(gdata);
-
- cxl_cper_post_event(CXL_CPER_EVENT_GEN_MEDIA, rec);
- } else if (guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID)) {
- struct cxl_cper_event_rec *rec =
- acpi_hest_get_payload(gdata);
-
- cxl_cper_post_event(CXL_CPER_EVENT_DRAM, rec);
- } else if (guid_equal(sec_type,
- &CPER_SEC_CXL_MEM_MODULE_GUID)) {
- struct cxl_cper_event_rec *rec =
- acpi_hest_get_payload(gdata);
-
- cxl_cper_post_event(CXL_CPER_EVENT_MEM_MODULE, rec);
} else {
void *err = acpi_hest_get_payload(gdata);

diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h
index 95841750a383..4d6c05f535f8 100644
--- a/include/linux/cxl-event.h
+++ b/include/linux/cxl-event.h
@@ -107,54 +107,4 @@ struct cxl_event_record_raw {
union cxl_event event;
} __packed;

-enum cxl_event_type {
- CXL_CPER_EVENT_GEN_MEDIA,
- CXL_CPER_EVENT_DRAM,
- CXL_CPER_EVENT_MEM_MODULE,
-};
-
-#define CPER_CXL_DEVICE_ID_VALID BIT(0)
-#define CPER_CXL_DEVICE_SN_VALID BIT(1)
-#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2)
-struct cxl_cper_event_rec {
- struct {
- u32 length;
- u64 validation_bits;
- struct cper_cxl_event_devid {
- u16 vendor_id;
- u16 device_id;
- u8 func_num;
- u8 device_num;
- u8 bus_num;
- u16 segment_num;
- u16 slot_num; /* bits 2:0 reserved */
- u8 reserved;
- } __packed device_id;
- struct cper_cxl_event_sn {
- u32 lower_dw;
- u32 upper_dw;
- } __packed dev_serial_num;
- } __packed hdr;
-
- union cxl_event event;
-} __packed;
-
-typedef void (*cxl_cper_callback)(enum cxl_event_type type,
- struct cxl_cper_event_rec *rec);
-
-#ifdef CONFIG_ACPI_APEI_GHES
-int cxl_cper_register_callback(cxl_cper_callback callback);
-int cxl_cper_unregister_callback(cxl_cper_callback callback);
-#else
-static inline int cxl_cper_register_callback(cxl_cper_callback callback)
-{
- return 0;
-}
-
-static inline int cxl_cper_unregister_callback(cxl_cper_callback callback)
-{
- return 0;
-}
-#endif
-
#endif /* _LINUX_CXL_EVENT_H */