RE: [PATCH V2 03/11] cxl/mem: Implement Clear Event Records command

From: Dan Williams
Date: Thu Dec 01 2022 - 21:29:41 EST


ira.weiny@ wrote:
> From: Ira Weiny <ira.weiny@xxxxxxxxx>
>
> CXL rev 3.0 section 8.2.9.2.3 defines the Clear Event Records mailbox
> command. After an event record is read it needs to be cleared from the
> event log.
>
> Implement cxl_clear_event_record() to clear all record retrieved from
> the device.
>
> Each record is cleared explicitly. A clear all bit is specified but
> events could arrive between a get and any final clear all operation.
> This means events would be missed.
> Therefore each event is cleared specifically.

Note that the spec has a better reason for why Clear All has limited
usage:

"Clear All Events is only allowed when the Event Log has overflowed;
otherwise, the device shall return Invalid Input."

We will need to wait and see whether Clear All is needed to keep pace
with a device that has a high event frequency.

>
> Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx>
>
> ---
> Changes from V1:
> Clear Event Record allows for u8 handles while Get Event Record
> allows for u16 records to be returned. Based on Jonathan's
> feedback; allow for all event records to be handled in this
> clear. Which means a double loop with potentially multiple
> Clear Event payloads being sent to clear all events sent.
>
> Changes from RFC:
> Jonathan
> Clean up init of payload and use return code.
> Also report any error to clear the event.
> s/v3.0/rev 3.0
> ---
> drivers/cxl/core/mbox.c | 61 +++++++++++++++++++++++++++++++-----
> drivers/cxl/cxlmem.h | 14 +++++++++
> include/uapi/linux/cxl_mem.h | 1 +
> 3 files changed, 69 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 70b681027a3d..076a3df0ba38 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -52,6 +52,7 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
> #endif
> CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE),
> CXL_CMD(GET_EVENT_RECORD, 1, CXL_VARIABLE_PAYLOAD, 0),
> + CXL_CMD(CLEAR_EVENT_RECORD, CXL_VARIABLE_PAYLOAD, 0, 0),
> CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
> CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
> CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0),
> @@ -708,6 +709,42 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
> }
> EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
>
> +static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
> + enum cxl_event_log_type log,
> + struct cxl_get_event_payload *get_pl,
> + u16 total)
> +{
> + struct cxl_mbox_clear_event_payload payload = {
> + .event_log = log,
> + };
> + int cnt;
> +
> + /*
> + * Clear Event Records uses u8 for the handle cnt while Get Event
> + * Record can return up to 0xffff records.
> + */
> + for (cnt = 0; cnt < total; /* cnt incremented internally */) {
> + u8 nr_recs = min_t(u8, (total - cnt),
> + CXL_CLEAR_EVENT_MAX_HANDLES);

This seems overly complicated. @total is a duplicate of
@get_pl->record_count, and the 2 loops feel like they could be cut
down to one.

> + int i, rc;
> +
> + for (i = 0; i < nr_recs; i++, cnt++) {
> + payload.handle[i] = get_pl->records[cnt].hdr.handle;
> + dev_dbg(cxlds->dev, "Event log '%s': Clearning %u\n",

While I do think this operation is a mix of clearing and cleaning event
records, I don't think "Clearning" is a word.

> + cxl_event_log_type_str(log),
> + le16_to_cpu(payload.handle[i]));
> + }
> + payload.nr_recs = nr_recs;
> +
> + rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_CLEAR_EVENT_RECORD,
> + &payload, sizeof(payload), NULL, 0);
> + if (rc)
> + return rc;
> + }
> +
> + return 0;
> +}
> +
> static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
> enum cxl_event_log_type type)
> {
> @@ -732,13 +769,22 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
> }
>
> nr_rec = le16_to_cpu(payload->record_count);
> - if (trace_cxl_generic_event_enabled()) {
> + if (nr_rec > 0) {
> int i;
>
> - for (i = 0; i < nr_rec; i++)
> - trace_cxl_generic_event(dev_name(cxlds->dev),
> - type,
> - &payload->records[i]);
> + if (trace_cxl_generic_event_enabled()) {

Again, trace_cxl_generic_event_enabled() injects some awkward
formatting here to micro-optimize looping. Any performance benefit this
code might offer is likely offset by the extra human effort to read it.

> + for (i = 0; i < nr_rec; i++)
> + trace_cxl_generic_event(dev_name(cxlds->dev),
> + type,
> + &payload->records[i]);
> + }
> +
> + rc = cxl_clear_event_record(cxlds, type, payload, nr_rec);
> + if (rc) {
> + dev_err(cxlds->dev, "Event log '%s': Failed to clear events : %d",
> + cxl_event_log_type_str(type), rc);
> + return;
> + }
> }
>
> if (trace_cxl_overflow_enabled() &&
> @@ -780,10 +826,11 @@ static struct cxl_get_event_payload *alloc_event_buf(struct cxl_dev_state *cxlds
> * cxl_mem_get_event_records - Get Event Records from the device
> * @cxlds: The device data for the operation
> *
> - * Retrieve all event records available on the device and report them as trace
> - * events.
> + * Retrieve all event records available on the device, report them as trace
> + * events, and clear them.
> *
> * See CXL rev 3.0 @8.2.9.2.2 Get Event Records
> + * See CXL rev 3.0 @8.2.9.2.3 Clear Event Records
> */
> void cxl_mem_get_event_records(struct cxl_dev_state *cxlds)
> {
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 55d57f5a64bc..1ae9962c5a06 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -261,6 +261,7 @@ enum cxl_opcode {
> CXL_MBOX_OP_INVALID = 0x0000,
> CXL_MBOX_OP_RAW = CXL_MBOX_OP_INVALID,
> CXL_MBOX_OP_GET_EVENT_RECORD = 0x0100,
> + CXL_MBOX_OP_CLEAR_EVENT_RECORD = 0x0101,
> CXL_MBOX_OP_GET_FW_INFO = 0x0200,
> CXL_MBOX_OP_ACTIVATE_FW = 0x0202,
> CXL_MBOX_OP_GET_SUPPORTED_LOGS = 0x0400,
> @@ -396,6 +397,19 @@ static inline const char *cxl_event_log_type_str(enum cxl_event_log_type type)
> return "<unknown>";
> }
>
> +/*
> + * Clear Event Records input payload
> + * CXL rev 3.0 section 8.2.9.2.3; Table 8-51
> + */
> +#define CXL_CLEAR_EVENT_MAX_HANDLES (0xff)
> +struct cxl_mbox_clear_event_payload {
> + u8 event_log; /* enum cxl_event_log_type */
> + u8 clear_flags;
> + u8 nr_recs;
> + u8 reserved[3];
> + __le16 handle[CXL_CLEAR_EVENT_MAX_HANDLES];
> +};
> +
> struct cxl_mbox_get_partition_info {
> __le64 active_volatile_cap;
> __le64 active_persistent_cap;
> diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h
> index 70459be5bdd4..7c1ad8062792 100644
> --- a/include/uapi/linux/cxl_mem.h
> +++ b/include/uapi/linux/cxl_mem.h
> @@ -25,6 +25,7 @@
> ___C(RAW, "Raw device command"), \
> ___C(GET_SUPPORTED_LOGS, "Get Supported Logs"), \
> ___C(GET_EVENT_RECORD, "Get Event Record"), \
> + ___C(CLEAR_EVENT_RECORD, "Clear Event Record"), \
> ___C(GET_FW_INFO, "Get FW Info"), \
> ___C(GET_PARTITION_INFO, "Get Partition Information"), \
> ___C(GET_LSA, "Get Label Storage Area"), \

The same "yikes" / "must be at the end of the enum" feedback applies here.