[PATCH 2/4] habanalabs/gaudi: print sync manager SEI interrupt info

From: Oded Gabbay
Date: Tue Jan 12 2021 - 14:10:13 EST


From: Ofir Bitton <obitton@xxxxxxxxx>

The driver must print the sync manager SEI information when it
receives the corresponding interrupt from the FW.

Signed-off-by: Ofir Bitton <obitton@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/gaudi/gaudi.c | 41 +++++++++++++++++++
.../misc/habanalabs/include/common/cpucp_if.h | 7 ++++
.../include/gaudi/gaudi_async_events.h | 4 ++
3 files changed, 52 insertions(+)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 4b602aa7a6a3..126650e3a9ad 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -225,6 +225,12 @@ gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
"MSG AXI LBW returned with error"
};

+enum gaudi_sm_sei_cause { /* compared with hl_eq_sm_sei_data.sei_cause */
+ GAUDI_SM_SEI_SO_OVERFLOW, /* logged as SO overflow/underflow */
+ GAUDI_SM_SEI_LBW_4B_UNALIGNED, /* logged as unaligned 4B LBW access */
+ GAUDI_SM_SEI_AXI_RESPONSE_ERR /* logged as AXI ID response error */
+};
+
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
@@ -6845,6 +6851,34 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
}
}

+/* Print sync manager SEI cause and log; SM index is relative to SEI_0 */
+static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
+					struct hl_eq_sm_sei_data *sei_data)
+{
+	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
+
+	switch (sei_data->sei_cause) {
+	case GAUDI_SM_SEI_SO_OVERFLOW:
+		dev_err(hdev->dev, "SM %u SEI Error: SO %u overflow/underflow",
+			index, le16_to_cpu(sei_data->sei_log));
+		break;
+	case GAUDI_SM_SEI_LBW_4B_UNALIGNED:
+		dev_err(hdev->dev,
+			"SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
+			index, le16_to_cpu(sei_data->sei_log));
+		break;
+	case GAUDI_SM_SEI_AXI_RESPONSE_ERR:
+		dev_err(hdev->dev, "SM %u SEI Error: AXI ID %u response error",
+			index, le16_to_cpu(sei_data->sei_log));
+		break;
+	default:
+		/* report the unrecognized cause value, not the log */
+		dev_err(hdev->dev, "Unknown SM SEI cause %u",
+				sei_data->sei_cause);
+		break;
+	}
+}
+
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
struct hl_eq_ecc_data *ecc_data)
{
@@ -7468,6 +7502,13 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
hl_fw_unmask_irq(hdev, event_type);
break;

+ case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
+ gaudi_print_irq_info(hdev, event_type, false);
+ gaudi_print_sm_sei_info(hdev, event_type,
+ &eq_entry->sm_sei_data);
+ hl_fw_unmask_irq(hdev, event_type);
+ break;
+
case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
gaudi_print_clk_change_info(hdev, event_type);
hl_fw_unmask_irq(hdev, event_type);
diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h
index 00bd9b392f93..d75d1077461b 100644
--- a/drivers/misc/habanalabs/include/common/cpucp_if.h
+++ b/drivers/misc/habanalabs/include/common/cpucp_if.h
@@ -58,11 +58,18 @@ struct hl_eq_ecc_data {
__u8 pad[7];
};

+struct hl_eq_sm_sei_data { /* SM SEI event payload; fields sum to 8 bytes */
+ __le16 sei_log; /* little-endian; meaning depends on sei_cause */
+ __u8 sei_cause; /* switched on by gaudi_print_sm_sei_info() */
+ __u8 pad[5];
+};
+
struct hl_eq_entry {
struct hl_eq_header hdr;
union {
struct hl_eq_ecc_data ecc_data;
struct hl_eq_hbm_ecc_data hbm_ecc_data;
+ struct hl_eq_sm_sei_data sm_sei_data; /* read for GAUDI_EVENT_DMA_IF_SEI_0..3 */
__le64 data[7];
};
};
diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
index 9ccba8437ec9..49335e8334b4 100644
--- a/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
+++ b/drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
@@ -212,6 +212,10 @@ enum gaudi_async_event_id {
GAUDI_EVENT_NIC_SEI_2 = 266,
GAUDI_EVENT_NIC_SEI_3 = 267,
GAUDI_EVENT_NIC_SEI_4 = 268,
+ GAUDI_EVENT_DMA_IF_SEI_0 = 277,
+ GAUDI_EVENT_DMA_IF_SEI_1 = 278,
+ GAUDI_EVENT_DMA_IF_SEI_2 = 279,
+ GAUDI_EVENT_DMA_IF_SEI_3 = 280,
GAUDI_EVENT_PCIE_FLR = 290,
GAUDI_EVENT_TPC0_BMON_SPMU = 300,
GAUDI_EVENT_TPC0_KRN_ERR = 301,
--
2.25.1