[PATCH 1/2] habanalabs: Add debugfs node for engines status

From: Tomer Tayar
Date: Mon Jul 01 2019 - 04:38:22 EST


Command submissions sent to the device are composed of command buffers
which are targeted to different device engines, like DMA and compute
entities. When a command submission gets stuck, knowing in which engine
the stuck is, is crucial for debugging.
This patch adds a debugfs node that exports this information, by
displaying the engines' various registers that assemble their idle/busy
status.

The information retrieval is based on the is_device_idle ASIC function.
The printout in this function, of the first detected busy engine, is
removed because it becomes redundant in the presence of the more
elaborated info of the new debugfs node.

The patch also updates the device idle check:
- Add reading the DMA core status register, because it is possible that
QMAN has finished its work but the DMA itself is still running.
- Remove the MME shadow status check, as the MME ARCH status register
includes the status of all MME shadows.

Signed-off-by: Tomer Tayar <ttayar@xxxxxxxxx>
---
drivers/misc/habanalabs/debugfs.c | 12 +
drivers/misc/habanalabs/goya/goya.c | 127 ++++--
drivers/misc/habanalabs/habanalabs.h | 8 +-
drivers/misc/habanalabs/habanalabs_ioctl.c | 2 +-
.../include/goya/asic_reg/dma_ch_0_masks.h | 418 ++++++++++++++++++
.../include/goya/asic_reg/goya_regs.h | 1 +
6 files changed, 517 insertions(+), 51 deletions(-)
create mode 100644 drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h

diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 17974919b760..6a5dfb14eca1 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -500,6 +500,17 @@ static ssize_t mmu_write(struct file *file, const char __user *buf,
return -EINVAL;
}

+static int engines_show(struct seq_file *s, void *data)
+{
+ struct hl_debugfs_entry *entry = s->private;
+ struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+ struct hl_device *hdev = dev_entry->hdev;
+
+ hdev->asic_funcs->is_device_idle(hdev, s);
+
+ return 0;
+}
+
static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -893,6 +904,7 @@ static const struct hl_info_list hl_debugfs_list[] = {
{"userptr", userptr_show, NULL},
{"vm", vm_show, NULL},
{"mmu", mmu_show, mmu_write},
+ {"engines", engines_show, NULL}
};

static int hl_debugfs_open(struct inode *inode, struct file *file)
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index ce127a6f606f..41e97531f300 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -15,6 +15,7 @@
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/iommu.h>
+#include <linux/seq_file.h>

/*
* GOYA security scheme:
@@ -90,6 +91,30 @@
#define GOYA_CB_POOL_CB_CNT 512
#define GOYA_CB_POOL_CB_SIZE 0x20000 /* 128KB */

+#define IS_QM_IDLE(engine, qm_glbl_sts0) \
+ (((qm_glbl_sts0) & engine##_QM_IDLE_MASK) == engine##_QM_IDLE_MASK)
+#define IS_DMA_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(DMA, qm_glbl_sts0)
+#define IS_TPC_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(TPC, qm_glbl_sts0)
+#define IS_MME_QM_IDLE(qm_glbl_sts0) IS_QM_IDLE(MME, qm_glbl_sts0)
+
+#define IS_CMDQ_IDLE(engine, cmdq_glbl_sts0) \
+ (((cmdq_glbl_sts0) & engine##_CMDQ_IDLE_MASK) == \
+ engine##_CMDQ_IDLE_MASK)
+#define IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) \
+ IS_CMDQ_IDLE(TPC, cmdq_glbl_sts0)
+#define IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) \
+ IS_CMDQ_IDLE(MME, cmdq_glbl_sts0)
+
+#define IS_DMA_IDLE(dma_core_sts0) \
+ !((dma_core_sts0) & DMA_CH_0_STS0_DMA_BUSY_MASK)
+
+#define IS_TPC_IDLE(tpc_cfg_sts) \
+ (((tpc_cfg_sts) & TPC_CFG_IDLE_MASK) == TPC_CFG_IDLE_MASK)
+
+#define IS_MME_IDLE(mme_arch_sts) \
+ (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
+
+
static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = {
"goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3",
"goya cq 4", "goya cpu eq"
@@ -2796,7 +2821,6 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
dma_addr_t fence_dma_addr;
struct hl_cb *cb;
u32 tmp, timeout;
- char buf[16] = {};
int rc;

if (hdev->pldm)
@@ -2804,10 +2828,9 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
else
timeout = HL_DEVICE_TIMEOUT_USEC;

- if (!hdev->asic_funcs->is_device_idle(hdev, buf, sizeof(buf))) {
+ if (!hdev->asic_funcs->is_device_idle(hdev, NULL)) {
dev_err_ratelimited(hdev->dev,
- "Can't send KMD job on QMAN0 because %s is busy\n",
- buf);
+ "Can't send KMD job on QMAN0 because the device is not idle\n");
return -EBUSY;
}

@@ -4891,57 +4914,75 @@ int goya_armcp_info_get(struct hl_device *hdev)
return 0;
}

-static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size)
+static bool goya_is_device_idle(struct hl_device *hdev, struct seq_file *s)
{
- u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
+ const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
+ const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
+ u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
+ mme_arch_sts;
+ bool is_idle = true, is_eng_idle;
+ u64 offset;
int i;

+ if (s)
+ seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n"
+ "--- ------- ------------ -------------\n");
+
offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0;

for (i = 0 ; i < DMA_MAX_NUM ; i++) {
- dma_qm_reg = mmDMA_QM_0_GLBL_STS0 + i * offset;
+ qm_glbl_sts0 = RREG32(mmDMA_QM_0_GLBL_STS0 + i * offset);
+ dma_core_sts0 = RREG32(mmDMA_CH_0_STS0 + i * offset);
+ is_eng_idle = IS_DMA_QM_IDLE(qm_glbl_sts0) &&
+ IS_DMA_IDLE(dma_core_sts0);
+ is_idle &= is_eng_idle;

- if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
- DMA_QM_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "DMA%d_QM", i);
+ if (s)
+ seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
+ qm_glbl_sts0, dma_core_sts0);
}

+ if (s)
+ seq_puts(s,
+ "\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n"
+ "--- ------- ------------ -------------- ----------\n");
+
offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;

for (i = 0 ; i < TPC_MAX_NUM ; i++) {
- tpc_qm_reg = mmTPC0_QM_GLBL_STS0 + i * offset;
- tpc_cmdq_reg = mmTPC0_CMDQ_GLBL_STS0 + i * offset;
- tpc_cfg_reg = mmTPC0_CFG_STATUS + i * offset;
-
- if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
- TPC_QM_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "TPC%d_QM", i);
-
- if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
- TPC_CMDQ_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "TPC%d_CMDQ", i);
-
- if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
- TPC_CFG_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "TPC%d_CFG", i);
- }
-
- if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
- MME_QM_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME_QM");
-
- if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
- MME_CMDQ_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME_CMDQ");
-
- if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
- MME_ARCH_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME_ARCH");
-
- if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
- return HL_ENG_BUSY(buf, size, "MME");
-
- return true;
+ qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + i * offset);
+ cmdq_glbl_sts0 = RREG32(mmTPC0_CMDQ_GLBL_STS0 + i * offset);
+ tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + i * offset);
+ is_eng_idle = IS_TPC_QM_IDLE(qm_glbl_sts0) &&
+ IS_TPC_CMDQ_IDLE(cmdq_glbl_sts0) &&
+ IS_TPC_IDLE(tpc_cfg_sts);
+ is_idle &= is_eng_idle;
+
+ if (s)
+ seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
+ qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
+ }
+
+ if (s)
+ seq_puts(s,
+ "\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n"
+ "--- ------- ------------ -------------- -----------\n");
+
+ qm_glbl_sts0 = RREG32(mmMME_QM_GLBL_STS0);
+ cmdq_glbl_sts0 = RREG32(mmMME_CMDQ_GLBL_STS0);
+ mme_arch_sts = RREG32(mmMME_ARCH_STATUS);
+ is_eng_idle = IS_MME_QM_IDLE(qm_glbl_sts0) &&
+ IS_MME_CMDQ_IDLE(cmdq_glbl_sts0) &&
+ IS_MME_IDLE(mme_arch_sts);
+ is_idle &= is_eng_idle;
+
+ if (s) {
+ seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
+ cmdq_glbl_sts0, mme_arch_sts);
+ seq_puts(s, "\n");
+ }
+
+ return is_idle;
}

static void goya_hw_queues_lock(struct hl_device *hdev)
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 5e4a631b3d88..2c9ea61099b4 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -557,7 +557,7 @@ struct hl_asic_funcs {
u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
int (*debug_coresight)(struct hl_device *hdev, void *data);
- bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size);
+ bool (*is_device_idle)(struct hl_device *hdev, struct seq_file *s);
int (*soft_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev);
@@ -1112,12 +1112,6 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
(cond) ? 0 : -ETIMEDOUT; \
})

-#define HL_ENG_BUSY(buf, size, fmt, ...) ({ \
- if (buf) \
- snprintf(buf, size, fmt, ##__VA_ARGS__); \
- false; \
- })
-
struct hwmon_chip_info;

/**
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index c641c7eb6f7c..b04585af27ad 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -119,7 +119,7 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;

- hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL, 0);
+ hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL);

return copy_to_user(out, &hw_idle,
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h
new file mode 100644
index 000000000000..028143408401
--- /dev/null
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_masks.h
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ ** DO NOT EDIT BELOW **
+ ************************************/
+
+#ifndef ASIC_REG_DMA_CH_0_MASKS_H_
+#define ASIC_REG_DMA_CH_0_MASKS_H_
+
+/*
+ *****************************************
+ * DMA_CH_0 (Prototype: DMA_CH)
+ *****************************************
+ */
+
+/* DMA_CH_0_CFG0 */
+#define DMA_CH_0_CFG0_RD_MAX_OUTSTAND_SHIFT 0
+#define DMA_CH_0_CFG0_RD_MAX_OUTSTAND_MASK 0x3FF
+#define DMA_CH_0_CFG0_WR_MAX_OUTSTAND_SHIFT 16
+#define DMA_CH_0_CFG0_WR_MAX_OUTSTAND_MASK 0xFFF0000
+
+/* DMA_CH_0_CFG1 */
+#define DMA_CH_0_CFG1_RD_BUF_MAX_SIZE_SHIFT 0
+#define DMA_CH_0_CFG1_RD_BUF_MAX_SIZE_MASK 0x3FF
+
+/* DMA_CH_0_ERRMSG_ADDR_LO */
+#define DMA_CH_0_ERRMSG_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_ERRMSG_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_ERRMSG_ADDR_HI */
+#define DMA_CH_0_ERRMSG_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_ERRMSG_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_ERRMSG_WDATA */
+#define DMA_CH_0_ERRMSG_WDATA_VAL_SHIFT 0
+#define DMA_CH_0_ERRMSG_WDATA_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_ADDR_LO */
+#define DMA_CH_0_RD_COMP_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_RD_COMP_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_ADDR_HI */
+#define DMA_CH_0_RD_COMP_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_RD_COMP_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_COMP_WDATA */
+#define DMA_CH_0_RD_COMP_WDATA_VAL_SHIFT 0
+#define DMA_CH_0_RD_COMP_WDATA_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_ADDR_LO */
+#define DMA_CH_0_WR_COMP_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_WR_COMP_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_ADDR_HI */
+#define DMA_CH_0_WR_COMP_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_WR_COMP_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_WR_COMP_WDATA */
+#define DMA_CH_0_WR_COMP_WDATA_VAL_SHIFT 0
+#define DMA_CH_0_WR_COMP_WDATA_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_SRC_ADDR_LO */
+#define DMA_CH_0_LDMA_SRC_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_SRC_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_SRC_ADDR_HI */
+#define DMA_CH_0_LDMA_SRC_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_SRC_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_DST_ADDR_LO */
+#define DMA_CH_0_LDMA_DST_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_DST_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_DST_ADDR_HI */
+#define DMA_CH_0_LDMA_DST_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_DST_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_LDMA_TSIZE */
+#define DMA_CH_0_LDMA_TSIZE_VAL_SHIFT 0
+#define DMA_CH_0_LDMA_TSIZE_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_COMIT_TRANSFER */
+#define DMA_CH_0_COMIT_TRANSFER_PCI_UPS_WKORDR_SHIFT 0
+#define DMA_CH_0_COMIT_TRANSFER_PCI_UPS_WKORDR_MASK 0x1
+#define DMA_CH_0_COMIT_TRANSFER_RD_COMP_EN_SHIFT 1
+#define DMA_CH_0_COMIT_TRANSFER_RD_COMP_EN_MASK 0x2
+#define DMA_CH_0_COMIT_TRANSFER_WR_COMP_EN_SHIFT 2
+#define DMA_CH_0_COMIT_TRANSFER_WR_COMP_EN_MASK 0x4
+#define DMA_CH_0_COMIT_TRANSFER_NOSNOOP_SHIFT 3
+#define DMA_CH_0_COMIT_TRANSFER_NOSNOOP_MASK 0x8
+#define DMA_CH_0_COMIT_TRANSFER_SRC_ADDR_INC_DIS_SHIFT 4
+#define DMA_CH_0_COMIT_TRANSFER_SRC_ADDR_INC_DIS_MASK 0x10
+#define DMA_CH_0_COMIT_TRANSFER_DST_ADDR_INC_DIS_SHIFT 5
+#define DMA_CH_0_COMIT_TRANSFER_DST_ADDR_INC_DIS_MASK 0x20
+#define DMA_CH_0_COMIT_TRANSFER_MEM_SET_SHIFT 6
+#define DMA_CH_0_COMIT_TRANSFER_MEM_SET_MASK 0x40
+#define DMA_CH_0_COMIT_TRANSFER_MOD_TENSOR_SHIFT 15
+#define DMA_CH_0_COMIT_TRANSFER_MOD_TENSOR_MASK 0x8000
+#define DMA_CH_0_COMIT_TRANSFER_CTL_SHIFT 16
+#define DMA_CH_0_COMIT_TRANSFER_CTL_MASK 0xFFFF0000
+
+/* DMA_CH_0_STS0 */
+#define DMA_CH_0_STS0_DMA_BUSY_SHIFT 0
+#define DMA_CH_0_STS0_DMA_BUSY_MASK 0x1
+#define DMA_CH_0_STS0_RD_STS_CTX_FULL_SHIFT 1
+#define DMA_CH_0_STS0_RD_STS_CTX_FULL_MASK 0x2
+#define DMA_CH_0_STS0_WR_STS_CTX_FULL_SHIFT 2
+#define DMA_CH_0_STS0_WR_STS_CTX_FULL_MASK 0x4
+
+/* DMA_CH_0_STS1 */
+#define DMA_CH_0_STS1_RD_STS_CTX_CNT_SHIFT 0
+#define DMA_CH_0_STS1_RD_STS_CTX_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_STS2 */
+#define DMA_CH_0_STS2_WR_STS_CTX_CNT_SHIFT 0
+#define DMA_CH_0_STS2_WR_STS_CTX_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_STS3 */
+#define DMA_CH_0_STS3_RD_STS_TRN_CNT_SHIFT 0
+#define DMA_CH_0_STS3_RD_STS_TRN_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_STS4 */
+#define DMA_CH_0_STS4_WR_STS_TRN_CNT_SHIFT 0
+#define DMA_CH_0_STS4_WR_STS_TRN_CNT_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_SRC_ADDR_LO_STS */
+#define DMA_CH_0_SRC_ADDR_LO_STS_VAL_SHIFT 0
+#define DMA_CH_0_SRC_ADDR_LO_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_SRC_ADDR_HI_STS */
+#define DMA_CH_0_SRC_ADDR_HI_STS_VAL_SHIFT 0
+#define DMA_CH_0_SRC_ADDR_HI_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_SRC_TSIZE_STS */
+#define DMA_CH_0_SRC_TSIZE_STS_VAL_SHIFT 0
+#define DMA_CH_0_SRC_TSIZE_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_DST_ADDR_LO_STS */
+#define DMA_CH_0_DST_ADDR_LO_STS_VAL_SHIFT 0
+#define DMA_CH_0_DST_ADDR_LO_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_DST_ADDR_HI_STS */
+#define DMA_CH_0_DST_ADDR_HI_STS_VAL_SHIFT 0
+#define DMA_CH_0_DST_ADDR_HI_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_DST_TSIZE_STS */
+#define DMA_CH_0_DST_TSIZE_STS_VAL_SHIFT 0
+#define DMA_CH_0_DST_TSIZE_STS_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_EN */
+#define DMA_CH_0_RD_RATE_LIM_EN_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_EN_VAL_MASK 0x1
+
+/* DMA_CH_0_RD_RATE_LIM_RST_TOKEN */
+#define DMA_CH_0_RD_RATE_LIM_RST_TOKEN_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_RST_TOKEN_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_SAT */
+#define DMA_CH_0_RD_RATE_LIM_SAT_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_SAT_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_RD_RATE_LIM_TOUT */
+#define DMA_CH_0_RD_RATE_LIM_TOUT_VAL_SHIFT 0
+#define DMA_CH_0_RD_RATE_LIM_TOUT_VAL_MASK 0x7FFFFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_EN */
+#define DMA_CH_0_WR_RATE_LIM_EN_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_EN_VAL_MASK 0x1
+
+/* DMA_CH_0_WR_RATE_LIM_RST_TOKEN */
+#define DMA_CH_0_WR_RATE_LIM_RST_TOKEN_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_RST_TOKEN_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_SAT */
+#define DMA_CH_0_WR_RATE_LIM_SAT_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_SAT_VAL_MASK 0xFFFF
+
+/* DMA_CH_0_WR_RATE_LIM_TOUT */
+#define DMA_CH_0_WR_RATE_LIM_TOUT_VAL_SHIFT 0
+#define DMA_CH_0_WR_RATE_LIM_TOUT_VAL_MASK 0x7FFFFFFF
+
+/* DMA_CH_0_CFG2 */
+#define DMA_CH_0_CFG2_FORCE_WORD_SHIFT 0
+#define DMA_CH_0_CFG2_FORCE_WORD_MASK 0x1
+
+/* DMA_CH_0_TDMA_CTL */
+#define DMA_CH_0_TDMA_CTL_DTYPE_SHIFT 0
+#define DMA_CH_0_TDMA_CTL_DTYPE_MASK 0x7
+
+/* DMA_CH_0_TDMA_SRC_BASE_ADDR_LO */
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_BASE_ADDR_HI */
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_BASE_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_0 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_0 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_0 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_0 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_1 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_1 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_1 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_1 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_2 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_2 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_2 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_2 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_3 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_3 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_3 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_3 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_BASE_4 */
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_BASE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_ROI_SIZE_4 */
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_ROI_SIZE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4 */
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_VALID_ELEMENTS_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_START_OFFSET_4 */
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_START_OFFSET_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_SRC_STRIDE_4 */
+#define DMA_CH_0_TDMA_SRC_STRIDE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_SRC_STRIDE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_BASE_ADDR_LO */
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_LO_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_LO_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_BASE_ADDR_HI */
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_HI_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_BASE_ADDR_HI_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_0 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_0 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_0 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_0 */
+#define DMA_CH_0_TDMA_DST_STRIDE_0_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_0_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_1 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_1 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_1 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_1 */
+#define DMA_CH_0_TDMA_DST_STRIDE_1_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_1_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_2 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_2 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_2 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_2 */
+#define DMA_CH_0_TDMA_DST_STRIDE_2_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_2_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_3 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_3 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_3 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_3 */
+#define DMA_CH_0_TDMA_DST_STRIDE_3_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_3_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_BASE_4 */
+#define DMA_CH_0_TDMA_DST_ROI_BASE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_BASE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_ROI_SIZE_4 */
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_ROI_SIZE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4 */
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_VALID_ELEMENTS_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_START_OFFSET_4 */
+#define DMA_CH_0_TDMA_DST_START_OFFSET_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_START_OFFSET_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_TDMA_DST_STRIDE_4 */
+#define DMA_CH_0_TDMA_DST_STRIDE_4_VAL_SHIFT 0
+#define DMA_CH_0_TDMA_DST_STRIDE_4_VAL_MASK 0xFFFFFFFF
+
+/* DMA_CH_0_MEM_INIT_BUSY */
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_DATA_SHIFT 0
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_DATA_MASK 0xFF
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_MD_SHIFT 8
+#define DMA_CH_0_MEM_INIT_BUSY_SBC_MD_MASK 0x100
+
+#endif /* ASIC_REG_DMA_CH_0_MASKS_H_ */
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
index 506e71e201e1..19b0f0ef1d0b 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
@@ -88,6 +88,7 @@
#include "psoc_global_conf_masks.h"
#include "dma_macro_masks.h"
#include "dma_qm_0_masks.h"
+#include "dma_ch_0_masks.h"
#include "tpc0_qm_masks.h"
#include "tpc0_cmdq_masks.h"
#include "mme_qm_masks.h"
--
2.17.1