[PATCH 2/3] habanalabs/gaudi: sync stream add protection to SOB reset flow

From: Oded Gabbay
Date: Fri Apr 02 2021 - 15:19:36 EST


From: farah kassabri <fkassabri@xxxxxxxxx>

Since we moved the SOB reset flow to workqueue and
not part of the fence release flow, we might reach a
scenario where new context is created while we in the middle
of resetting the SOB.
in such cases the reset may fail due to idle check.
This will mess up the streams sync since the SOB value is invalid.
so we protect this area with a mutex, to delay context creation.

Signed-off-by: farah kassabri <fkassabri@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/gaudi/gaudi.c | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index f273b792bc5d..bd711c8d3874 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -5729,18 +5729,26 @@ static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
static int gaudi_schedule_register_memset(struct hl_device *hdev,
u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
{
- struct hl_ctx *ctx = hdev->compute_ctx;
+ struct hl_ctx *ctx;
struct hl_pending_cb *pending_cb;
struct packet_msg_long *pkt;
u32 cb_size, ctl;
struct hl_cb *cb;
- int i;
+ int i, rc;
+
+ mutex_lock(&hdev->fpriv_list_lock);
+ ctx = hdev->compute_ctx;

/* If no compute context available or context is going down
* memset registers directly
*/
- if (!ctx || kref_read(&ctx->refcount) == 0)
- return gaudi_memset_registers(hdev, reg_base, num_regs, val);
+ if (!ctx || kref_read(&ctx->refcount) == 0) {
+ rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
+ mutex_unlock(&hdev->fpriv_list_lock);
+ return rc;
+ }
+
+ mutex_unlock(&hdev->fpriv_list_lock);

cb_size = (sizeof(*pkt) * num_regs) +
sizeof(struct packet_msg_prot) * 2;
--
2.25.1