[PATCH 2/5] habanalabs: reset after device is actually released

From: Oded Gabbay
Date: Mon Feb 22 2021 - 17:09:01 EST


The device is actually released only once the refcount of the hpriv
structure drops to 0, which means all of its contexts have been closed.

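If we reset the device while a context is still open, unexpected
behavior and crashes are possible. For example, if the process has a
mapping of a register block that is currently being reset, and the
process reads from or writes to that block during the reset, the
device can get stuck.

Therefore, move the reset-upon-release logic from hl_device_release()
to the end of hpriv_release(), so that it runs only after the last
reference to hpriv has been dropped.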

Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/common/device.c | 32 ++++++++++++-------------
1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index ed1838c15c78..8cc3264ae378 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -70,6 +70,22 @@ static void hpriv_release(struct kref *ref)
mutex_unlock(&hdev->fpriv_list_lock);

kfree(hpriv);
+
+ if (hdev->reset_upon_device_release) {
+ u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
+
+ /* We try soft reset first */
+ hl_device_reset(hdev, false, false);
+
+ /* If device is not idle perform hard reset */
+ if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
+ HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
+ dev_info(hdev->dev,
+ "device is not idle (mask %#llx %#llx) after soft reset, performing hard reset",
+ idle_mask[0], idle_mask[1]);
+ hl_device_reset(hdev, true, false);
+ }
+ }
}

void hl_hpriv_get(struct hl_fpriv *hpriv)
@@ -98,22 +114,6 @@ static int hl_device_release(struct inode *inode, struct file *filp)
hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);

- if (hdev->reset_upon_device_release) {
- u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
-
- /* We try soft reset first */
- hl_device_reset(hdev, false, false);
-
- /* If device is not idle perform hard reset */
- if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
- HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
- dev_info(hdev->dev,
- "device is not idle (mask %#llx %#llx) after soft reset, performing hard reset",
- idle_mask[0], idle_mask[1]);
- hl_device_reset(hdev, true, false);
- }
- }
-
filp->private_data = NULL;

hl_hpriv_put(hpriv);
--
2.25.1