[PATCH 7/9] habanalabs: added open_stats info ioctl

From: Oded Gabbay
Date: Mon Jun 14 2021 - 08:51:49 EST


From: Yuri Nudelman <ynudelman@xxxxxxxxx>

In a system with multiple ASICs, there is a need to provide monitoring
tools with information on how long a device was opened and how many
times a device was opened.

Therefore, we add a new opcode to the INFO ioctl to provide that
information.

Signed-off-by: Yuri Nudelman <ynudelman@xxxxxxxxx>
Reviewed-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
---
drivers/misc/habanalabs/common/device.c | 3 +++
drivers/misc/habanalabs/common/habanalabs.h | 8 +++++++
.../misc/habanalabs/common/habanalabs_drv.c | 3 +++
.../misc/habanalabs/common/habanalabs_ioctl.c | 21 +++++++++++++++++++
include/uapi/misc/habanalabs.h | 12 +++++++++++
5 files changed, 47 insertions(+)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index e56f5170e338..37ce38d9a1a7 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -132,6 +132,9 @@ static int hl_device_release(struct inode *inode, struct file *filp)
dev_warn(hdev->dev,
"Device is still in use because there are live CS and/or memory mappings\n");

+ hdev->last_open_session_duration_jif =
+ jiffies - hdev->last_successful_open_jif;
+
return 0;
}

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 244fbf209d34..6c9a81c2cfe7 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2137,6 +2137,11 @@ struct hl_mmu_funcs {
* the error will be ignored by the driver during
* device initialization. Mainly used to debug and
* workaround firmware bugs
+ * @last_successful_open_jif: timestamp (jiffies) of the last successful
+ * device open.
+ * @last_open_session_duration_jif: duration (jiffies) of the last device open
+ * session.
+ * @open_counter: number of successful device open operations.
* @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile.
* @card_type: Various ASICs have several card types. This indicates the card
@@ -2259,6 +2264,9 @@ struct hl_device {
u64 max_power;
u64 clock_gating_mask;
u64 boot_error_status_mask;
+ u64 last_successful_open_jif;
+ u64 last_open_session_duration_jif;
+ u64 open_counter;
atomic_t in_reset;
enum hl_pll_frequency curr_pll_profile;
enum cpucp_card_types card_type;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 4d377a39df13..4194cda2d04c 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -187,6 +187,9 @@ int hl_device_open(struct inode *inode, struct file *filp)

hl_debugfs_add_file(hpriv);

+ hdev->open_counter++;
+ hdev->last_successful_open_jif = jiffies;
+
return 0;

out_err:
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 6604d30246e6..f4dda7b4acdd 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -460,6 +460,24 @@ static int power_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
min((size_t) max_size, sizeof(power_info))) ? -EFAULT : 0;
}

+static int open_stats_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ u32 max_size = args->return_size;
+ struct hl_open_stats_info open_stats_info = {0};
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ open_stats_info.last_open_period_ms = jiffies64_to_msecs(
+ hdev->last_open_session_duration_jif);
+ open_stats_info.open_counter = hdev->open_counter;
+
+ return copy_to_user(out, &open_stats_info,
+ min((size_t) max_size, sizeof(open_stats_info))) ? -EFAULT : 0;
+}
+
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
@@ -543,6 +561,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_POWER:
return power_info(hpriv, args);

+ case HL_INFO_OPEN_STATS:
+ return open_stats_info(hpriv, args);
+
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a47485a8d411..a47a731e4527 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -313,6 +313,7 @@ enum hl_device_status {
* HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore
* HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption
* HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency
+ * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
@@ -331,6 +332,7 @@ enum hl_device_status {
#define HL_INFO_TOTAL_ENERGY 15
#define HL_INFO_PLL_FREQUENCY 16
#define HL_INFO_POWER 17
+#define HL_INFO_OPEN_STATS 18

#define HL_INFO_VERSION_MAX_LEN 128
#define HL_INFO_CARD_NAME_MAX_LEN 16
@@ -444,6 +446,16 @@ struct hl_pll_frequency_info {
__u16 output[HL_PLL_NUM_OUTPUTS];
};

+/**
+ * struct hl_open_stats_info - device open statistics information
+ * @open_counter: ever growing counter, increased on each successful dev open
+ * @last_open_period_ms: duration (ms) device was open last time
+ */
+struct hl_open_stats_info {
+ __u64 open_counter;
+ __u64 last_open_period_ms;
+};
+
/**
* struct hl_power_info - power information
* @power: power consumption
--
2.25.1