Re: [PATCH 3/5] hwmon: (socfpga) Add hardware monitoring support on SoCFPGA platforms

From: Dinh Nguyen
Date: Tue Apr 18 2023 - 13:29:49 EST



On 4/17/2023 4:51 PM, Guenter Roeck wrote:
On 4/17/23 13:55, Dinh Nguyen wrote:

On 4/10/2023 9:44 PM, Guenter Roeck wrote:
On 4/10/23 08:33, dinh.nguyen@xxxxxxxxxxxxxxx wrote:
From: Dinh Nguyen <dinh.nguyen@xxxxxxxxxxxxxxx>

The driver supports 64-bit SoCFPGA platforms for temperature and voltage
reading using the platform's SDM (Secure Device Manager). The driver
also uses the Stratix10 Service layer driver.

This driver only supports OF SoCFPGA 64-bit platforms.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@xxxxxxxxxxxxxxx>
Signed-off-by: Dinh Nguyen <dinh.nguyen@xxxxxxxxxxxxxxx>
---
  Documentation/hwmon/index.rst                 |   1 +
  Documentation/hwmon/socfpga-hwmon.rst         |  30 ++
  drivers/firmware/stratix10-svc.c              |  18 +-

Changes outside the hwmon subsystem need to be in a separate patch.

will separate...


drivers/hwmon/Kconfig |  11 +
  drivers/hwmon/Makefile                        |   1 +
  drivers/hwmon/socfpga-hwmon.c                 | 406 ++++++++++++++++++
  include/linux/firmware/intel/stratix10-smc.h  |  34 ++
  .../firmware/intel/stratix10-svc-client.h     |   6 +
  8 files changed, 506 insertions(+), 1 deletion(-)
  create mode 100644 Documentation/hwmon/socfpga-hwmon.rst
  create mode 100644 drivers/hwmon/socfpga-hwmon.c


...
+
+enum hwmon_type_op {
+    SOCFPGA_HWMON_TYPE_TEMP,
+    SOCFPGA_HWMON_TYPE_VOLT,
+    SOCFPGA_HWMON_TYPE_MAX

Unused define

Removed.


+};
+
+static const char *const hwmon_types_str[] = { "temperature", "voltage" };
+
+static umode_t socfpga_is_visible(const void *dev,
+                  enum hwmon_sensor_types type,
+                  u32 attr, int chan)
+{
+    switch (type) {
+    case hwmon_temp:
+    case hwmon_in:
+        return 0444;
+    default:
+        return 0;
+    }
+}
+
+static void socfpga_smc_callback(struct stratix10_svc_client *client,
+                      struct stratix10_svc_cb_data *data)
+{
+    struct socfpga_hwmon_priv *priv = client->priv;
+    struct arm_smccc_res *res = data->kaddr1;
+
+    if (data->status == BIT(SVC_STATUS_OK))    {
+        if (priv->msg.command == COMMAND_HWMON_READTEMP)
+            priv->temperature.value = res->a0;
+        else
+            priv->voltage.value = res->a0;
+    } else
+        dev_err(client->dev, "%s returned 0x%lX\n", __func__, res->a0);
+

Missing { } in else branch. Please run checkpatch --strict and fix
continuation line alignment issues as well as unbalanced if/else
reports.
Will do.

+ complete(&priv->completion);
+}
+
+static int socfpga_hwmon_send(struct socfpga_hwmon_priv *priv)
+{
+    int ret;
+
+    priv->client.receive_cb = socfpga_smc_callback;
+
+    ret = stratix10_svc_send(priv->chan, &priv->msg);
+    if (ret < 0)
+        return ret;
+
+    if (!wait_for_completion_timeout(&priv->completion, HWMON_TIMEOUT)) {
+        dev_err(priv->client.dev, "SMC call timeout!\n");
+        return -ETIMEDOUT;
+    }
+
+    return 0;
+}
+
+static int socfpga_hwmon_err_to_errno(struct socfpga_hwmon_priv *priv)
+{
+    int value = priv->temperature.value;
+
+    if (!(value & ETEMP_ERROR))
+        return 0;
+

This is odd. int is normally 32 bit, this function is called from
socfpga_read() for temperatures, which presumably are defined
as "signed 32-bit fixed point binary". That means that negative
temperatures would be treated as errors. Please verify.

That's correct, if bit 31 is set, then it indicates an error.


This ...


+    dev_err(priv->client.dev, "temperature sensor code 0x%08x\n", value);
+

Please don't clog the log with such messages.

Removed.


+    value &= ~ETEMP_ERROR;
+    switch (value) {
+    case ETEMP_NOT_PRESENT:
+        return -ENOENT;
+    case ETEMP_CORRUPT:
+    case ETEMP_NOT_INITIALIZED:
+        return -ENODATA;
+    case ETEMP_BUSY:
+        return -EBUSY;
+    case ETEMP_INACTIVE:
+    case ETEMP_TIMEOUT:
+    case ETEMP_TOO_OLD:
+        return -EAGAIN;
+    default:
+        /* Unknown error */
+        return -EINVAL;

Should be -EIO.

Replaced.
+    }
+}
+
+static int socfpga_read(struct device *dev, enum hwmon_sensor_types type,
+            u32 attr, int chan, long *val)
+{
+    struct socfpga_hwmon_priv *priv = dev_get_drvdata(dev);
+    int ret;
+
+    mutex_lock(&priv->lock);
+    reinit_completion(&priv->completion);
+
+    switch (type) {
+    case hwmon_temp:
+        priv->msg.arg[0] = BIT_ULL(priv->temperature.chan[chan]);
+        priv->msg.command = COMMAND_HWMON_READTEMP;
+        if (socfpga_hwmon_send(priv))
+            goto status_done;
+
+        ret = socfpga_hwmon_err_to_errno(priv);
+        if (ret)
+            break;
+        /*
+         * The Temperature Sensor IP core returns the Celsius
+         * temperature value in signed 32-bit fixed point binary

... and this contradict each other. If bit 31 indicates an error,
this can not be a signed 32-bit value.

You're right! I've re-read the spec and should have the code look for the specific error values:

0x80000000 - inactive

0x80000001 - old value

0x80000002 - invalid channel

0x80000003 - corrupted.

...

Dinh