Re: [PATCH v5 09/17] drm/imagination: Implement power management

From: Paul Cercueil
Date: Wed Aug 16 2023 - 06:58:15 EST


Hi Sarah,

Le mercredi 16 août 2023 à 09:25 +0100, Sarah Walker a écrit :
> Add power management to the driver, using runtime pm. The power off
> sequence depends on firmware commands which are not implemented in
> this
> patch.
>
> Changes since v4:
> - Suspend runtime PM before unplugging device on rmmod
>
> Changes since v3:
> - Don't power device when calling pvr_device_gpu_fini()
> - Documentation for pvr_dev->lost has been improved
> - pvr_power_init() renamed to pvr_watchdog_init()
> - Use drm_dev_{enter,exit}
>
> Changes since v2:
> - Use runtime PM
> - Implement watchdog
>
> Signed-off-by: Sarah Walker <sarah.walker@xxxxxxxxxx>
> ---
>  drivers/gpu/drm/imagination/Makefile     |   1 +
>  drivers/gpu/drm/imagination/pvr_device.c |  23 +-
>  drivers/gpu/drm/imagination/pvr_device.h |  22 ++
>  drivers/gpu/drm/imagination/pvr_drv.c    |  20 +-
>  drivers/gpu/drm/imagination/pvr_power.c  | 271
> +++++++++++++++++++++++
>  drivers/gpu/drm/imagination/pvr_power.h  |  39 ++++
>  6 files changed, 373 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/gpu/drm/imagination/pvr_power.c
>  create mode 100644 drivers/gpu/drm/imagination/pvr_power.h
>
> diff --git a/drivers/gpu/drm/imagination/Makefile
> b/drivers/gpu/drm/imagination/Makefile
> index 8fcabc1bea36..235e2d329e29 100644
> --- a/drivers/gpu/drm/imagination/Makefile
> +++ b/drivers/gpu/drm/imagination/Makefile
> @@ -10,6 +10,7 @@ powervr-y := \
>         pvr_fw.o \
>         pvr_gem.o \
>         pvr_mmu.o \
> +       pvr_power.o \
>         pvr_vm.o
>  
>  obj-$(CONFIG_DRM_POWERVR) += powervr.o
> diff --git a/drivers/gpu/drm/imagination/pvr_device.c
> b/drivers/gpu/drm/imagination/pvr_device.c
> index ef8f7a2ff1a9..5dbd05f21238 100644
> --- a/drivers/gpu/drm/imagination/pvr_device.c
> +++ b/drivers/gpu/drm/imagination/pvr_device.c
> @@ -5,6 +5,7 @@
>  #include "pvr_device_info.h"
>  
>  #include "pvr_fw.h"
> +#include "pvr_power.h"
>  #include "pvr_rogue_cr_defs.h"
>  #include "pvr_vm.h"
>  
> @@ -357,6 +358,8 @@ pvr_device_gpu_fini(struct pvr_device *pvr_dev)
>  int
>  pvr_device_init(struct pvr_device *pvr_dev)
>  {
> +       struct drm_device *drm_dev = from_pvr_device(pvr_dev);
> +       struct device *dev = drm_dev->dev;
>         int err;
>  
>         /* Enable and initialize clocks required for the device to
> operate. */
> @@ -364,13 +367,29 @@ pvr_device_init(struct pvr_device *pvr_dev)
>         if (err)
>                 return err;
>  
> +       /* Explicitly power the GPU so we can access control
> registers before the FW is booted. */
> +       err = pm_runtime_resume_and_get(dev);
> +       if (err)
> +               return err;
> +
>         /* Map the control registers into memory. */
>         err = pvr_device_reg_init(pvr_dev);
>         if (err)
> -               return err;
> +               goto err_pm_runtime_put;
>  
>         /* Perform GPU-specific initialization steps. */
> -       return pvr_device_gpu_init(pvr_dev);
> +       err = pvr_device_gpu_init(pvr_dev);
> +       if (err)
> +               goto err_pm_runtime_put;
> +
> +       pm_runtime_put(dev);
> +
> +       return 0;
> +
> +err_pm_runtime_put:
> +       pm_runtime_put_sync_suspend(dev);
> +
> +       return err;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/imagination/pvr_device.h
> b/drivers/gpu/drm/imagination/pvr_device.h
> index 990363e433d7..6d58ecfbc838 100644
> --- a/drivers/gpu/drm/imagination/pvr_device.h
> +++ b/drivers/gpu/drm/imagination/pvr_device.h
> @@ -135,6 +135,28 @@ struct pvr_device {
>  
>         /** @fw_dev: Firmware related data. */
>         struct pvr_fw_device fw_dev;
> +
> +       struct {
> +               /** @work: Work item for watchdog callback. */
> +               struct delayed_work work;
> +
> +               /** @old_kccb_cmds_executed: KCCB command execution
> count at last watchdog poll. */
> +               u32 old_kccb_cmds_executed;
> +
> +               /** @kccb_stall_count: Number of watchdog polls KCCB
> has been stalled for. */
> +               u32 kccb_stall_count;
> +       } watchdog;
> +
> +       /**
> +        * @lost: %true if the device has been lost.
> +        *
> +        * This variable is set if the device has become
> irretrievably unavailable, e.g. if the
> +        * firmware processor has stopped responding and can not be
> revived via a hard reset.
> +        */
> +       bool lost;
> +
> +       /** @sched_wq: Workqueue for schedulers. */
> +       struct workqueue_struct *sched_wq;
>  };
>  
>  /**
> diff --git a/drivers/gpu/drm/imagination/pvr_drv.c
> b/drivers/gpu/drm/imagination/pvr_drv.c
> index 0d51177b506c..0331dc549116 100644
> --- a/drivers/gpu/drm/imagination/pvr_drv.c
> +++ b/drivers/gpu/drm/imagination/pvr_drv.c
> @@ -4,6 +4,7 @@
>  #include "pvr_device.h"
>  #include "pvr_drv.h"
>  #include "pvr_gem.h"
> +#include "pvr_power.h"
>  #include "pvr_rogue_defs.h"
>  #include "pvr_rogue_fwif_client.h"
>  #include "pvr_rogue_fwif_shared.h"
> @@ -1279,9 +1280,16 @@ pvr_probe(struct platform_device *plat_dev)
>  
>         platform_set_drvdata(plat_dev, drm_dev);
>  
> +       devm_pm_runtime_enable(&plat_dev->dev);
> +       pm_runtime_mark_last_busy(&plat_dev->dev);
> +
> +       pm_runtime_set_autosuspend_delay(&plat_dev->dev, 50);
> +       pm_runtime_use_autosuspend(&plat_dev->dev);
> +       pvr_watchdog_init(pvr_dev);
> +
>         err = pvr_device_init(pvr_dev);
>         if (err)
> -               return err;
> +               goto err_watchdog_fini;
>  
>         err = drm_dev_register(drm_dev, 0);
>         if (err)
> @@ -1292,6 +1300,9 @@ pvr_probe(struct platform_device *plat_dev)
>  err_device_fini:
>         pvr_device_fini(pvr_dev);
>  
> +err_watchdog_fini:
> +       pvr_watchdog_fini(pvr_dev);
> +
>         return err;
>  }
>  
> @@ -1301,8 +1312,10 @@ pvr_remove(struct platform_device *plat_dev)
>         struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
>         struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
>  
> +       pm_runtime_suspend(drm_dev->dev);
>         drm_dev_unplug(drm_dev);
>         pvr_device_fini(pvr_dev);
> +       pvr_watchdog_fini(pvr_dev);
>  
>         return 0;
>  }
> @@ -1313,11 +1326,16 @@ static const struct of_device_id dt_match[] =
> {
>  };
>  MODULE_DEVICE_TABLE(of, dt_match);
>  
> +static const struct dev_pm_ops pvr_pm_ops = {
> +       SET_RUNTIME_PM_OPS(pvr_power_device_suspend,
> pvr_power_device_resume, pvr_power_device_idle)
> +};
> +
>  static struct platform_driver pvr_driver = {
>         .probe = pvr_probe,
>         .remove = pvr_remove,
>         .driver = {
>                 .name = PVR_DRIVER_NAME,
> +               .pm = &pvr_pm_ops,
>                 .of_match_table = dt_match,
>         },
>  };
> diff --git a/drivers/gpu/drm/imagination/pvr_power.c
> b/drivers/gpu/drm/imagination/pvr_power.c
> new file mode 100644
> index 000000000000..a494fed92e81
> --- /dev/null
> +++ b/drivers/gpu/drm/imagination/pvr_power.c
> @@ -0,0 +1,271 @@
> +// SPDX-License-Identifier: GPL-2.0 OR MIT
> +/* Copyright (c) 2023 Imagination Technologies Ltd. */
> +
> +#include "pvr_device.h"
> +#include "pvr_fw.h"
> +#include "pvr_power.h"
> +#include "pvr_rogue_fwif.h"
> +
> +#include <drm/drm_drv.h>
> +#include <drm/drm_managed.h>
> +#include <linux/clk.h>
> +#include <linux/interrupt.h>
> +#include <linux/mutex.h>
> +#include <linux/platform_device.h>
> +#include <linux/pm_runtime.h>
> +#include <linux/timer.h>
> +#include <linux/types.h>
> +#include <linux/workqueue.h>
> +
> +#define POWER_SYNC_TIMEOUT_US (1000000) /* 1s */
> +
> +#define WATCHDOG_TIME_MS (500)
> +
> +static int
> +pvr_power_send_command(struct pvr_device *pvr_dev, struct
> rogue_fwif_kccb_cmd *pow_cmd)
> +{
> +       /* TODO: implement */
> +       return -ENODEV;
> +}
> +
> +static int
> +pvr_power_request_idle(struct pvr_device *pvr_dev)
> +{
> +       struct rogue_fwif_kccb_cmd pow_cmd;
> +
> +       /* Send FORCED_IDLE request to FW. */
> +       pow_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_POW;
> +       pow_cmd.cmd_data.pow_data.pow_type =
> ROGUE_FWIF_POW_FORCED_IDLE_REQ;
> +       pow_cmd.cmd_data.pow_data.power_req_data.pow_request_type =
> ROGUE_FWIF_POWER_FORCE_IDLE;
> +
> +       return pvr_power_send_command(pvr_dev, &pow_cmd);
> +}
> +
> +static int
> +pvr_power_request_pwr_off(struct pvr_device *pvr_dev)
> +{
> +       struct rogue_fwif_kccb_cmd pow_cmd;
> +
> +       /* Send POW_OFF request to firmware. */
> +       pow_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_POW;
> +       pow_cmd.cmd_data.pow_data.pow_type = ROGUE_FWIF_POW_OFF_REQ;
> +       pow_cmd.cmd_data.pow_data.power_req_data.forced = true;
> +
> +       return pvr_power_send_command(pvr_dev, &pow_cmd);
> +}
> +
> +static int
> +pvr_power_fw_disable(struct pvr_device *pvr_dev, bool hard_reset)
> +{
> +       if (!hard_reset) {
> +               int err;
> +
> +               cancel_delayed_work_sync(&pvr_dev->watchdog.work);
> +
> +               err = pvr_power_request_idle(pvr_dev);
> +               if (err)
> +                       return err;
> +
> +               err = pvr_power_request_pwr_off(pvr_dev);
> +               if (err)
> +                       return err;
> +       }
> +
> +       /* TODO: stop firmware */
> +       return -ENODEV;
> +}
> +
> +static int
> +pvr_power_fw_enable(struct pvr_device *pvr_dev)
> +{
> +       int err;
> +
> +       /* TODO: start firmware */
> +       err = -ENODEV;
> +       if (err)
> +               return err;
> +
> +       queue_delayed_work(pvr_dev->sched_wq, &pvr_dev-
> >watchdog.work,
> +                          msecs_to_jiffies(WATCHDOG_TIME_MS));
> +
> +       return 0;
> +}
> +
> +bool
> +pvr_power_is_idle(struct pvr_device *pvr_dev)
> +{
> +       /* TODO: implement */
> +       return true;
> +}
> +
> +static bool
> +pvr_watchdog_kccb_stalled(struct pvr_device *pvr_dev)
> +{
> +       /* TODO: implement */
> +       return false;
> +}
> +
> +static void
> +pvr_watchdog_worker(struct work_struct *work)
> +{
> +       struct pvr_device *pvr_dev = container_of(work, struct
> pvr_device,
> +                                                
> watchdog.work.work);
> +       bool stalled;
> +
> +       if (pvr_dev->lost)
> +               return;
> +
> +       if (pm_runtime_get_if_in_use(from_pvr_device(pvr_dev)->dev)
> <= 0)
> +               goto out_requeue;
> +
> +       stalled = pvr_watchdog_kccb_stalled(pvr_dev);
> +
> +       if (stalled) {
> +               drm_err(from_pvr_device(pvr_dev), "FW stalled, trying
> hard reset");
> +
> +               pvr_power_reset(pvr_dev, true);
> +               /* Device may be lost at this point. */
> +       }
> +
> +       pm_runtime_put(from_pvr_device(pvr_dev)->dev);
> +
> +out_requeue:
> +       if (!pvr_dev->lost) {
> +               queue_delayed_work(pvr_dev->sched_wq, &pvr_dev-
> >watchdog.work,
> +                                 
> msecs_to_jiffies(WATCHDOG_TIME_MS));
> +       }
> +}
> +
> +/**
> + * pvr_watchdog_init() - Initialise watchdog for device
> + * @pvr_dev: Target PowerVR device.
> + *
> + * Returns:
> + *  * 0 on success, or
> + *  * -%ENOMEM on out of memory.
> + */
> +int
> +pvr_watchdog_init(struct pvr_device *pvr_dev)
> +{
> +       INIT_DELAYED_WORK(&pvr_dev->watchdog.work,
> pvr_watchdog_worker);
> +
> +       return 0;
> +}
> +
> +int
> +pvr_power_device_suspend(struct device *dev)
> +{
> +       struct platform_device *plat_dev = to_platform_device(dev);
> +       struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
> +       struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
> +       int idx;
> +
> +       if (!drm_dev_enter(drm_dev, &idx))
> +               return -EIO;
> +
> +       clk_disable_unprepare(pvr_dev->mem_clk);
> +       clk_disable_unprepare(pvr_dev->sys_clk);
> +       clk_disable_unprepare(pvr_dev->core_clk);
> +
> +       drm_dev_exit(idx);
> +
> +       return 0;
> +}
> +
> +int
> +pvr_power_device_resume(struct device *dev)
> +{
> +       struct platform_device *plat_dev = to_platform_device(dev);
> +       struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
> +       struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
> +       int idx;
> +       int err;
> +
> +       if (!drm_dev_enter(drm_dev, &idx))
> +               return -EIO;
> +
> +       err = clk_prepare_enable(pvr_dev->core_clk);
> +       if (err)
> +               goto err_drm_dev_exit;
> +
> +       err = clk_prepare_enable(pvr_dev->sys_clk);
> +       if (err)
> +               goto err_core_clk_disable;
> +
> +       err = clk_prepare_enable(pvr_dev->mem_clk);
> +       if (err)
> +               goto err_sys_clk_disable;
> +
> +       drm_dev_exit(idx);
> +
> +       return 0;
> +
> +err_sys_clk_disable:
> +       clk_disable_unprepare(pvr_dev->sys_clk);
> +
> +err_core_clk_disable:
> +       clk_disable_unprepare(pvr_dev->core_clk);
> +
> +err_drm_dev_exit:
> +       drm_dev_exit(idx);
> +
> +       return err;
> +}
> +
> +int
> +pvr_power_device_idle(struct device *dev)
> +{
> +       struct platform_device *plat_dev = to_platform_device(dev);
> +       struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
> +       struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
> +
> +       return pvr_power_is_idle(pvr_dev) ? 0 : -EBUSY;
> +}

You have two options here. Either you require CONFIG_PM, and in that
case you should add a dependency for it in the Kconfig. Also use
RUNTIME_PM_OPS() instead of SET_RUNTIME_PM_OPS().

Otherwise, I would suggest to use EXPORT_NS_RUNTIME_DEV_PM_OPS() here,
which allows you to have pvr_power_device_{suspend,resume,idle} static,
and having them garbage-collected in the !CONFIG_PM case.

You can then have an "extern struct dev_pm_ops pvr_power_pm_ops;" in
your header, and reference it in the driver using the pm_ptr() macro.

Cheers,
-Paul

> +
> +/**
> + * pvr_power_reset() - Reset the GPU
> + * @pvr_dev: Device pointer
> + * @hard_reset: %true for hard reset, %false for soft reset
> + *
> + * If @hard_reset is %false and the FW processor fails to respond
> during the reset process, this
> + * function will attempt a hard reset.
> + *
> + * If a hard reset fails then the GPU device is reported as lost.
> + *
> + * Returns:
> + *  * 0 on success, or
> + *  * Any error code returned by pvr_power_get, pvr_power_fw_disable
> or pvr_power_fw_enable().
> + */
> +int
> +pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
> +{
> +       /* TODO: Implement hard reset. */
> +       int err;
> +
> +       /*
> +        * Take a power reference during the reset. This should
> prevent any interference with the
> +        * power state during reset.
> +        */
> +       WARN_ON(pvr_power_get(pvr_dev));
> +
> +       err = pvr_power_fw_disable(pvr_dev, false);
> +       if (err)
> +               goto err_power_put;
> +
> +       err = pvr_power_fw_enable(pvr_dev);
> +
> +err_power_put:
> +       pvr_power_put(pvr_dev);
> +
> +       return err;
> +}
> +
> +/**
> + * pvr_watchdog_fini() - Shutdown watchdog for device
> + * @pvr_dev: Target PowerVR device.
> + */
> +void
> +pvr_watchdog_fini(struct pvr_device *pvr_dev)
> +{
> +       cancel_delayed_work_sync(&pvr_dev->watchdog.work);
> +}
> diff --git a/drivers/gpu/drm/imagination/pvr_power.h
> b/drivers/gpu/drm/imagination/pvr_power.h
> new file mode 100644
> index 000000000000..439f08d13655
> --- /dev/null
> +++ b/drivers/gpu/drm/imagination/pvr_power.h
> @@ -0,0 +1,39 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR MIT */
> +/* Copyright (c) 2023 Imagination Technologies Ltd. */
> +
> +#ifndef PVR_POWER_H
> +#define PVR_POWER_H
> +
> +#include "pvr_device.h"
> +
> +#include <linux/mutex.h>
> +#include <linux/pm_runtime.h>
> +
> +int pvr_watchdog_init(struct pvr_device *pvr_dev);
> +void pvr_watchdog_fini(struct pvr_device *pvr_dev);
> +
> +bool pvr_power_is_idle(struct pvr_device *pvr_dev);
> +
> +int pvr_power_device_suspend(struct device *dev);
> +int pvr_power_device_resume(struct device *dev);
> +int pvr_power_device_idle(struct device *dev);
> +
> +int pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset);
> +
> +static __always_inline int
> +pvr_power_get(struct pvr_device *pvr_dev)
> +{
> +       struct drm_device *drm_dev = from_pvr_device(pvr_dev);
> +
> +       return pm_runtime_resume_and_get(drm_dev->dev);
> +}
> +
> +static __always_inline int
> +pvr_power_put(struct pvr_device *pvr_dev)
> +{
> +       struct drm_device *drm_dev = from_pvr_device(pvr_dev);
> +
> +       return pm_runtime_put(drm_dev->dev);
> +}
> +
> +#endif /* PVR_POWER_H */